diff --git a/docs/design/migrate-evaluator-playground/README.md b/docs/design/migrate-evaluator-playground/README.md
new file mode 100644
index 0000000000..b0b9d0c319
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/README.md
@@ -0,0 +1,84 @@
+# Migrate Evaluator Playground to New Evaluator Endpoints
+
+## Overview
+
+This planning workspace documents the migration of the Evaluator Playground frontend to use the new workflow-based evaluator endpoints. The backend team has migrated evaluators from the old `EvaluatorConfig` model to the new `SimpleEvaluator` (workflow-based) model.
+
+## Migration Strategy
+
+**Direct migration (no adapters)** split into two PRs:
+
+| PR | Scope | Description |
+|----|-------|-------------|
+| **PR 1** | CRUD | Migrate to `/preview/simple/evaluators/*`, change internal types to `SimpleEvaluator` |
+| **PR 2** | Run | Migrate to `/preview/workflows/invoke`, add workflow service types |
+
+See [plan.md](./plan.md) for detailed implementation steps.
+
+## Context
+
+- **PR #3527**: Backend migration that introduces new evaluator endpoints
+- **Goal**: Full migration to new endpoints, no legacy code remaining
+
+## Documents
+
+| File | Description |
+|------|-------------|
+| [context.md](./context.md) | Background, motivation, problem statement, goals, and non-goals |
+| [current-system.md](./current-system.md) | Detailed map of current Evaluator Playground implementation |
+| [new-endpoints.md](./new-endpoints.md) | New evaluator endpoint shapes and differences from legacy |
+| [research.md](./research.md) | Deep dive into evaluator execution architecture and URI-based handlers |
+| [migration-options.md](./migration-options.md) | Why we chose direct migration over adapters |
+| [risk-analysis.md](./risk-analysis.md) | Coupling points and risk areas for the migration |
+| [plan.md](./plan.md) | **Main plan** - PR 1 (CRUD) and PR 2 (Run) implementation details |
+| [status.md](./status.md) | Living document for progress updates and decisions |
+
+## Key Mapping Changes
+
+| Legacy | New |
+|--------|-----|
+| `EvaluatorConfig` | `SimpleEvaluator` |
+| `evaluator_key` | derived from `data.uri` |
+| `settings_values` | `data.parameters` |
+| `GET /evaluators/configs/` | `POST /preview/simple/evaluators/query` |
+| `POST /evaluators/configs/` | `POST /preview/simple/evaluators/` |
+| `PUT /evaluators/configs/{id}/` | `PUT /preview/simple/evaluators/{id}` |
+| `DELETE /evaluators/configs/{id}/` | `POST /preview/simple/evaluators/{id}/archive` |
+| `POST /evaluators/{key}/run/` | `POST /preview/workflows/invoke` |
+
+## Files Affected
+
+### PR 1: CRUD Migration
+
+| Area | Files |
+|------|-------|
+| Types | `web/oss/src/lib/Types.ts` |
+| Services | `web/oss/src/services/evaluators/index.ts` |
+| State | `web/oss/src/state/evaluators/atoms.ts` |
+| Playground State | `web/oss/src/components/.../ConfigureEvaluator/state/atoms.ts` |
+| Playground UI | `web/oss/src/components/.../ConfigureEvaluator/index.tsx` |
+| Registry | `web/oss/src/components/Evaluators/index.tsx` |
+| Registry Hook | `web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts` |
+| Columns | `web/oss/src/components/Evaluators/assets/getColumns.tsx` |
+
+### PR 2: Run Migration
+
+| Area | Files |
+|------|-------|
+| Types | `web/oss/src/lib/Types.ts` (add workflow types) |
+| Invoke Service | `web/oss/src/services/workflows/invoke.ts` (new) |
+| Debug Section | `web/oss/src/components/.../ConfigureEvaluator/DebugSection.tsx` |
+
+### Backend Reference (PR #3527)
+- `api/oss/src/routers/evaluators_router.py` - Legacy endpoints (kept temporarily)
+- `api/oss/src/apis/fastapi/evaluators/router.py` - New `SimpleEvaluators` router
+- `api/oss/src/apis/fastapi/workflows/router.py` - Workflow invoke endpoint
+- `api/oss/src/core/evaluators/dtos.py` - New data transfer objects
+
+## Effort Estimate
+
+| PR | Effort |
+|----|--------|
+| PR 1: CRUD | 4-5 days |
+| PR 2: Run | 3-4 days |
+| **Total** | **7-9 days** |
diff --git a/docs/design/migrate-evaluator-playground/context.md b/docs/design/migrate-evaluator-playground/context.md
new file mode 100644
index 0000000000..5fa82e8b21
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/context.md
@@ -0,0 +1,72 @@
+# Context: Migrate Evaluator Playground
+
+## Background
+
+The Agenta platform has undergone a significant architectural change where **evaluators are now workflows**. This means evaluators follow the same git-like versioning model as other workflows:
+- **Artifact** (Evaluator) → **Variant** → **Revision**
+
+Previously, evaluators were stored in a flat `EvaluatorConfigDB` table with simple key-value settings. The new model stores evaluators as `WorkflowArtifactDBE`, `WorkflowVariantDBE`, and `WorkflowRevisionDBE` records with richer metadata and versioning.
+
+## Motivation
+
+1. **Unified Architecture**: Evaluators, testsets, and apps now share the same git-like workflow model
+2. **Better Versioning**: Evaluators can have multiple variants and revision history
+3. **Richer Metadata**: New model supports URIs, schemas, scripts, and configuration in a structured way
+4. **Future Extensibility**: Custom evaluators will be first-class citizens with the same capabilities as built-in ones
+
+## Problem Statement
+
+The Evaluator Playground frontend currently uses legacy endpoints:
+- `GET /evaluators/` - List evaluator templates
+- `GET/POST/PUT/DELETE /evaluators/configs/` - CRUD for evaluator configurations
+- `POST /evaluators/{key}/run/` - Run evaluator in playground
+
+The backend (PR #3527) has:
+1. Migrated all evaluator configs to the new workflow-based model via DB migrations
+2. Created new `SimpleEvaluators` endpoints at `/preview/simple/evaluators/`
+3. Made native workflow execution available at `/preview/workflows/invoke`
+4. Kept legacy endpoints as thin wrappers (to be deprecated)
+
+**The frontend needs to migrate to use the new endpoints directly.**
+
+## Goals
+
+1. **Replace legacy evaluator config CRUD** with new `SimpleEvaluator` endpoints
+2. **Replace legacy evaluator run** with native workflow invoke (`/preview/workflows/invoke`)
+3. **Update data models** in frontend to match new `SimpleEvaluator` shape (no adapters)
+4. **Preserve UX** - no user-facing changes to the Evaluator Playground functionality
+5. **Remove all legacy endpoint usage** - clean migration, no dual-path code
+
+## Non-Goals
+
+1. **Not changing the Evaluator Playground UI** - Only the data layer changes
+2. **Not migrating evaluation batch runs** - Those already use the new workflow system internally
+3. **Not introducing new evaluator features** - This is a pure endpoint migration
+
+## Success Criteria
+
+1. Evaluator Playground can create, edit, and delete evaluators using the new `SimpleEvaluator` endpoints
+2. Evaluator Playground can run evaluators using native workflow invoke
+3. All existing evaluator configurations continue to work
+4. No regression in evaluator testing functionality
+5. No legacy endpoint calls remain in frontend code
+
+## Constraints
+
+1. Must not break existing evaluator configurations
+2. Must coordinate with backend team on endpoint availability (PR #3527)
+3. Split into two PRs for reviewability (CRUD first, then Run)
+
+## Migration Approach
+
+**Direct migration (no adapters):**
+
+| PR | Scope | Endpoints |
+|----|-------|-----------|
+| PR 1 | CRUD | `/preview/simple/evaluators/*` |
+| PR 2 | Run | `/preview/workflows/invoke` |
+
+This approach:
+- Avoids tech debt from adapter layers
+- Aligns internal types with backend models
+- Keeps changes reviewable by splitting into two PRs
diff --git a/docs/design/migrate-evaluator-playground/current-system.md b/docs/design/migrate-evaluator-playground/current-system.md
new file mode 100644
index 0000000000..7797d76ec4
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/current-system.md
@@ -0,0 +1,230 @@
+# Current System: Evaluator Playground
+
+## Overview
+
+The Evaluator Playground allows users to:
+1. **Browse** evaluator templates (built-in evaluators)
+2. **Create/Configure** evaluator configurations with custom settings
+3. **Test** evaluators by running them against app variants and test cases
+4. **Manage** (edit, clone, delete) existing evaluator configurations
+
+## File Structure
+
+### Entry Points (Pages)
+
+| Path | Purpose |
+|------|---------|
+| `/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx` | Evaluators list page |
+| `/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx` | Configure evaluator page |
+
+### Core Components
+
+#### Evaluators Registry (`/web/oss/src/components/Evaluators/`)
+
+| File | Purpose |
+|------|---------|
+| `index.tsx` | Main registry with table, search, tabs (automatic/human) |
+| `hooks/useEvaluatorsRegistryData.ts` | Fetches and transforms evaluator data |
+| `assets/getColumns.tsx` | Table column definitions |
+| `components/SelectEvaluatorModal/` | Modal to select evaluator template for new config |
+| `components/ConfigureEvaluator/index.tsx` | Page wrapper that loads data and initializes atoms |
+| `components/DeleteEvaluatorsModal/` | Delete confirmation modal |
+
+#### ConfigureEvaluator (Main UI) 
+
+Location: `/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/`
+
+| File | Purpose |
+|------|---------|
+| `index.tsx` | Configuration form + test panel layout |
+| `DebugSection.tsx` | Test evaluator panel (run variant, run evaluator) |
+| `DynamicFormField.tsx` | Renders settings fields based on evaluator template |
+| `AdvancedSettings.tsx` | Collapsible advanced parameters |
+| `state/atoms.ts` | Jotai atoms for playground state |
+| `variantUtils.ts` | Utility for building variants from revisions |
+
+### State Management
+
+#### Playground Atoms (`state/atoms.ts`)
+
+```typescript
+// Session state
+playgroundSessionAtom          // { evaluator, existingConfigId, mode }
+playgroundEvaluatorAtom        // Current evaluator template (derived)
+playgroundIsEditModeAtom       // Is editing existing config? (derived)
+playgroundIsCloneModeAtom      // Is cloning config? (derived)
+playgroundEditValuesAtom       // Current config values being edited
+
+// Form state
+playgroundFormRefAtom          // Ant Design Form instance
+
+// Test section state
+playgroundSelectedVariantAtom  // Selected variant for testing
+playgroundSelectedTestsetIdAtom // Selected testset ID
+playgroundSelectedRevisionIdAtom // Selected revision ID
+playgroundSelectedTestcaseAtom // Testcase data
+playgroundTraceTreeAtom        // Trace output from running variant
+
+// Persisted state (localStorage)
+playgroundLastAppIdAtom        // Last used app ID
+playgroundLastVariantIdAtom    // Last used variant ID
+
+// Action atoms
+initPlaygroundAtom             // Initialize playground state
+resetPlaygroundAtom            // Reset all state
+commitPlaygroundAtom           // Update state after save
+cloneCurrentConfigAtom         // Switch to clone mode
+```
+
+#### Global Evaluator Atoms (`/web/oss/src/state/evaluators/atoms.ts`)
+
+```typescript
+evaluatorConfigsQueryAtomFamily // Query for evaluator configs
+evaluatorsQueryAtomFamily       // Query for evaluator templates
+nonArchivedEvaluatorsAtom       // Derived: non-archived evaluators
+evaluatorByKeyAtomFamily        // Find evaluator by key
+```
+
+### API Service Layer
+
+#### Evaluators Service (`/web/oss/src/services/evaluators/index.ts`)
+
+```typescript
+// Evaluator Templates (legacy)
+fetchAllEvaluators()           // GET /evaluators
+
+// Evaluator Configs (legacy)
+fetchAllEvaluatorConfigs()     // GET /evaluators/configs
+createEvaluatorConfig()        // POST /evaluators/configs
+updateEvaluatorConfig()        // PUT /evaluators/configs/{id}
+deleteEvaluatorConfig()        // DELETE /evaluators/configs/{id}
+
+// Custom/Human Evaluators (new)
+createEvaluator()              // POST /preview/simple/evaluators/
+updateEvaluator()              // PUT /preview/simple/evaluators/{id}
+fetchEvaluatorById()           // GET /preview/simple/evaluators/{id}
+deleteHumanEvaluator()         // POST /preview/simple/evaluators/{id}/archive
+```
+
+#### Evaluator Run Service (`/web/oss/src/services/evaluations/api_ee/index.ts`)
+
+```typescript
+createEvaluatorDataMapping()   // POST /evaluators/map
+createEvaluatorRunExecution()  // POST /evaluators/{key}/run
+```
+
+## Data Flow
+
+```
+┌─────────────────────────────────────────────────────────────────────────────┐
+│                           USER ACTIONS                                       │
+│  - Browse evaluators list                                                   │
+│  - Create new evaluator config                                              │
+│  - Edit existing evaluator config                                           │
+│  - Test evaluator with variant + testcase                                   │
+└─────────────────────────────────────────────────────────────────────────────┘
+                                    │
+                                    ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│  ENTRY POINTS                                                                │
+│  /evaluators → EvaluatorsRegistry                                           │
+│       ├─ Uses useEvaluatorsRegistryData() hook                              │
+│       │     ├─ Calls fetchAllEvaluators() → GET /evaluators                 │
+│       │     └─ Calls fetchAllEvaluatorConfigs() → GET /evaluators/configs   │
+│       │                                                                      │
+│       ├─ "Create new" → SelectEvaluatorModal → /evaluators/configure/new    │
+│       └─ Click row → /evaluators/configure/{id}                             │
+│                                                                              │
+│  /evaluators/configure/{id} → ConfigureEvaluatorPage                        │
+│       ├─ Loads evaluator template & existing config                         │
+│       ├─ Initializes playgroundSessionAtom                                  │
+│       └─ Renders ConfigureEvaluator component                               │
+└─────────────────────────────────────────────────────────────────────────────┘
+                                    │
+                                    ▼
+┌─────────────────────────────────────────────────────────────────────────────┐
+│  ConfigureEvaluator                                                          │
+│  ┌─────────────────────────────┐  ┌─────────────────────────────┐           │
+│  │  LEFT: Configuration Form   │  │  RIGHT: DebugSection        │           │
+│  │  - Name input               │  │  - Testcase selector        │           │
+│  │  - DynamicFormField[]       │  │  - Variant selector         │           │
+│  │  - AdvancedSettings         │  │  - Run variant button       │           │
+│  │  - Commit/Reset buttons     │  │  - Run evaluator button     │           │
+│  └─────────────────────────────┘  └─────────────────────────────┘           │
+│                                                                              │
+│  Commit Actions:                                                             │
+│  - Create: POST /evaluators/configs → createEvaluatorConfig()               │
+│  - Update: PUT /evaluators/configs/{id} → updateEvaluatorConfig()           │
+│                                                                              │
+│  Test Actions:                                                               │
+│  - Run Variant: callVariant() → POST to variant URL                         │
+│  - Run Evaluator: createEvaluatorRunExecution()                             │
+│                   → POST /evaluators/{key}/run                              │
+└─────────────────────────────────────────────────────────────────────────────┘
+```
+
+## Current API Endpoints Used
+
+### Legacy Endpoints (to be migrated)
+
+| Endpoint | Method | Frontend Function | Purpose |
+|----------|--------|-------------------|---------|
+| `/evaluators/` | GET | `fetchAllEvaluators()` | List evaluator templates |
+| `/evaluators/configs/` | GET | `fetchAllEvaluatorConfigs()` | List evaluator configs |
+| `/evaluators/configs/` | POST | `createEvaluatorConfig()` | Create new config |
+| `/evaluators/configs/{id}/` | PUT | `updateEvaluatorConfig()` | Update existing config |
+| `/evaluators/configs/{id}/` | DELETE | `deleteEvaluatorConfig()` | Delete config |
+
+### Endpoints Unchanged in PR 1
+
+| Endpoint | Method | Frontend Function | Purpose |
+|----------|--------|-------------------|---------|
+| `/evaluators/map/` | POST | `createEvaluatorDataMapping()` | Map trace data for RAG evaluators |
+| `/evaluators/{key}/run/` | POST | `createEvaluatorRunExecution()` | Run evaluator (test); kept in PR 1, replaced by `POST /preview/workflows/invoke` in PR 2 |
+
+### Already Using New Endpoints (for custom evaluators)
+
+| Endpoint | Method | Frontend Function | Purpose |
+|----------|--------|-------------------|---------|
+| `/preview/simple/evaluators/` | POST | `createEvaluator()` | Create custom evaluator |
+| `/preview/simple/evaluators/{id}` | PUT | `updateEvaluator()` | Update custom evaluator |
+| `/preview/simple/evaluators/{id}` | GET | `fetchEvaluatorById()` | Fetch evaluator by ID |
+| `/preview/simple/evaluators/{id}/archive` | POST | `deleteHumanEvaluator()` | Archive human evaluator |
+
+## Data Types
+
+### Current EvaluatorConfig (Legacy)
+
+```typescript
+interface EvaluatorConfig {
+    id: string
+    evaluator_key: string
+    name: string
+    settings_values: Record<string, any>
+    created_at: string
+    updated_at: string
+    color?: string
+    tags?: string[]
+    // Frontend additions
+    icon_url?: string | StaticImageData
+}
+```
+
+### Current Evaluator Template (Legacy)
+
+```typescript
+interface Evaluator {
+    name: string
+    key: string
+    settings_presets?: SettingsPreset[]
+    settings_template: Record<string, EvaluationSettingsTemplate>
+    icon_url?: string | StaticImageData
+    color?: string
+    direct_use?: boolean
+    description: string
+    oss?: boolean
+    requires_llm_api_keys?: boolean
+    tags: string[]
+    archived?: boolean
+}
+```
diff --git a/docs/design/migrate-evaluator-playground/migration-options.md b/docs/design/migrate-evaluator-playground/migration-options.md
new file mode 100644
index 0000000000..40bf6b4caa
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/migration-options.md
@@ -0,0 +1,106 @@
+# Migration Options
+
+## Goal
+
+Full migration of the Evaluator Playground to the new workflow-based evaluator APIs, including:
+- CRUD on evaluator configs via `/preview/simple/evaluators/*`
+- Running evaluators via native workflow invocation (`/preview/workflows/invoke`) instead of the legacy `/evaluators/{key}/run`
+
+---
+
+## Option A (Rejected): Adapter Pattern
+
+Keep the UI/state assuming the legacy `EvaluatorConfig` shape and translate at the API boundary.
+
+### Why it was considered
+
+- Minimizes touching UI/atoms/forms initially
+- Lets you swap endpoints quickly with limited regression surface
+- Good when backend is still stabilizing schemas
+
+### Why it was rejected
+
+- Adds tech debt (adapter layer becomes permanent)
+- Delays alignment with new architecture
+- Makes future changes harder (two mental models)
+
+---
+
+## Option B (Chosen): Direct Migration
+
+Change the frontend domain model to match the backend:
+- "Evaluator config" becomes `SimpleEvaluator`
+- Internal shapes use `data.parameters` instead of `settings_values`
+- Internal shapes derive `evaluator_key` from `data.uri`
+
+### Why it's better
+
+- No translation debt
+- Aligns with "evaluators are workflows" concept end-to-end
+- Unlocks revision-aware runs and custom evaluator URIs
+- Cleaner codebase long-term
+
+---
+
+## Execution Strategy
+
+To keep changes reviewable while avoiding adapters:
+
+### PR 1: CRUD Migration
+- Migrate all CRUD operations to `/preview/simple/evaluators/*`
+- Change internal types from `EvaluatorConfig` to `SimpleEvaluator`
+- Update atoms, services, and components
+- Keep legacy run endpoint temporarily
+
+### PR 2: Run Migration
+- Migrate run from `/evaluators/{key}/run` to `/preview/workflows/invoke`
+- Add `WorkflowServiceRequest/Response` types
+- Update `DebugSection.tsx` to use native invoke
+
+This sequencing:
+1. Isolates CRUD changes for easier review
+2. Allows CRUD to stabilize before changing run
+3. Avoids adapter layer entirely
+4. Results in full migration with no legacy code
+
+---
+
+## Files Affected
+
+### PR 1 (CRUD)
+
+| Area | Files |
+|------|-------|
+| Types | `web/oss/src/lib/Types.ts` |
+| Services | `web/oss/src/services/evaluators/index.ts` |
+| State | `web/oss/src/state/evaluators/atoms.ts` |
+| Playground State | `web/oss/src/components/.../ConfigureEvaluator/state/atoms.ts` |
+| Playground UI | `web/oss/src/components/.../ConfigureEvaluator/index.tsx` |
+| Registry | `web/oss/src/components/Evaluators/index.tsx` |
+| Registry Hook | `web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts` |
+| Columns | `web/oss/src/components/Evaluators/assets/getColumns.tsx` |
+
+### PR 2 (Run)
+
+| Area | Files |
+|------|-------|
+| Types | `web/oss/src/lib/Types.ts` (add workflow types) |
+| Invoke Service | `web/oss/src/services/workflows/invoke.ts` (new) |
+| Debug Section | `web/oss/src/components/.../ConfigureEvaluator/DebugSection.tsx` |
+
+---
+
+## Key Mapping Changes
+
+| Legacy | New |
+|--------|-----|
+| `EvaluatorConfig` | `SimpleEvaluator` |
+| `evaluator_key` | derived from `data.uri` |
+| `settings_values` | `data.parameters` |
+| `GET /evaluators/configs/` | `POST /preview/simple/evaluators/query` |
+| `POST /evaluators/configs/` | `POST /preview/simple/evaluators/` |
+| `PUT /evaluators/configs/{id}/` | `PUT /preview/simple/evaluators/{id}` |
+| `DELETE /evaluators/configs/{id}/` | `POST /preview/simple/evaluators/{id}/archive` |
+| `POST /evaluators/{key}/run/` | `POST /preview/workflows/invoke` |
+
+See [plan.md](./plan.md) for detailed implementation steps.
diff --git a/docs/design/migrate-evaluator-playground/new-endpoints.md b/docs/design/migrate-evaluator-playground/new-endpoints.md
new file mode 100644
index 0000000000..97a20f01b2
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/new-endpoints.md
@@ -0,0 +1,434 @@
+# New Evaluator Endpoints
+
+## Overview
+
+The new evaluator system treats evaluators as **workflows** with git-like versioning. The `SimpleEvaluator` API provides a simplified interface that abstracts the underlying workflow structure.
+
+## Key Architectural Change
+
+**Evaluators are now workflows identified by URIs.**
+
+URI Format: `agenta:builtin:{evaluator_key}:v0`
+
+Example: `agenta:builtin:auto_exact_match:v0`
+
+The SDK has a `HANDLER_REGISTRY` that maps URIs to actual handler functions. This enables:
+- Native workflow invocation via URI
+- Custom evaluators with user-defined URIs (`user:custom:my_evaluator:latest`)
+- Version management of evaluator implementations
+
+## Evaluator Execution Paths
+
+### Option 1: Legacy Run Endpoint (Maintained for Backward Compatibility)
+
+```
+POST /evaluators/{evaluator_key}/run/
+```
+
+**Request:**
+```typescript
+interface EvaluatorInputInterface {
+    inputs: Record<string, any>    // prediction, ground_truth, etc.
+    settings: Record<string, any>  // evaluator configuration
+    credentials?: Record<string, any>
+}
+```
+
+**Response:**
+```typescript
+interface EvaluatorOutputInterface {
+    outputs: Record<string, any>  // score, success, etc.
+}
+```
+
+**Internal Implementation (PR #3527):**
+```python
+async def _run_evaluator(evaluator_key: str, evaluator_input):
+    # Build URI from evaluator_key
+    uri = f"agenta:builtin:{evaluator_key}:v0"
+    
+    # Retrieve handler from SDK registry
+    handler = retrieve_handler(uri)
+    
+    # Invoke handler directly
+    result = handler(inputs=inputs, outputs=outputs, parameters=settings)
+    
+    return {"outputs": result}
+```
+
+### Option 2: Native Workflow Invoke Endpoint
+
+```
+POST /preview/workflows/invoke
+```
+
+**Request:**
+```typescript
+interface WorkflowServiceRequest {
+    data: {
+        inputs: Record<string, any>
+        outputs?: any
+        parameters?: Record<string, any>  // settings
+    }
+    revision?: {
+        data?: {
+            uri: string  // e.g., "agenta:builtin:auto_exact_match:v0"
+            parameters?: Record<string, any>
+        }
+    }
+}
+```
+
+**Response:**
+```typescript
+interface WorkflowServiceBatchResponse {
+    data: {
+        outputs: Record<string, any>
+    }
+    status?: {
+        code: number
+        message: string
+    }
+}
+```
+
+### Option 3: Evaluator Revision-Based Invoke
+
+For a fully "native" approach:
+
+1. **Fetch the evaluator revision:**
+   ```
+   POST /preview/evaluators/revisions/retrieve
+   ```
+   
+2. **Get the URI from revision data:**
+   ```typescript
+   const uri = evaluatorRevision.data.uri  // "agenta:builtin:auto_exact_match:v0"
+   ```
+
+3. **Invoke via workflow service:**
+   ```
+   POST /preview/workflows/invoke
+   ```
+
+## Comparison: Which Approach to Use?
+
+| Aspect | Legacy Run | Native Invoke | Revision-Based |
+|--------|------------|---------------|----------------|
+| **Simplicity** | High | Medium | Low |
+| **Frontend Changes** | Minimal | Medium | Significant |
+| **Architecture Alignment** | Legacy | Native | Most Native |
+| **Flexibility** | Low | High | High |
+| **Custom Evaluators** | Limited | Full Support | Full Support |
+| **Requires URI** | No (uses key) | Yes | Yes (fetched) |
+
+**Recommendation:** 
+
+For the Evaluator Playground migration:
+- **Short-term (PR 1):** Keep using legacy `/evaluators/{key}/run/` while CRUD is migrated - it works the same and the backend handles URI resolution internally
+- **Long-term (PR 2):** Migrate to native workflow invoke (`/preview/workflows/invoke`), which also enables custom evaluators and revision-specific execution
+
+---
+
+## New SimpleEvaluator CRUD Endpoints
+
+Base path: `/preview/simple/evaluators/`
+
+| Endpoint | Method | Purpose |
+|----------|--------|---------|
+| `/preview/simple/evaluators/` | POST | Create new evaluator |
+| `/preview/simple/evaluators/{id}` | GET | Fetch evaluator by ID |
+| `/preview/simple/evaluators/{id}` | PUT | Update evaluator |
+| `/preview/simple/evaluators/{id}/archive` | POST | Archive (soft delete) evaluator |
+| `/preview/simple/evaluators/{id}/unarchive` | POST | Restore archived evaluator |
+| `/preview/simple/evaluators/query` | POST | Query evaluators with filters |
+
+## Data Structures
+
+### SimpleEvaluator (Response)
+
+```python
+class SimpleEvaluator:
+    id: UUID
+    slug: str
+    
+    # Lifecycle
+    created_at: datetime
+    updated_at: datetime
+    
+    # Header
+    name: Optional[str]
+    description: Optional[str]
+    
+    # Metadata
+    tags: Optional[List[str]]
+    meta: Optional[dict]
+    
+    # Flags
+    flags: Optional[SimpleEvaluatorFlags]
+    
+    # Data (revision data)
+    data: Optional[SimpleEvaluatorData]
+```
+
+### SimpleEvaluatorData (Revision Configuration)
+
+```python
+class SimpleEvaluatorData:
+    # Version
+    version: Optional[str]  # e.g., "2025.07.14"
+    
+    # Service Interface - THE KEY FIELD
+    uri: Optional[str]      # e.g., "agenta:builtin:auto_exact_match:v0"
+    url: Optional[str]      # For webhook evaluators
+    headers: Optional[Dict[str, Union[Reference, str]]]
+    
+    # Schema definitions
+    schemas: Optional[Dict[str, Schema]]  # e.g., {"outputs": {...}}
+    
+    # Configuration
+    script: Optional[dict]      # For custom code: {"content": "...", "runtime": "python"}
+    parameters: Optional[dict]  # Settings values (same as legacy settings_values)
+    
+    # Legacy fields (for backward compatibility)
+    service: Optional[dict]
+    configuration: Optional[dict]
+```
+
+### URI-based Handler Registry
+
+The SDK maintains registries that map URIs to implementations:
+
+```python
+HANDLER_REGISTRY = {
+    "agenta": {
+        "builtin": {
+            "echo": {"v0": echo_v0},
+            "auto_exact_match": {"v0": auto_exact_match_v0},
+            "auto_regex_test": {"v0": auto_regex_test_v0},
+            # ... all built-in evaluators
+        }
+    },
+    "user": {
+        "custom": {
+            # User-defined evaluators go here
+        }
+    }
+}
+```
+
+Retrieve handler by URI:
+```python
+handler = retrieve_handler("agenta:builtin:auto_exact_match:v0")
+```
+
+---
+
+## Endpoint Comparison: Old vs New (CRUD)
+
+### List Evaluator Configs
+
+**Old:**
+```
+GET /evaluators/configs/?project_id={project_id}
+
+Response: EvaluatorConfig[]
+{
+    id: string
+    name: string
+    evaluator_key: string
+    settings_values: object
+    created_at: string
+    updated_at: string
+}
+```
+
+**New:**
+```
+POST /preview/simple/evaluators/query?project_id={project_id}
+
+Request: SimpleEvaluatorQuery
+{
+    flags?: { is_evaluator: true }
+}
+
+Response: SimpleEvaluatorsResponse
+{
+    count: number
+    evaluators: SimpleEvaluator[]
+}
+```
+
+**Note:** For the Evaluator Registry (automatic configs), pass `flags.is_human = false` and `include_archived = false` so archived or human evaluators don't show up.
+
+### Create Evaluator Config
+
+**Old:**
+```
+POST /evaluators/configs/?project_id={project_id}
+
+Request: NewEvaluatorConfig
+{
+    name: string
+    evaluator_key: string
+    settings_values: object
+}
+
+Response: EvaluatorConfig
+```
+
+**New:**
+```
+POST /preview/simple/evaluators/?project_id={project_id}
+
+Request: SimpleEvaluatorCreateRequest
+{
+    evaluator: {
+        slug: string       # Generated from name
+        name: string
+        flags: { is_evaluator: true, is_human: false }
+        data: {
+            uri: "agenta:builtin:{evaluator_key}:v0"
+            parameters: object  # settings_values
+            schemas: { outputs: object }  # Output schema
+        }
+    }
+}
+
+Response: SimpleEvaluatorResponse
+{
+    count: number
+    evaluator: SimpleEvaluator
+}
+```
+
+**Note:** Workflow slugs are unique per project. We append a short random suffix when generating slugs to avoid collisions when names repeat.
+
+### Update Evaluator Config
+
+**Old:**
+```
+PUT /evaluators/configs/{id}/?project_id={project_id}
+
+Request: UpdateEvaluatorConfig
+{
+    name?: string
+    settings_values?: object
+}
+
+Response: EvaluatorConfig
+```
+
+**New:**
+```
+PUT /preview/simple/evaluators/{id}?project_id={project_id}
+
+Request: SimpleEvaluatorEditRequest
+{
+    evaluator: {
+        id: UUID
+        name?: string
+        data?: {
+            parameters?: object  # settings_values
+        }
+    }
+}
+
+Response: SimpleEvaluatorResponse
+```
+
+**Note:** `SimpleEvaluatorEdit.data` is treated as the full revision payload. When updating, include the existing `data.uri` (and any schemas) along with `data.parameters` to avoid clearing the URI.
+
+### Delete Evaluator Config
+
+**Old:**
+```
+DELETE /evaluators/configs/{id}/?project_id={project_id}
+
+Response: boolean
+```
+
+**New:**
+```
+POST /preview/simple/evaluators/{id}/archive?project_id={project_id}
+
+Response: SimpleEvaluatorResponse
+```
+
+---
+
+## Key Differences Summary
+
+### 1. URI-based Evaluator Identification
+
+**Old:** `evaluator_key: "auto_exact_match"`
+
+**New:** `uri: "agenta:builtin:auto_exact_match:v0"`
+
+The URI enables:
+- Version management (`v0`, `v1`, etc.)
+- Custom evaluators (`user:custom:my_eval:latest`)
+- Handler registry lookup
+
+### 2. Settings Location
+
+**Old:** `settings_values: { threshold: 0.5 }`
+
+**New:** `data.parameters: { threshold: 0.5 }`
+
+### 3. Output Schema (New)
+
+The new model includes explicit output schemas:
+
+```python
+data.schemas = {
+    "outputs": {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "type": "object",
+        "properties": {
+            "score": {"type": "number"},
+            "success": {"type": "boolean"}
+        }
+    }
+}
+```
+
+### 4. Soft Delete vs Hard Delete
+
+- **Old:** Hard delete (`DELETE`)
+- **New:** Soft delete via archive (`POST .../archive`)
+
+### 5. Response Wrapper
+
+**Old:** Returns data directly
+
+**New:** Returns wrapped response: `{ count: number, evaluator: SimpleEvaluator }`
+
+---
+
+## Frontend Mapping Requirements
+
+To migrate, the frontend needs to:
+
+1. **When creating an evaluator:**
+   - Generate `slug` from name
+   - Build `uri` from `evaluator_key`: `"agenta:builtin:{evaluator_key}:v0"`
+   - Move `settings_values` to `data.parameters`
+   - Set `flags.is_evaluator = true`
+   - Optionally include `data.schemas.outputs`
+
+2. **When reading evaluators:**
+   - Extract `evaluator_key` from `uri` (parse the third segment)
+   - Read settings from `data.parameters`
+   - Unwrap response from `{ evaluator: ... }`
+
+3. **When updating:**
+   - Include `id` in request body
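+   - Update `data.parameters` for settings changes, and resend the existing `data.uri` (and any `data.schemas`) because `data` is treated as the full revision payload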
+
+4. **When deleting:**
+   - Use `POST .../archive` instead of `DELETE`
+
+5. **When running evaluators:**
+   - **Option A (interim, PR 1):** Keep using `/evaluators/{key}/run/` - no change needed
+   - **Option B (target, PR 2):** Use `/preview/workflows/invoke` with URI from revision
diff --git a/docs/design/migrate-evaluator-playground/plan.md b/docs/design/migrate-evaluator-playground/plan.md
new file mode 100644
index 0000000000..8a384658f9
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/plan.md
@@ -0,0 +1,734 @@
+# Migration Plan: Evaluator Playground
+
+## Overview
+
+Full migration of the Evaluator Playground to the new workflow-based evaluator APIs. This plan follows **Plan B (Direct Migration)** - no adapters, internal shapes change to match the new `SimpleEvaluator` model.
+
+## Migration Strategy
+
+**Two PRs, no adapters:**
+
+1. **PR 1:** Migrate CRUD to `SimpleEvaluator` endpoints (internal shapes change)
+2. **PR 2:** Migrate run to native workflow invoke (`/preview/workflows/invoke`)
+
+This keeps changes reviewable while avoiding tech debt from adapter layers.
+
+```
+PR 1: CRUD Migration
+┌─────────────────────────────────────────────────────────────────┐
+│  EvaluatorConfig → SimpleEvaluator                              │
+│  /evaluators/configs/* → /preview/simple/evaluators/*           │
+│  settings_values → data.parameters                              │
+│  evaluator_key → data.uri                                       │
+└─────────────────────────────────────────────────────────────────┘
+
+PR 2: Run Migration  
+┌─────────────────────────────────────────────────────────────────┐
+│  /evaluators/{key}/run → /preview/workflows/invoke              │
+│  EvaluatorInputInterface → WorkflowServiceRequest               │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## PR 1: CRUD Migration
+
+**Goal:** Replace legacy evaluator config endpoints with new SimpleEvaluator endpoints. Change internal data model from `EvaluatorConfig` to `SimpleEvaluator`.
+
+### Phase 1.1: Type Definitions
+
+**File:** `web/oss/src/lib/Types.ts` (add to existing types)
+
+```typescript
+// ============ SimpleEvaluator Types ============
+
+export interface SimpleEvaluatorData {
+    version?: string
+    uri?: string                              // e.g., "agenta:builtin:auto_exact_match:v0"
+    url?: string                              // for webhook evaluators
+    headers?: Record<string, string>
+    schemas?: { 
+        outputs?: Record<string, any>
+        inputs?: Record<string, any>
+        parameters?: Record<string, any>
+    }
+    script?: { content: string; runtime: string }
+    parameters?: Record<string, any>          // replaces settings_values
+}
+
+export interface SimpleEvaluatorFlags {
+    is_custom?: boolean
+    is_evaluator?: boolean
+    is_human?: boolean
+}
+
+export interface SimpleEvaluator {
+    id: string
+    slug: string
+    name?: string
+    description?: string
+    tags?: string[]
+    meta?: Record<string, any>
+    flags?: SimpleEvaluatorFlags
+    data?: SimpleEvaluatorData
+    created_at: string
+    updated_at: string
+}
+
+export interface SimpleEvaluatorCreate {
+    slug: string
+    name?: string
+    description?: string
+    tags?: string[]
+    flags?: SimpleEvaluatorFlags
+    data?: SimpleEvaluatorData
+}
+
+export interface SimpleEvaluatorEdit {
+    id: string
+    name?: string
+    description?: string
+    tags?: string[]
+    data?: SimpleEvaluatorData
+}
+
+export interface SimpleEvaluatorResponse {
+    count: number
+    evaluator: SimpleEvaluator | null
+}
+
+export interface SimpleEvaluatorsResponse {
+    count: number
+    evaluators: SimpleEvaluator[]
+}
+```
+
+**Deliverables:**
+- [ ] Add `SimpleEvaluator*` types to Types.ts
+- [ ] Keep `EvaluatorConfig` temporarily for areas not yet migrated
+
+---
+
+### Phase 1.2: Service Layer Changes
+
+**File:** `web/oss/src/services/evaluators/index.ts`
+
+Replace legacy functions with new implementations:
+
+```typescript
+// ============ Helper Functions ============
+
+/**
+ * Extract evaluator_key from URI
+ * URI format: "agenta:builtin:{key}:v0"
+ */
+export function extractEvaluatorKeyFromUri(uri: string | undefined): string {
+    if (!uri) return ""
+    const parts = uri.split(":")
+    if (parts.length >= 3 && parts[0] === "agenta" && parts[1] === "builtin") {
+        return parts[2]
+    }
+    return ""
+}
+
+/**
+ * Build URI from evaluator key
+ */
+export function buildEvaluatorUri(evaluatorKey: string): string {
+    return `agenta:builtin:${evaluatorKey}:v0`
+}
+
+/**
+ * Generate slug from name (append suffix to avoid collisions)
+ */
+export function generateSlug(name: string): string {
+    const base = name
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/g, "-")
+        .replace(/^-|-$/g, "")
+
+    const suffix = Math.random().toString(36).slice(2, 8)
+    const maxBaseLength = Math.max(1, 50 - suffix.length - 1)
+    return `${base.slice(0, maxBaseLength)}-${suffix}`
+}
+
+// ============ CRUD Functions ============
+
+export const fetchAllEvaluatorConfigs = async (
+    _appId?: string | null,  // kept for backward compat, ignored
+    projectIdOverride?: string | null,
+): Promise<SimpleEvaluator[]> => {
+    const {projectId: projectIdFromStore} = getProjectValues()
+    const projectId = projectIdOverride ?? projectIdFromStore
+
+    if (!projectId) return []
+
+    const response = await axios.post(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/query?project_id=${projectId}`,
+        {
+            evaluator: { flags: { is_evaluator: true, is_human: false } },
+            include_archived: false,
+        }
+    )
+    
+    return response.data?.evaluators || []
+}
+
+export const createEvaluatorConfig = async (
+    evaluatorKey: string,
+    name: string,
+    settingsValues: Record<string, any>,
+): Promise<SimpleEvaluator> => {
+    const {projectId} = getProjectValues()
+    
+    const payload: SimpleEvaluatorCreate = {
+        slug: generateSlug(name),
+        name,
+        flags: { is_evaluator: true, is_human: false },
+        data: {
+            uri: buildEvaluatorUri(evaluatorKey),
+            parameters: settingsValues,
+        },
+    }
+    
+    const response = await axios.post(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/?project_id=${projectId}`,
+        { evaluator: payload },
+    )
+    
+    const result = response.data?.evaluator
+    if (!result) throw new Error("Failed to create evaluator")
+    
+    return result
+}
+
+export const updateEvaluatorConfig = async (
+    evaluatorId: string,
+    updates: { name?: string; settingsValues?: Record<string, any> },
+    existing?: SimpleEvaluator,
+): Promise<SimpleEvaluator> => {
+    const {projectId} = getProjectValues()
+
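+    // IMPORTANT: include existing data (uri/schemas) when editing. NOTE: `meta`/`flags` must also be declared on `SimpleEvaluatorEdit` for this literal to type-check.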
+    const payload: SimpleEvaluatorEdit = {
+        id: evaluatorId,
+        name: updates.name ?? existing?.name,
+        data: {
+            ...(existing?.data ?? {}),
+            ...(updates.settingsValues ? {parameters: updates.settingsValues} : {}),
+        },
+        tags: existing?.tags,
+        meta: existing?.meta,
+        flags: existing?.flags,
+    }
+
+    const response = await axios.put(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/${evaluatorId}?project_id=${projectId}`,
+        { evaluator: payload },
+    )
+    
+    const result = response.data?.evaluator
+    if (!result) throw new Error("Failed to update evaluator")
+    
+    return result
+}
+
+export const deleteEvaluatorConfig = async (evaluatorId: string): Promise<boolean> => {
+    const {projectId} = getProjectValues()
+
+    await axios.post(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/${evaluatorId}/archive?project_id=${projectId}`,
+    )
+    
+    return true
+}
+
+export const fetchEvaluatorById = async (evaluatorId: string): Promise<SimpleEvaluator | null> => {
+    const {projectId} = getProjectValues()
+
+    const response = await axios.get(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/${evaluatorId}?project_id=${projectId}`,
+    )
+    
+    return response.data?.evaluator || null
+}
+```
+
+**Deliverables:**
+- [ ] Replace `fetchAllEvaluatorConfigs` implementation
+- [ ] Replace `createEvaluatorConfig` implementation
+- [ ] Replace `updateEvaluatorConfig` implementation
+- [ ] Replace `deleteEvaluatorConfig` implementation
+- [ ] Add helper functions for URI handling
+- [ ] Remove legacy endpoint calls
+
+---
+
+### Phase 1.3: State/Atoms Changes
+
+**File:** `web/oss/src/state/evaluators/atoms.ts`
+
+Update query atoms to return `SimpleEvaluator[]`:
+
+```typescript
+export const evaluatorConfigsQueryAtomFamily = atomFamily((projectId: string | null) =>
+    atomWithQuery(() => ({
+        queryKey: ["evaluator-configs", projectId],
+        queryFn: () => fetchAllEvaluatorConfigs(null, projectId),
+        enabled: !!projectId,
+    }))
+)
+
+// Derived atom for non-archived evaluators
+export const nonArchivedEvaluatorsAtom = atom((get) => {
+    const projectId = get(projectIdAtom)
+    if (!projectId) return []
+    
+    const query = get(evaluatorConfigsQueryAtomFamily(projectId))
+    const evaluators = query.data ?? []
+    
+    // Filter out archived (deleted_at is set); NOTE: `deleted_at` must be added to the `SimpleEvaluator` type for this to type-check
+    return evaluators.filter((e) => !e.deleted_at)
+})
+```
+
+**File:** `web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts`
+
+Update playground atoms to use `SimpleEvaluator`:
+
+```typescript
+// Session now stores SimpleEvaluator instead of EvaluatorConfig
+export interface PlaygroundSession {
+    evaluator: Evaluator              // template (unchanged)
+    simpleEvaluator?: SimpleEvaluator // existing config being edited
+    mode: "create" | "edit" | "clone"
+}
+
+export const playgroundSessionAtom = atom<PlaygroundSession | null>(null)
+
+// Edit values now use SimpleEvaluator shape
+export const playgroundEditValuesAtom = atom<Partial<SimpleEvaluator> | null>(null)
+
+// Derived: get evaluator_key from URI
+export const playgroundEvaluatorKeyAtom = atom((get) => {
+    const session = get(playgroundSessionAtom)
+    if (!session) return null
+    
+    // From template
+    if (session.evaluator?.key) return session.evaluator.key
+    
+    // From existing SimpleEvaluator
+    if (session.simpleEvaluator?.data?.uri) {
+        return extractEvaluatorKeyFromUri(session.simpleEvaluator.data.uri)
+    }
+    
+    return null
+})
+```
+
+**Deliverables:**
+- [ ] Update `evaluatorConfigsQueryAtomFamily` return type
+- [ ] Update playground session atoms
+- [ ] Update `playgroundEditValuesAtom` shape
+- [ ] Add derived atoms for backward-compatible access (e.g., `evaluator_key`)
+
+---
+
+### Phase 1.4: Component Changes
+
+#### ConfigureEvaluator/index.tsx
+
+Key changes:
+- Form fields read/write to `data.parameters` instead of `settings_values`
+- On commit, build `SimpleEvaluatorCreate` or `SimpleEvaluatorEdit`
+- Load existing config as `SimpleEvaluator`
+
+```typescript
+// Before
+form.setFieldsValue({
+    name: editEvalEditValues.name,
+    settings_values: editEvalEditValues.settings_values,
+})
+
+// After (use parameters field to match SimpleEvaluator)
+form.setFieldsValue({
+    name: simpleEvaluator.name,
+    parameters: simpleEvaluator.data?.parameters,
+})
+```
+
+#### useEvaluatorsRegistryData.ts
+
+Update to work with `SimpleEvaluator[]`:
+
+```typescript
+// Derive evaluator_key for display
+const enrichedEvaluators = evaluators.map((e) => ({
+    ...e,
+    evaluator_key: extractEvaluatorKeyFromUri(e.data?.uri),
+    parameters: e.data?.parameters,
+}))
+```
+
+#### getColumns.tsx
+
+Update column accessors:
+
+```typescript
+// Before
+dataIndex: "evaluator_key"
+
+// After  
+dataIndex: ["data", "uri"],
+render: (uri) => extractEvaluatorKeyFromUri(uri)
+```
+
+**Deliverables:**
+- [ ] Update ConfigureEvaluator form bindings
+- [ ] Update commit logic to use new service functions
+- [ ] Update useEvaluatorsRegistryData hook
+- [ ] Update table columns in getColumns.tsx
+- [ ] Update any other components that read evaluator configs
+
+---
+
+### Phase 1.5: Testing
+
+**Test Cases:**
+
+1. **List Evaluators**
+   - [ ] Registry shows all existing evaluator configs
+   - [ ] Correct names, types, icons displayed
+   - [ ] Filtering and search work
+   - [ ] Archived evaluators hidden
+
+2. **Create Evaluator**
+   - [ ] Select template → Configure → Commit works
+   - [ ] Settings (parameters) saved correctly
+   - [ ] URI generated correctly from evaluator_key
+   - [ ] Slug generated from name
+
+3. **Edit Evaluator**
+   - [ ] Load existing config into form
+   - [ ] Form populated with current values from `data.parameters`
+   - [ ] Update name and settings
+   - [ ] Changes persisted
+
+4. **Delete Evaluator**
+   - [ ] Archive endpoint called
+   - [ ] Evaluator removed from list
+   - [ ] No errors
+
+5. **Run Evaluator (legacy endpoint - still works)**
+   - [ ] Run evaluator button works
+   - [ ] Uses evaluator_key derived from URI
+   - [ ] Results displayed correctly
+
+**Deliverables:**
+- [ ] Manual test all flows
+- [ ] Fix any bugs found
+- [ ] Document any edge cases
+
+---
+
+### PR 1 Summary
+
+| Task | Files | Effort |
+|------|-------|--------|
+| Type definitions | `Types.ts` | 0.5 day |
+| Service layer | `services/evaluators/index.ts` | 1 day |
+| State/atoms | `state/evaluators/atoms.ts`, playground atoms | 1 day |
+| Components | ConfigureEvaluator, Registry, columns | 1-2 days |
+| Testing | Manual testing | 1 day |
+
+**Total PR 1 Effort:** 4-5 days
+
+---
+
+## PR 2: Run Migration
+
+**Goal:** Replace legacy `/evaluators/{key}/run` with native workflow invoke `/preview/workflows/invoke`.
+
+**Prerequisite:** PR 1 merged and stable.
+
+### Phase 2.1: WorkflowService Types
+
+**File:** `web/oss/src/lib/Types.ts` (add)
+
+```typescript
+// ============ Workflow Service Types ============
+
+export interface WorkflowServiceRequestData {
+    revision?: Record<string, any>
+    parameters?: Record<string, any>    // evaluator settings
+    testcase?: Record<string, any>
+    inputs?: Record<string, any>        // merged testcase data
+    trace?: Record<string, any>
+    outputs?: any                        // prediction/output
+}
+
+export interface WorkflowServiceInterface {
+    version?: string
+    uri?: string                         // e.g., "agenta:builtin:auto_exact_match:v0"
+    url?: string
+    headers?: Record<string, string>
+    schemas?: Record<string, any>
+}
+
+export interface WorkflowServiceConfiguration {
+    script?: Record<string, any>
+    parameters?: Record<string, any>
+}
+
+export interface WorkflowServiceRequest {
+    version?: string
+    flags?: Record<string, any>
+    interface?: WorkflowServiceInterface
+    configuration?: WorkflowServiceConfiguration
+    data?: WorkflowServiceRequestData
+    references?: Record<string, any>
+    links?: Record<string, any>
+}
+
+export interface WorkflowServiceStatus {
+    code?: number
+    message?: string
+    type?: string
+    stacktrace?: string | string[]
+}
+
+export interface WorkflowServiceResponseData {
+    outputs?: any
+}
+
+export interface WorkflowServiceBatchResponse {
+    version?: string
+    trace_id?: string
+    span_id?: string
+    status?: WorkflowServiceStatus
+    data?: WorkflowServiceResponseData
+}
+```
+
+---
+
+### Phase 2.2: Workflow Invoke Service
+
+**File:** `web/oss/src/services/workflows/invoke.ts` (new file)
+
+```typescript
+import axios from "@/oss/lib/api/assets/axiosConfig"
+import { getAgentaApiUrl } from "@/oss/lib/helpers/utils"
+import { getProjectValues } from "@/oss/contexts/project.context"
+import {
+    WorkflowServiceRequest,
+    WorkflowServiceBatchResponse,
+    SimpleEvaluator,
+} from "@/oss/lib/Types"
+
+export interface InvokeEvaluatorParams {
+    evaluator: SimpleEvaluator
+    inputs: Record<string, any>        // testcase data + any extra inputs
+    outputs: any                        // prediction/output from variant
+    parameters?: Record<string, any>   // override settings (optional)
+}
+
+/**
+ * Invoke an evaluator using native workflow service
+ */
+export const invokeEvaluator = async (
+    params: InvokeEvaluatorParams
+): Promise<WorkflowServiceBatchResponse> => {
+    const { projectId } = getProjectValues()
+    const { evaluator, inputs, outputs, parameters } = params
+
+    const uri = evaluator.data?.uri
+    if (!uri) {
+        throw new Error("Evaluator has no URI configured")
+    }
+
+    const request: WorkflowServiceRequest = {
+        version: "2025.07.14",
+        interface: {
+            uri,
+        },
+        configuration: {
+            parameters: parameters ?? evaluator.data?.parameters,
+        },
+        data: {
+            inputs,
+            outputs,
+            parameters: parameters ?? evaluator.data?.parameters,
+        },
+    }
+
+    const response = await axios.post<WorkflowServiceBatchResponse>(
+        `${getAgentaApiUrl()}/preview/workflows/invoke?project_id=${projectId}`,
+        request,
+    )
+
+    return response.data
+}
+
+/**
+ * Map workflow response to evaluator output format
+ */
+export function mapWorkflowResponseToEvaluatorOutput(
+    response: WorkflowServiceBatchResponse
+): { outputs: Record<string, any> } {
+    if (response.status?.code && response.status.code >= 400) {
+        throw new Error(response.status.message || "Evaluator execution failed")
+    }
+
+    return {
+        outputs: response.data?.outputs ?? {},
+    }
+}
+```
+
+---
+
+### Phase 2.3: Update DebugSection
+
+**File:** `web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx`
+
+Replace `createEvaluatorRunExecution` with `invokeEvaluator`:
+
+```typescript
+// Before
+const runResponse = await createEvaluatorRunExecution(
+    selectedEvaluator.key,
+    {
+        inputs: outputs,
+        settings: formValues.parameters,
+    }
+)
+
+// After
+import { invokeEvaluator, mapWorkflowResponseToEvaluatorOutput } from "@/oss/services/workflows/invoke"
+
+const workflowResponse = await invokeEvaluator({
+    evaluator: simpleEvaluator,  // from playground state
+    inputs: {
+        ...testcaseData,
+        prediction: variantOutput,
+    },
+    outputs: variantOutput,
+    parameters: formValues.parameters,  // current form settings
+})
+
+const runResponse = mapWorkflowResponseToEvaluatorOutput(workflowResponse)
+```
+
+**Error Handling:**
+
+```typescript
+try {
+    const workflowResponse = await invokeEvaluator(...)
+    
+    // Check for workflow-level errors
+    if (workflowResponse.status?.code && workflowResponse.status.code >= 400) {
+        message.error(workflowResponse.status.message || "Evaluator failed")
+        return
+    }
+    
+    const result = mapWorkflowResponseToEvaluatorOutput(workflowResponse)
+    setEvaluatorResult(result.outputs)
+    
+} catch (error) {
+    message.error(getErrorMessage(error))
+}
+```
+
+---
+
+### Phase 2.4: Update Evaluations Service (if needed)
+
+If other parts of the app use `createEvaluatorRunExecution`, update them too:
+
+**File:** `web/oss/src/services/evaluations/api_ee/index.ts`
+
+- Keep `createEvaluatorRunExecution` for now (batch evaluations may still use it via backend)
+- Or deprecate and point to new invoke
+
+---
+
+### Phase 2.5: Testing
+
+**Test Cases:**
+
+1. **Run Evaluator in Playground**
+   - [ ] Click "Run Evaluator" with testcase loaded
+   - [ ] Native invoke endpoint called
+   - [ ] Results displayed correctly
+   - [ ] Errors handled gracefully
+
+2. **Different Evaluator Types**
+   - [ ] Test exact_match evaluator
+   - [ ] Test regex evaluator
+   - [ ] Test AI critique evaluator (LLM-based)
+   - [ ] Test custom code evaluator
+
+3. **Error Scenarios**
+   - [ ] Invalid evaluator (no URI)
+   - [ ] Missing inputs
+   - [ ] Evaluator execution error
+   - [ ] Network error
+
+4. **Permissions**
+   - [ ] User with RUN_WORKFLOWS permission can run
+   - [ ] User without permission gets appropriate error
+
+**Deliverables:**
+- [ ] Manual test all evaluator types
+- [ ] Fix any bugs found
+- [ ] Verify error messages are user-friendly
+
+---
+
+### PR 2 Summary
+
+| Task | Files | Effort |
+|------|-------|--------|
+| Workflow types | `Types.ts` | 0.5 day |
+| Invoke service | `services/workflows/invoke.ts` | 0.5 day |
+| DebugSection update | `DebugSection.tsx` | 1 day |
+| Error handling | Various | 0.5 day |
+| Testing | Manual testing | 1 day |
+
+**Total PR 2 Effort:** 3-4 days
+
+---
+
+## Timeline Summary
+
+| PR | Tasks | Effort | Dependencies |
+|----|-------|--------|--------------|
+| PR 1: CRUD Migration | Types, services, atoms, components | 4-5 days | Backend PR #3527 merged |
+| PR 2: Run Migration | Workflow types, invoke service, DebugSection | 3-4 days | PR 1 merged and stable |
+
+**Total Implementation:** 7-9 days
+
+---
+
+## Rollback Plan
+
+### PR 1 Rollback
+- Revert PR 1 commit
+- Legacy endpoints still exist on backend for a period
+
+### PR 2 Rollback
+- Revert PR 2 commit
+- Fall back to legacy `/evaluators/{key}/run` (still supported)
+
+---
+
+## Open Questions
+
+1. **Slug uniqueness:** Backend enforces unique slugs per project. Is generating a short random suffix client-side sufficient to avoid collisions?
+
+2. **Output schemas:** Should frontend pass `data.schemas.outputs` when creating? Or does backend derive from evaluator type?
+
+3. **Permission model:** Is `RUN_WORKFLOWS` the right permission for evaluator playground? Or should there be `RUN_EVALUATORS`?
+
+4. **Trace linking:** Should the playground display trace_id from workflow response for debugging?
diff --git a/docs/design/migrate-evaluator-playground/research.md b/docs/design/migrate-evaluator-playground/research.md
new file mode 100644
index 0000000000..eda511d37b
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/research.md
@@ -0,0 +1,211 @@
+# Research Notes: Evaluator Execution Architecture
+
+## Findings from PR #3527 Investigation
+
+### Discovery: Native Evaluator Execution Path
+
+The new architecture treats evaluators as workflows with URI-based identification. The key discovery is that even the legacy `/evaluators/{key}/run/` endpoint now uses the native handler registry internally.
+
+### Handler Registry Architecture
+
+The SDK maintains a global registry of workflow handlers:
+
+**Location:** `sdk/agenta/sdk/workflows/utils.py`
+
+```python
+HANDLER_REGISTRY = {
+    "agenta": {
+        "builtin": {
+            "echo": {"v0": echo_v0},
+            "auto_exact_match": {"v0": auto_exact_match_v0},
+            "auto_regex_test": {"v0": auto_regex_test_v0},
+            "field_match_test": {"v0": field_match_test_v0},
+            "json_multi_field_match": {"v0": json_multi_field_match_v0},
+            "auto_webhook_test": {"v0": auto_webhook_test_v0},
+            "auto_custom_code_run": {"v0": auto_custom_code_run_v0},
+            "auto_ai_critique": {"v0": auto_ai_critique_v0},
+            # ... more evaluators
+        }
+    },
+    "user": {
+        "custom": {
+            # Custom user evaluators
+        }
+    }
+}
+```
+
+**URI Format:** `provider:kind:key:version`
+
+Examples:
+- `agenta:builtin:auto_exact_match:v0`
+- `user:custom:my_custom_eval:latest`
+
+**URI Parsing:**
+```python
+def parse_uri(uri: str) -> Tuple[provider, kind, key, version]:
+    # "agenta:builtin:echo:v0" → ("agenta", "builtin", "echo", "v0")
+```
+
+### How the Legacy Run Endpoint Works Now (PR #3527)
+
+**File:** `api/oss/src/routers/evaluators_router.py`
+
+The PR changed the implementation to use the native handler registry:
+
+```python
+@router.post("/{evaluator_key}/run/", response_model=EvaluatorOutputInterface)
+async def evaluator_run(request: Request, evaluator_key: str, payload: EvaluatorInputInterface):
+    # ... auth setup ...
+    result = await _run_evaluator(evaluator_key, payload)
+    return result
+
+async def _run_evaluator(evaluator_key: str, evaluator_input: EvaluatorInputInterface):
+    # Build URI from evaluator_key
+    uri = f"agenta:builtin:{evaluator_key}:v0"
+    
+    # Retrieve the handler from SDK registry
+    handler = retrieve_handler(uri)
+    if handler is None:
+        raise NotImplementedError(f"Evaluator {evaluator_key} not found (uri={uri})")
+    
+    # Extract data from evaluator_input
+    inputs = evaluator_input.inputs or {}
+    settings = evaluator_input.settings or {}
+    outputs = inputs.get("prediction", inputs.get("output"))
+    
+    # Build kwargs based on handler signature
+    sig = inspect.signature(handler)
+    kwargs = {}
+    if "parameters" in sig.parameters:
+        kwargs["parameters"] = settings
+    if "inputs" in sig.parameters:
+        kwargs["inputs"] = inputs
+    if "outputs" in sig.parameters:
+        kwargs["outputs"] = outputs
+    
+    # Invoke the handler
+    result = handler(**kwargs)
+    if inspect.iscoroutine(result):
+        result = await result
+    
+    return {"outputs": result}
+```
+
+**Key Insight:** The legacy endpoint is now a thin wrapper that:
+1. Builds the URI from the evaluator_key
+2. Looks up the handler in the registry
+3. Invokes it directly
+
+### Native Workflow Invoke Path
+
+For fully native execution, there's also a generic workflow invoke endpoint:
+
+**Endpoint:** `POST /preview/workflows/invoke`
+
+**Request Structure:**
+```python
+class WorkflowServiceRequest:
+    data: WorkflowServiceRequestData  # inputs, outputs, parameters
+    revision: Optional[dict]           # contains URI in data.uri
+```
+
+**How Batch Evaluations Use It:**
+
+**File:** `api/oss/src/core/evaluations/tasks/legacy.py` (lines 1185-1228)
+
+```python
+workflow_service_request_data = WorkflowServiceRequestData(
+    inputs=inputs,
+    outputs=outputs,
+    #
+    parameters=evaluator_reference.get("configuration"),  # settings
+)
+
+workflow_service_request = WorkflowServiceRequest(
+    data=workflow_service_request_data,
+    #
+    environment=environment,
+    revision=evaluator_reference.get("revision"),  # contains URI
+)
+
+await workflows_service.invoke_workflow(
+    project_id=project_id,
+    user_id=user_id,
+    request=workflow_service_request,
+)
+```
+
+### Implications for Frontend Migration
+
+#### For Evaluator CRUD (Create/Read/Update/Delete)
+
+**Must migrate to new endpoints** because:
+- Legacy endpoints now call SimpleEvaluator endpoints internally
+- Data is stored in new workflow-based format
+- Frontend should use native API to avoid translation overhead
+
+#### For Evaluator Run (Testing in Playground)
+
+**Options:**
+
+1. **Keep using `/evaluators/{key}/run/`** (Recommended for now)
+   - Simplest approach
+   - Endpoint still works
+   - Internally uses native path
+   - No frontend changes needed
+
+2. **Use native workflow invoke**
+   - Requires building `WorkflowServiceRequest`
+   - Need to include evaluator revision with URI
+   - More complex but more "correct"
+   - Enables custom evaluator support
+
+3. **Hybrid approach**
+   - Use legacy endpoint for built-in evaluators
+   - Use native invoke for custom evaluators (which will have custom URIs)
+
+### Questions Resolved
+
+**Q: Why does the legacy run endpoint remain unchanged?**
+
+A: It's not unchanged internally - PR #3527 refactored it to use the native handler registry. But the external interface (URL, request/response format) is preserved for backward compatibility.
+
+**Q: Is there a "native" way to run evaluators?**
+
+A: Yes, via the workflow invoke endpoint with `WorkflowServiceRequest` containing the evaluator's URI. But for the playground, the legacy endpoint is simpler and equivalent.
+
+**Q: Should we migrate the run endpoint usage?**
+
+A: Not necessarily. The benefits of migrating would be:
+- Consistency with new architecture
+- Support for custom evaluators with custom URIs
+- Ability to run specific evaluator revisions
+
+But the costs are:
+- More complex payload construction
+- Need to fetch evaluator revision to get URI
+- No immediate user-facing benefit
+
+**Recommendation:** Keep using legacy run endpoint for now, plan native invoke for custom evaluator feature.
+
+## Note on "Qdrant changes"
+
+Within this repository, Qdrant appears in examples and cookbook/tutorial code (e.g., `examples/python/*`, `docs/docs/tutorials/*`), but not in the core evaluator/workflow execution path under `api/oss/src`.
+
+Implication for this migration:
+- Migrating the evaluator playground to `/preview/workflows/invoke` does not require any Qdrant-specific frontend changes.
+- Any Qdrant-related behavior is part of the *application/workflow being evaluated* (e.g., a RAG app calling Qdrant), and would surface only through normal workflow invocation inputs/outputs/traces.
+
+---
+
+## Related Files Analyzed
+
+- `api/oss/src/routers/evaluators_router.py` - Legacy endpoints (now with native internals)
+- `api/oss/src/apis/fastapi/evaluators/router.py` - New SimpleEvaluators router
+- `api/oss/src/apis/fastapi/workflows/router.py` - Workflow invoke endpoint
+- `api/oss/src/core/workflows/service.py` - Workflow invocation service
+- `api/oss/src/core/evaluations/tasks/legacy.py` - Batch evaluation using native invoke
+- `sdk/agenta/sdk/workflows/utils.py` - Handler registry and URI parsing
+- `sdk/agenta/sdk/workflows/interfaces.py` - Evaluator interfaces (schemas)
+- `sdk/agenta/sdk/workflows/handlers.py` - Actual evaluator implementations
diff --git a/docs/design/migrate-evaluator-playground/risk-analysis.md b/docs/design/migrate-evaluator-playground/risk-analysis.md
new file mode 100644
index 0000000000..3c522d441a
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/risk-analysis.md
@@ -0,0 +1,320 @@
+# Risk Analysis: Evaluator Playground Migration
+
+## Coupling Points
+
+### 1. State Management Coupling
+
+**Location:** `web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts`
+
+**Risk Level:** MEDIUM
+
+The playground state is tightly coupled to the `EvaluatorConfig` shape:
+
+```typescript
+// playgroundEditValuesAtom expects EvaluatorConfig shape
+interface EvaluatorConfig {
+    id: string
+    evaluator_key: string
+    name: string
+    settings_values: Record<string, any>
+}
+```
+
+**Impact:** 
+- `commitPlaygroundAtom` expects `EvaluatorConfig` as input
+- `playgroundEditValuesAtom` is read throughout ConfigureEvaluator and DebugSection
+- Form initialization relies on `settings_values` property name
+
+**Mitigation (PR 1):**
+- Update atoms to use `SimpleEvaluator` shape directly
+- Add derived atoms for backward-compatible access (e.g., `evaluator_key` from URI)
+- Update all atom consumers in ConfigureEvaluator and DebugSection
+
+---
+
+### 2. Form Initialization Coupling
+
+**Location:** `web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx`
+
+**Risk Level:** MEDIUM
+
+Form initialization directly accesses `settings_values`:
+
+```typescript
+// Lines 383-410
+if (editMode && editEvalEditValues) {
+    form.setFieldsValue({
+        ...editEvalEditValues,
+        settings_values: editEvalEditValues.settings_values || {},
+    })
+}
+```
+
+**Impact:**
+- Changing to `data.parameters` would break form binding
+- DynamicFormField components use `["settings_values", field.key]` name paths
+
+**Mitigation (PR 1):**
+- Update form field names from `settings_values` to `parameters`
+- Update DynamicFormField name paths
+- Update form.getFieldsValue() to extract `parameters`
+
+---
+
+### 3. Service Layer Coupling
+
+**Location:** `web/oss/src/services/evaluators/index.ts`
+
+**Risk Level:** LOW-MEDIUM
+
+API calls directly construct legacy payload shapes:
+
+```typescript
+// createEvaluatorConfig
+return axios.post(`/evaluators/configs?project_id=${projectId}`, {
+    ...config,
+})
+
+// updateEvaluatorConfig  
+return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config)
+```
+
+**Impact:**
+- Need to update URLs and payload transformation
+- Response handling needs to unwrap `{ evaluator: ... }` wrapper
+
+**Mitigation (PR 1):**
+- Replace all service functions with new implementations
+- New functions build `SimpleEvaluator` payloads directly
+- Handle response wrapper in service layer
+
+---
+
+### 4. Evaluators Registry Coupling
+
+**Location:** `web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts`
+
+**Risk Level:** MEDIUM
+
+The hook transforms and combines data from two sources:
+
+```typescript
+const {evaluatorConfigs} = useFetchEvaluatorsData()
+// Combines with evaluator templates for display
+```
+
+**Impact:**
+- Table columns expect `evaluator_key` property
+- Tag cells, type pills depend on config shape
+- Filtering/search operates on legacy property names
+
+**Mitigation (PR 1):**
+- Update hook to work with `SimpleEvaluator[]`
+- Derive `evaluator_key` from `data.uri` for display
+- Update column accessors in getColumns.tsx
+
+---
+
+### 5. Debug Section - Evaluator Run Coupling
+
+**Location:** `web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx`
+
+**Risk Level:** MEDIUM (PR 2)
+
+The evaluator run uses legacy endpoint:
+
+```typescript
+const runResponse = await createEvaluatorRunExecution(
+    selectedEvaluator.key,  // evaluator_key
+    { inputs: outputs, settings: ... }
+)
+```
+
+**Impact:**
+- Must migrate to `/preview/workflows/invoke`
+- Need to construct `WorkflowServiceRequest`
+- Different error handling (workflow status vs HTTP errors)
+
+**Mitigation (PR 2):**
+- Create new `invokeEvaluator()` service function
+- Build `WorkflowServiceRequest` with URI from `SimpleEvaluator.data.uri`
+- Map workflow response/errors to UI
+
+---
+
+### 6. Global Atoms Coupling
+
+**Location:** `web/oss/src/state/evaluators/atoms.ts`
+
+**Risk Level:** MEDIUM
+
+Query atoms return legacy-shaped data:
+
+```typescript
+const evaluatorConfigsQueryAtomFamily = atomFamily((projectId) =>
+    atomWithQuery(() => ({
+        queryKey: ['evaluator-configs', projectId],
+        queryFn: () => fetchAllEvaluatorConfigs(null, projectId),
+    }))
+)
+```
+
+**Impact:**
+- Multiple components may depend on these atoms
+- Changing shape could cascade through application
+
+**Mitigation (PR 1):**
+- Update service function to return `SimpleEvaluator[]`
+- Update all consumers to handle new shape
+- The return type changes in one place (the service function) and propagates through the query atoms automatically
+
+---
+
+### 7. Evaluator Templates vs Configs Distinction
+
+**Location:** Throughout frontend
+
+**Risk Level:** LOW
+
+The frontend distinguishes between:
+- **Evaluator templates** (`Evaluator`): Built-in evaluator definitions with `settings_template`
+- **Evaluator configs** (`SimpleEvaluator`): User-created configurations with `data.parameters`
+
+**Impact:**
+- This distinction is maintained in the new system
+- Templates come from `/evaluators/` (unchanged)
+- Configs become `SimpleEvaluator` objects
+
+**Mitigation:**
+- No conceptual change needed
+- Templates API unchanged
+- Just update config handling
+
+---
+
+## Risk Summary Table
+
+| Component | Risk Level | PR | Priority |
+|-----------|-----------|-----|----------|
+| Service Layer | LOW-MEDIUM | PR 1 | HIGH (change first) |
+| State Atoms | MEDIUM | PR 1 | HIGH |
+| ConfigureEvaluator Form | MEDIUM | PR 1 | MEDIUM |
+| Evaluators Registry | MEDIUM | PR 1 | MEDIUM |
+| Global Query Atoms | MEDIUM | PR 1 | MEDIUM |
+| Debug Section (Run) | MEDIUM | PR 2 | MEDIUM |
+
+## Concrete Breakage Scenarios
+
+### Scenario 1: Form Submission Fails
+
+**Trigger:** Form still uses `settings_values` but service expects `parameters`
+
+**Symptoms:**
+- Form submits but settings are lost
+- Backend receives empty configuration
+- Evaluator created but doesn't work
+
+**Prevention:**
+- Update form field names to `parameters`
+- Test form submission with real backend
+- Verify payload in network tab
+
+---
+
+### Scenario 2: Evaluator List Empty
+
+**Trigger:** Query endpoint returns `SimpleEvaluator[]`, UI expects `EvaluatorConfig[]`
+
+**Symptoms:**
+- Evaluators registry shows empty list
+- No user-facing error message (the data exists but cannot be parsed into the expected shape)
+- Console shows undefined property access
+
+**Prevention:**
+- Update all components to use `SimpleEvaluator` shape
+- Add null checks for `data?.uri`, `data?.parameters`
+- Log transformation errors
+
+---
+
+### Scenario 3: Edit Mode Fails to Load
+
+**Trigger:** Component expects `settings_values`, receives `data.parameters`
+
+**Symptoms:**
+- Navigate to edit page, form is empty
+- Settings not populated
+- Save overwrites with empty config
+
+**Prevention:**
+- Update form initialization to read from `data.parameters`
+- Test edit flow with existing configs
+
+---
+
+### Scenario 4: Delete Fails Silently
+
+**Trigger:** `DELETE` endpoint no longer exists, `POST .../archive` required
+
+**Symptoms:**
+- Clicking delete shows no error in the UI
+- Evaluator still appears
+- Network tab shows 404/405
+
+**Prevention:**
+- Update delete function to use archive endpoint
+- Verify response handling
+
+---
+
+### Scenario 5: Evaluator Run Fails (PR 2)
+
+**Trigger:** Workflow invoke returns different response shape
+
+**Symptoms:**
+- Run button shows error
+- Results not displayed
+- Console shows parsing errors
+
+**Prevention:**
+- Map `WorkflowServiceBatchResponse` to expected output format
+- Handle `status.code` errors from workflow response
+- Test with all evaluator types
+
+---
+
+## Recommended Testing Strategy
+
+### PR 1 Testing
+
+**Unit Tests:**
+- [ ] URI parsing (`agenta:builtin:key:v0` → `key`)
+- [ ] Slug generation from name
+- [ ] Service function request/response handling
+
+**Integration Tests:**
+- [ ] Create evaluator config flow
+- [ ] Edit evaluator config flow  
+- [ ] Delete (archive) evaluator config flow
+- [ ] List/query evaluator configs flow
+
+**E2E Tests:**
+- [ ] Full playground flow: select template → configure → test → commit
+- [ ] Edit existing evaluator configuration
+- [ ] Clone evaluator configuration
+- [ ] Delete evaluator configuration
+
+### PR 2 Testing
+
+**Unit Tests:**
+- [ ] `WorkflowServiceRequest` construction
+- [ ] Response mapping to evaluator output format
+- [ ] Error status handling
+
+**Integration Tests:**
+- [ ] Run evaluator with different types (exact_match, regex, AI critique)
+- [ ] Error scenarios (invalid inputs, missing outputs)
+
+**Regression Tests:**
+- [ ] Existing configs load correctly
+- [ ] Batch evaluations still work (they use backend workflow invoke)
diff --git a/docs/design/migrate-evaluator-playground/status.md b/docs/design/migrate-evaluator-playground/status.md
new file mode 100644
index 0000000000..dbce737e8f
--- /dev/null
+++ b/docs/design/migrate-evaluator-playground/status.md
@@ -0,0 +1,136 @@
+# Status: Evaluator Playground Migration
+
+## Current Phase: PR 1 (CRUD) In Progress
+
+**Last Updated:** 2026-01-27
+
+---
+
+## Chosen Approach
+
+**Direct Migration (No Adapters)** - Split into two PRs:
+
+1. **PR 1:** CRUD migration to `SimpleEvaluator` endpoints
+2. **PR 2:** Run migration to native workflow invoke
+
+See [plan.md](./plan.md) for detailed implementation steps.
+
+---
+
+## Progress Summary
+
+### Completed
+
+- [x] Map current Evaluator Playground implementation
+  - Identified all frontend components
+  - Documented state management (atoms)
+  - Mapped API endpoints used
+  - Documented data flow
+
+- [x] Analyze PR #3527 (backend migration)
+  - Understood new `SimpleEvaluator` data model
+  - Documented new endpoint shapes
+  - Identified backward compatibility layer
+
+- [x] Investigate native evaluator execution path
+  - Confirmed `/evaluators/{key}/run` now resolves `agenta:builtin:{key}:v0` via SDK handler registry
+  - Confirmed native workflow execution endpoint exists: `POST /preview/workflows/invoke`
+  - Documented request structure used by batch evaluation tasks
+
+- [x] Compare old vs new endpoints
+  - Documented request/response differences
+  - Identified URI-based evaluator identification
+  - Noted response wrapper changes
+
+- [x] Identify coupling and risk areas
+  - State management coupling (MEDIUM risk)
+  - Form initialization coupling (MEDIUM risk)
+  - Service layer coupling (LOW-MEDIUM risk)
+  - Created risk mitigation strategies
+
+- [x] Finalize migration plan
+  - Chose direct migration (no adapters)
+  - Split into PR 1 (CRUD) and PR 2 (Run)
+  - Documented all file changes needed
+
+### Next Steps
+
+- [ ] Complete PR 1: CRUD migration (stacked on PR #3527)
+- [ ] After PR 1 stable, start PR 2: Run migration
+
+---
+
+## Key Decisions
+
+| Decision | Rationale | Date |
+|----------|-----------|------|
+| Direct migration (no adapters) | Avoids tech debt, aligns with new architecture | 2026-01-27 |
+| Two-PR approach | Keeps changes reviewable, allows CRUD to stabilize first | 2026-01-27 |
+| Internal shapes become `SimpleEvaluator` | Matches backend model, no translation layer | 2026-01-27 |
+
+---
+
+## Key Findings
+
+### 1. The `/evaluators/{key}/run` endpoint is a thin wrapper
+
+PR #3527 refactored the legacy run endpoint to use the native handler registry internally:
+- It builds a URI from the evaluator_key: `agenta:builtin:{key}:v0`
+- Uses `retrieve_handler(uri)` to get the actual handler function
+- Directly invokes the handler
+
+### 2. Native workflow invoke path exists
+
+There's a fully native way to run evaluators:
+- Endpoint: `POST /preview/workflows/invoke`
+- Uses `WorkflowServiceRequest` with URI in interface
+- Same mechanism used by batch evaluations
+
+### 3. URI-based handler registry
+
+The SDK maintains a `HANDLER_REGISTRY` that maps URIs to handler functions:
+- Format: `agenta:builtin:{evaluator_key}:v0`
+- Supports custom evaluators: `user:custom:my_eval:latest`
+- Enables version management of evaluator implementations
+
+### 4. Key mapping changes
+
+| Legacy | New |
+|--------|-----|
+| `evaluator_key` | derived from `data.uri` |
+| `settings_values` | `data.parameters` |
+| `EvaluatorConfig` | `SimpleEvaluator` |
+
+---
+
+## Open Questions
+
+1. **Slug uniqueness:** Backend enforces unique slugs per project; generate a short suffix client-side to avoid collisions.
+
+2. **Output schemas:** Should frontend pass `data.schemas.outputs` when creating? Or does backend derive from evaluator type?
+
+3. **Permission model:** Is `RUN_WORKFLOWS` the right permission for evaluator playground? Or should there be `RUN_EVALUATORS`?
+
+---
+
+## Effort Estimates
+
+| PR | Effort | Dependencies |
+|----|--------|--------------|
+| PR 1: CRUD Migration | 4-5 days | Backend PR #3527 merged |
+| PR 2: Run Migration | 3-4 days | PR 1 merged and stable |
+
+**Total:** 7-9 days implementation
+
+---
+
+## Related Links
+
+- [PR #3527: Migrate evaluators but keep legacy endpoints](https://github.com/Agenta-AI/agenta/pull/3527)
+- [context.md](./context.md) - Background and goals
+- [current-system.md](./current-system.md) - Current implementation details
+- [new-endpoints.md](./new-endpoints.md) - New endpoint documentation
+- [research.md](./research.md) - Handler registry and execution research
+- [migration-options.md](./migration-options.md) - Why we chose direct migration
+- [risk-analysis.md](./risk-analysis.md) - Coupling and risk analysis
+- [plan.md](./plan.md) - Detailed implementation plan
diff --git a/web/oss/src/components/Evaluators/assets/types.ts b/web/oss/src/components/Evaluators/assets/types.ts
index f928cdc801..ccfdfaaa06 100644
--- a/web/oss/src/components/Evaluators/assets/types.ts
+++ b/web/oss/src/components/Evaluators/assets/types.ts
@@ -1,5 +1,5 @@
 import {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 export type EvaluatorCategory = "automatic" | "human"
 
@@ -15,7 +15,7 @@ export type EvaluatorPreview = EvaluatorPreviewDto & {
     metrics?: Record<string, unknown>
 }
 
-export type EvaluatorConfigRow = EvaluatorConfig & {
+export type EvaluatorConfigRow = SimpleEvaluator & {
     evaluator?: Evaluator | null
     kind?: "config"
 }
diff --git a/web/oss/src/components/Evaluators/assets/utils.ts b/web/oss/src/components/Evaluators/assets/utils.ts
index 4b09fa2d46..a750ce248f 100644
--- a/web/oss/src/components/Evaluators/assets/utils.ts
+++ b/web/oss/src/components/Evaluators/assets/utils.ts
@@ -1,6 +1,7 @@
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import {formatDay} from "@/oss/lib/helpers/dateTimeHelper"
 import {capitalize} from "@/oss/lib/helpers/utils"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 import {
     EvaluatorCategory,
@@ -54,7 +55,7 @@ const formatDate = (value?: string) => {
     return formatDay({date: value})
 }
 
-const collectConfigTags = (config: EvaluatorConfig, evaluator?: Evaluator | null) => {
+const collectConfigTags = (config: SimpleEvaluator, evaluator?: Evaluator | null) => {
     const tags = new Set<string>()
 
     if (Array.isArray(config.tags)) {
@@ -132,11 +133,12 @@ export const transformEvaluatorsToRows = (
 }
 
 const buildConfigTypeBadge = (
-    config: EvaluatorConfig,
+    config: SimpleEvaluator,
     category: Extract<EvaluatorCategory, "automatic" | "custom">,
     evaluator?: Evaluator | null,
 ): EvaluatorTypeBadge => {
-    const label = evaluator?.name || createTypeLabel(config.evaluator_key, config.name)
+    const evaluatorKey = resolveEvaluatorKey(config)
+    const label = evaluator?.name || createTypeLabel(evaluatorKey, config.name)
     const colorHex = config.color || evaluator?.color
 
     return {
@@ -146,44 +148,54 @@ const buildConfigTypeBadge = (
     }
 }
 
-const extractConfigVersion = (config: EvaluatorConfig) => {
-    const serviceValues = (config.settings_values as any)?.service || {}
+const extractConfigVersion = (config: SimpleEvaluator) => {
+    const parameters = (config.data as any)?.parameters || {}
+    const serviceValues = (config.data as any)?.service || {}
+    const serviceConfig = serviceValues?.configuration || {}
     const candidate =
         (config as any)?.version ||
         serviceValues?.agenta ||
         serviceValues?.version ||
-        (config.settings_values as any)?.version ||
+        serviceConfig?.version ||
+        serviceConfig?.agenta ||
+        parameters?.version ||
         ""
 
     return sanitizeVersion(typeof candidate === "string" ? candidate : "")
 }
 
-const extractConfigModifiedBy = (config: EvaluatorConfig) => {
+const extractConfigModifiedBy = (config: SimpleEvaluator) => {
     const modifiedBy =
         (config as any)?.updated_by ||
         (config as any)?.updatedBy ||
+        (config as any)?.updated_by_id ||
+        (config as any)?.updatedById ||
         (config as any)?.created_by ||
         (config as any)?.createdBy ||
+        (config as any)?.created_by_id ||
+        (config as any)?.createdById ||
         ""
 
     return typeof modifiedBy === "string" ? modifiedBy : ""
 }
 
 export const transformEvaluatorConfigsToRows = (
-    configs: EvaluatorConfig[],
+    configs: SimpleEvaluator[],
     category: Extract<EvaluatorCategory, "automatic">,
     evaluators: Evaluator[],
 ): EvaluatorRegistryRow[] => {
     const evaluatorsMap = new Map(evaluators.map((item) => [item.key, item]))
 
     return configs.map((config) => {
-        const evaluator = evaluatorsMap.get(config.evaluator_key) || null
+        const evaluatorKey = resolveEvaluatorKey(config)
+        const evaluator = evaluatorKey ? evaluatorsMap.get(evaluatorKey) || null : null
         const badge = buildConfigTypeBadge(config, category, evaluator)
         const versionLabel = extractConfigVersion(config)
         const tags = collectConfigTags(config, evaluator)
         const modifiedBy = extractConfigModifiedBy(config)
         const createdAt = config.created_at
         const updatedAt = config.updated_at || createdAt
+        const displayName = config.name || evaluator?.name || evaluatorKey || config.slug || ""
 
         const raw: EvaluatorConfigRow = {
             ...config,
@@ -194,15 +206,15 @@ export const transformEvaluatorConfigsToRows = (
         return {
             key: config.id,
             id: config.id,
-            name: config.name,
-            slug: config.evaluator_key,
+            name: displayName,
+            slug: evaluatorKey || config.slug,
             typeBadge: badge,
             versionLabel,
             tags,
             dateCreated: formatDate(createdAt),
             lastModified: formatDate(updatedAt),
             modifiedBy,
-            avatarName: modifiedBy || config.name,
+            avatarName: modifiedBy || displayName,
             raw,
         }
     })
diff --git a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
index ca07709a52..e1494219ab 100644
--- a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
+++ b/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
@@ -25,6 +25,7 @@ import {
     resetPlaygroundAtom,
 } from "@/oss/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms"
 import useURL from "@/oss/hooks/useURL"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData"
 import {Evaluator} from "@/oss/lib/Types"
 import {evaluatorByKeyAtomFamily} from "@/oss/state/evaluators"
@@ -63,7 +64,7 @@ const ConfigureEvaluatorPage = ({evaluatorId}: {evaluatorId?: string | null}) =>
         )
     }, [evaluatorConfigs, evaluatorId, stagedConfig])
 
-    const evaluatorKey = existingConfig?.evaluator_key ?? evaluatorId ?? null
+    const evaluatorKey = resolveEvaluatorKey(existingConfig) ?? evaluatorId ?? null
 
     const evaluatorQuery = useAtomValue(evaluatorByKeyAtomFamily(evaluatorKey))
     const evaluatorFromRegular = evaluators.find((item) => item.key === evaluatorKey)
diff --git a/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts b/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts
index 3aa171dc76..97fbb7ffc4 100644
--- a/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts
+++ b/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts
@@ -2,7 +2,7 @@ import {useCallback, useMemo} from "react"
 
 import useEvaluators from "@/oss/lib/hooks/useEvaluators"
 import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 import {EvaluatorCategory, EvaluatorPreview, EvaluatorRegistryRow} from "../assets/types"
 import {
@@ -33,7 +33,7 @@ const useEvaluatorsRegistryData = (category: EvaluatorCategory) => {
             const humanEvaluators = (humanEvaluatorsSwr.data || []) as EvaluatorPreview[]
             unsortedRows = transformEvaluatorsToRows(humanEvaluators, "human")
         } else {
-            const evaluatorConfigs = (evaluatorConfigsSwr.data || []) as EvaluatorConfig[]
+            const evaluatorConfigs = (evaluatorConfigsSwr.data || []) as SimpleEvaluator[]
             const baseEvaluators = (baseEvaluatorsSwr.data || []) as Evaluator[]
 
             unsortedRows = transformEvaluatorConfigsToRows(
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx
index cffdfdcd23..fd64b589ec 100644
--- a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx
+++ b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalInner.tsx
@@ -7,6 +7,7 @@ import {useRouter} from "next/router"
 import {message} from "@/oss/components/AppMessageContext"
 import useURL from "@/oss/hooks/useURL"
 import {useVaultSecret} from "@/oss/hooks/useVaultSecret"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import {redirectIfNoLLMKeys} from "@/oss/lib/helpers/utils"
 import useAppVariantRevisions from "@/oss/lib/hooks/useAppVariantRevisions"
 import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData"
@@ -289,7 +290,7 @@ const NewEvaluationModalInner = ({
             !preview &&
             selectedEvalConfigs.some(
                 (id) =>
-                    evaluatorConfigs.find((config) => config.id === id)?.evaluator_key ===
+                    resolveEvaluatorKey(evaluatorConfigs.find((config) => config.id === id)) ===
                     "auto_ai_critique",
             ) &&
             (await redirectIfNoLLMKeys({secrets}))
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx
index 3545f0b98a..b7bd3b649a 100644
--- a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx
+++ b/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx
@@ -11,9 +11,10 @@ import router from "next/router"
 
 import {getMetricsFromEvaluator} from "@/oss/components/SharedDrawers/AnnotateDrawer/assets/transforms"
 import useURL from "@/oss/hooks/useURL"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import {EvaluatorDto} from "@/oss/lib/hooks/useEvaluators/types"
 import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 import {openEvaluatorDrawerAtom} from "../../../autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms"
 import type {SelectEvaluatorSectionProps} from "../../types"
@@ -88,12 +89,12 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
 
     const evaluatorConfigs = useMemo(() => {
         if (preview) {
-            return evaluators as EvaluatorConfig[]
+            return [] as SimpleEvaluator[]
         }
         return (
             propsEvaluatorConfigs?.length ? propsEvaluatorConfigs : evaluatorConfigsSwr.data || []
-        ) as EvaluatorConfig[]
-    }, [preview, propsEvaluatorConfigs, evaluatorConfigsSwr.data, evaluators])
+        ) as SimpleEvaluator[]
+    }, [preview, propsEvaluatorConfigs, evaluatorConfigsSwr.data])
 
     const isLoadingEvaluators = fetchLoadingEvaluators
     const isLoadingEvaluatorConfigs = fetchLoadingConfigs
@@ -122,7 +123,7 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
         const availableIds = new Set(
             (preview
                 ? (evaluators as EvaluatorDto<"response">[])
-                : (evaluatorConfigs as EvaluatorConfig[])
+                : (evaluatorConfigs as SimpleEvaluator[])
             ).map((config) => config.id),
         )
 
@@ -141,10 +142,9 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
 
     // Handler to open the drawer in edit mode
     const handleEditConfig = useCallback(
-        (record: EvaluatorConfig) => {
-            const evaluator = (evaluators as Evaluator[]).find(
-                (e) => e.key === record.evaluator_key,
-            )
+        (record: SimpleEvaluator) => {
+            const evaluatorKey = resolveEvaluatorKey(record)
+            const evaluator = (evaluators as Evaluator[]).find((e) => e.key === evaluatorKey)
             if (evaluator) {
                 openEvaluatorDrawer({
                     evaluator,
@@ -158,10 +158,9 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
 
     // Handler to open the drawer in clone mode
     const handleCloneConfig = useCallback(
-        (record: EvaluatorConfig) => {
-            const evaluator = (evaluators as Evaluator[]).find(
-                (e) => e.key === record.evaluator_key,
-            )
+        (record: SimpleEvaluator) => {
+            const evaluatorKey = resolveEvaluatorKey(record)
+            const evaluator = (evaluators as Evaluator[]).find((e) => e.key === evaluatorKey)
             if (evaluator) {
                 openEvaluatorDrawer({
                     evaluator,
@@ -203,13 +202,13 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
         [],
     )
 
-    const columnsConfig: ColumnsType<EvaluatorConfig> = useMemo(
+    const columnsConfig: ColumnsType<SimpleEvaluator> = useMemo(
         () => [
             {
                 title: "Name",
                 dataIndex: "name",
                 key: "name",
-                render: (_, record: EvaluatorConfig) => {
+                render: (_, record: SimpleEvaluator) => {
                     return <div>{record.name}</div>
                 },
             },
@@ -217,10 +216,11 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
                 title: "Type",
                 dataIndex: "type",
                 key: "type",
-                render: (x, record: EvaluatorConfig) => {
+                render: (x, record: SimpleEvaluator) => {
                     // Find the evaluator by key to display its name
+                    const evaluatorKey = resolveEvaluatorKey(record)
                     const evaluator = (evaluators as Evaluator[]).find(
-                        (item) => item.key === record.evaluator_key,
+                        (item) => item.key === evaluatorKey,
                     )
                     return <Tag color={record.color}>{evaluator?.name}</Tag>
                 },
@@ -231,7 +231,7 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
                 width: 56,
                 fixed: "right",
                 align: "center",
-                render: (_, record: EvaluatorConfig) => {
+                render: (_, record: SimpleEvaluator) => {
                     return (
                         <Dropdown
                             trigger={["click"]}
@@ -276,7 +276,7 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
     // Conditionally type filteredEvalConfigs based on Preview
     const filteredEvalConfigs: Preview extends true
         ? EvaluatorDto<"response">[]
-        : EvaluatorConfig[] = useMemo(() => {
+        : SimpleEvaluator[] = useMemo(() => {
         if (preview) {
             // Explicitly narrow types for Preview = true (human evaluations)
             let data = evaluators as EvaluatorDto<"response">[]
@@ -295,21 +295,21 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
 
             if (!searchTerm) return data as any
             return data.filter((item) =>
-                item.name.toLowerCase().includes(searchTerm.toLowerCase()),
+                (item.name || "").toLowerCase().includes(searchTerm.toLowerCase()),
             ) as any
         } else {
             // Explicitly narrow types for Preview = false
-            const data = evaluatorConfigs as EvaluatorConfig[]
+            const data = evaluatorConfigs as SimpleEvaluator[]
             if (!searchTerm) return data
             return data.filter((item) =>
-                item.name.toLowerCase().includes(searchTerm.toLowerCase()),
+                (item.name || "").toLowerCase().includes(searchTerm.toLowerCase()),
             ) as any
         }
     }, [searchTerm, evaluatorConfigs, preview, evaluators])
 
     const onSelectEvalConfig = (selectedRowKeys: React.Key[]) => {
         const currentSelected = new Set(selectedEvalConfigs)
-        const configs = filteredEvalConfigs as EvaluatorDto<"response">[]
+        const configs = filteredEvalConfigs as {id: string}[]
         configs.forEach((item) => {
             if (selectedRowKeys.includes(item.id)) {
                 currentSelected.add(item.id)
@@ -331,7 +331,7 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
                 ).length > 0
             )
         }
-        return (evaluatorConfigs as EvaluatorConfig[]).length > 0
+        return (evaluatorConfigs as SimpleEvaluator[]).length > 0
     }, [preview, evaluators, evaluatorConfigs])
 
     return (
@@ -418,7 +418,7 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
                         pagination={false}
                     />
                 ) : (
-                    <Table<EvaluatorConfig>
+                    <Table<SimpleEvaluator>
                         rowSelection={{
                             type: "checkbox",
                             columnWidth: 48,
@@ -442,7 +442,7 @@ const SelectEvaluatorSection = <Preview extends boolean = false>({
                         className="ph-no-capture"
                         columns={columnsConfig}
                         rowKey={"id"}
-                        dataSource={filteredEvalConfigs as EvaluatorConfig[]}
+                        dataSource={filteredEvalConfigs as SimpleEvaluator[]}
                         scroll={{x: true, y: 455}}
                         bordered
                         pagination={false}
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts b/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts
index a068971bc4..5f838f8665 100644
--- a/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts
+++ b/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts
@@ -4,7 +4,7 @@ import {ModalProps} from "antd"
 
 import {EvaluatorDto} from "@/oss/lib/hooks/useEvaluators/types"
 import {EnhancedVariant} from "@/oss/lib/shared/variant/transformer/types"
-import {LLMRunRateLimit, Evaluator, EvaluatorConfig, testset} from "@/oss/lib/Types"
+import {LLMRunRateLimit, Evaluator, SimpleEvaluator, testset} from "@/oss/lib/Types"
 
 export interface NewEvaluationAppOption {
     label: string
@@ -54,7 +54,7 @@ export interface NewEvaluationModalContentProps extends HTMLProps<HTMLDivElement
     variants?: EnhancedVariant[]
     variantsLoading?: boolean
     evaluators: Evaluator[] | EvaluatorDto<"response">[]
-    evaluatorConfigs: EvaluatorConfig[]
+    evaluatorConfigs: SimpleEvaluator[]
     advanceSettings: LLMRunRateLimitWithCorrectAnswer
     setAdvanceSettings: Dispatch<SetStateAction<LLMRunRateLimitWithCorrectAnswer>>
     appOptions: NewEvaluationAppOption[]
@@ -95,7 +95,7 @@ export interface SelectTestsetSectionProps extends HTMLProps<HTMLDivElement> {
 }
 
 export interface SelectEvaluatorSectionProps extends HTMLProps<HTMLDivElement> {
-    evaluatorConfigs: EvaluatorConfig[]
+    evaluatorConfigs: SimpleEvaluator[]
     evaluators: Evaluator[]
     selectedEvalConfigs: string[]
     setSelectedEvalConfigs: Dispatch<SetStateAction<string[]>>
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx
index 6957d3438a..6a0aed5f8f 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx
@@ -71,7 +71,7 @@ const AdvancedSettings: React.FC<AdvancedSettingsProps> = ({settings, selectedTe
                     return (
                         <Form.Item
                             key={field.key}
-                            name={["settings_values", field.key]}
+                            name={["parameters", field.key]}
                             initialValue={field.default}
                             rules={rules}
                             label={label}
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
index 5de982d585..49ef6e2f85 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
@@ -372,8 +372,8 @@ const DebugSection = () => {
             setEvalOutputStatus({success: false, error: false})
             setIsLoadingResult(true)
 
-            const settingsValues = form.getFieldValue("settings_values") || {}
-            let normalizedSettings = {...settingsValues}
+            const parameters = form.getFieldValue("parameters") || {}
+            let normalizedSettings = {...parameters}
 
             if (typeof normalizedSettings.json_schema === "string") {
                 try {
@@ -419,7 +419,7 @@ const DebugSection = () => {
             }
 
             if (!selectedEvaluator.key.startsWith("rag_")) {
-                const correctAnswerKey = settingsValues.correct_answer_key
+                const correctAnswerKey = parameters.correct_answer_key
                 const groundTruthKey =
                     typeof correctAnswerKey === "string" && correctAnswerKey.startsWith("testcase.")
                         ? correctAnswerKey.split(".")[1]
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx
index a8128c43e7..c7a3df73f6 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx
@@ -105,7 +105,7 @@ export const DynamicFormField: React.FC<DynamicFormFieldProps> = ({
     form,
 }) => {
     const settingsValue = Form.useWatch(name, form)
-    const runtime = Form.useWatch(["settings_values", "runtime"], form)
+    const runtime = Form.useWatch(["parameters", "runtime"], form)
 
     const classes = useStyles()
     const {token} = theme.useToken()
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx
index a96a07a37f..f5ddf000df 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/FieldsTagsEditor.tsx
@@ -55,7 +55,7 @@ export const FieldsTagsEditor: React.FC<FieldsTagsEditorProps> = ({
 
     // Watch the correct_answer_key from form to react to changes
     // Using Form.useWatch instead of form.getFieldValue for reactivity
-    const formCorrectAnswerKey = Form.useWatch(["settings_values", "correct_answer_key"], form)
+    const formCorrectAnswerKey = Form.useWatch(["parameters", "correct_answer_key"], form)
     const effectiveKey = formCorrectAnswerKey || correctAnswerKey
 
     // Check if we can detect fields from testcase
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
index 331afe0852..1454b99565 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
@@ -13,7 +13,7 @@ import {useAppId} from "@/oss/hooks/useAppId"
 import useURL from "@/oss/hooks/useURL"
 import {EvaluationSettingsTemplate, JSSTheme, SettingsPreset} from "@/oss/lib/Types"
 import {
-    CreateEvaluationConfigData,
+    CreateEvaluatorConfigData,
     createEvaluatorConfig,
     updateEvaluatorConfig,
 } from "@/oss/services/evaluations/api"
@@ -69,6 +69,13 @@ interface ConfigureEvaluatorProps {
     onToggleTestPanel?: () => void
 }
 
+interface ConfigureEvaluatorFormValues { // shape of the values object received by onSubmit
+    name: string // sent as `name` in both the create payload and the update request
+    description?: string // sent as `description` in the create payload
+    tags?: string[] // sent as `tags` in the create payload
+    parameters?: Record<string, any> // fields registered under ["parameters", key]; onSubmit falls back to {}
+}
+
 const useStyles = createUseStyles((theme: JSSTheme) => ({
     collapseContainer: {
         "& .ant-collapse-header": {
@@ -199,12 +206,10 @@ const ConfigureEvaluator = ({
             const allKeys = Array.from(new Set([...templateKeys, ...presetKeys]))
 
             // Clear subtree before applying new values to avoid stale keys
-            form.setFieldsValue({settings_values: {}})
+            form.setFieldsValue({parameters: {}})
 
             if (allKeys.length) {
-                const fieldNames = allKeys.map(
-                    (key) => ["settings_values", key] as (string | number)[],
-                )
+                const fieldNames = allKeys.map((key) => ["parameters", key] as (string | number)[])
                 form.resetFields(fieldNames)
 
                 const nextFields = fieldNames
@@ -248,7 +253,7 @@ const ConfigureEvaluator = ({
 
     const evaluatorVersionNumber = useMemo(() => {
         const raw =
-            editEvalEditValues?.settings_values?.version ??
+            editEvalEditValues?.data?.parameters?.version ??
             selectedEvaluator?.settings_template?.version?.default ??
             3
 
@@ -256,7 +261,7 @@ const ConfigureEvaluator = ({
         // extract leading number (e.g., "4", "4.1", "v4")
         const match = String(raw).match(/\d+(\.\d+)?/)
         return match ? parseFloat(match[0]) : 3
-    }, [editEvalEditValues?.settings_values?.version, selectedEvaluator])
+    }, [editEvalEditValues?.data?.parameters?.version, selectedEvaluator])
 
     const evalFields = useMemo(() => {
         const templateEntries = Object.entries(selectedEvaluator?.settings_template || {})
@@ -283,28 +288,25 @@ const ConfigureEvaluator = ({
     const advancedSettingsFields = evalFields.filter((field) => field.advanced)
     const basicSettingsFields = evalFields.filter((field) => !field.advanced)
 
-    const onSubmit = async (values: CreateEvaluationConfigData) => {
+    const onSubmit = async (values: ConfigureEvaluatorFormValues) => {
         try {
             setSubmitLoading(true)
             if (!selectedEvaluator?.key) throw new Error("No selected key")
-            const settingsValues = values.settings_values || {}
+            const parameters = values.parameters || {}
 
-            const jsonSchemaFieldPath: (string | number)[] = ["settings_values", "json_schema"]
-            const hasJsonSchema = Object.prototype.hasOwnProperty.call(
-                settingsValues,
-                "json_schema",
-            )
+            const jsonSchemaFieldPath: (string | number)[] = ["parameters", "json_schema"]
+            const hasJsonSchema = Object.prototype.hasOwnProperty.call(parameters, "json_schema")
 
             if (hasJsonSchema) {
                 form.setFields([{name: jsonSchemaFieldPath, errors: []}])
 
-                if (typeof settingsValues.json_schema === "string") {
+                if (typeof parameters.json_schema === "string") {
                     try {
-                        const parsed = JSON.parse(settingsValues.json_schema)
+                        const parsed = JSON.parse(parameters.json_schema)
                         if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
                             throw new Error()
                         }
-                        settingsValues.json_schema = parsed
+                        parameters.json_schema = parsed
                     } catch {
                         form.setFields([
                             {
@@ -315,9 +317,9 @@ const ConfigureEvaluator = ({
                         throw new Error("JSON schema must be a valid JSON object")
                     }
                 } else if (
-                    settingsValues.json_schema &&
-                    (typeof settingsValues.json_schema !== "object" ||
-                        Array.isArray(settingsValues.json_schema))
+                    parameters.json_schema &&
+                    (typeof parameters.json_schema !== "object" ||
+                        Array.isArray(parameters.json_schema))
                 ) {
                     form.setFields([
                         {
@@ -329,40 +331,43 @@ const ConfigureEvaluator = ({
                 }
             }
 
-            const data = {
-                ...values,
-                evaluator_key: selectedEvaluator!.key,
-                settings_values: settingsValues,
+            const existingParameters = editEvalEditValues?.data?.parameters || {}
+            const mergedParameters = {...existingParameters, ...parameters}
+
+            const payload: CreateEvaluatorConfigData = {
+                name: values.name,
+                description: values.description,
+                tags: values.tags,
+                evaluator_key: selectedEvaluator.key,
+                parameters,
             }
 
             if (editMode) {
-                await updateEvaluatorConfig(editEvalEditValues?.id!, data)
-
-                // Update atom with merged values
-                const updatedConfig = editEvalEditValues
-                    ? {
-                          ...editEvalEditValues,
-                          ...data,
-                          settings_values: settingsValues,
-                      }
-                    : null
-                if (updatedConfig) {
-                    commitPlayground(updatedConfig)
-                }
+                const updatedEvaluator = await updateEvaluatorConfig(editEvalEditValues?.id!, {
+                    id: editEvalEditValues?.id!,
+                    name: values.name,
+                    description: editEvalEditValues?.description,
+                    tags: editEvalEditValues?.tags,
+                    meta: editEvalEditValues?.meta,
+                    flags: editEvalEditValues?.flags,
+                    data: {
+                        ...(editEvalEditValues?.data ?? {}),
+                        parameters: mergedParameters,
+                    },
+                })
+
+                commitPlayground(updatedEvaluator)
             } else {
-                const response = await createEvaluatorConfig(appId, data)
-                const createdConfig = response?.data
-
-                if (createdConfig) {
-                    // Use commitPlayground to update state and switch to edit mode
-                    commitPlayground(createdConfig)
-                    if (uiVariant === "page" && createdConfig.id) {
-                        await router.replace(
-                            `${projectURL}/evaluators/configure/${encodeURIComponent(
-                                createdConfig.id,
-                            )}`,
-                        )
-                    }
+                const createdConfig = await createEvaluatorConfig(appId, payload)
+
+                // Use commitPlayground to update state and switch to edit mode
+                commitPlayground(createdConfig)
+                if (uiVariant === "page" && createdConfig.id) {
+                    await router.replace(
+                        `${projectURL}/evaluators/configure/${encodeURIComponent(
+                            createdConfig.id,
+                        )}`,
+                    )
                 }
             }
 
@@ -381,15 +386,15 @@ const ConfigureEvaluator = ({
         form.resetFields()
 
         if (editMode && editEvalEditValues) {
-            // Load all values including nested settings_values
+            // Load all values including nested parameters
             form.setFieldsValue({
                 ...editEvalEditValues,
-                settings_values: editEvalEditValues.settings_values || {},
+                parameters: editEvalEditValues.data?.parameters || {},
             })
         } else if (cloneConfig && editEvalEditValues) {
-            // When cloning, copy only settings_values and clear the name so user provides a new name
+            // When cloning, copy only parameters and clear the name so user provides a new name
             form.setFieldsValue({
-                settings_values: editEvalEditValues.settings_values || {},
+                parameters: editEvalEditValues.data?.parameters || {},
                 name: "",
             })
         } else if (selectedEvaluator?.settings_template) {
@@ -404,7 +409,7 @@ const ConfigureEvaluator = ({
             }
             if (Object.keys(defaultSettings).length > 0) {
                 form.setFieldsValue({
-                    settings_values: defaultSettings,
+                    parameters: defaultSettings,
                 })
             }
         }
@@ -556,7 +561,7 @@ const ConfigureEvaluator = ({
                                                     key={field.key}
                                                     traceTree={traceTree}
                                                     form={form}
-                                                    name={["settings_values", field.key]}
+                                                    name={["parameters", field.key]}
                                                 />
                                             ))}
                                         </div>
@@ -674,7 +679,7 @@ const ConfigureEvaluator = ({
                                                     key={field.key}
                                                     traceTree={traceTree}
                                                     form={form}
-                                                    name={["settings_values", field.key]}
+                                                    name={["parameters", field.key]}
                                                 />
                                             ))}
                                         </div>
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts
index 76b8c134c2..dcb15dcd42 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/state/atoms.ts
@@ -18,7 +18,7 @@ import type {FormInstance} from "antd"
 import {atom} from "jotai"
 import {atomWithReset, atomWithStorage, RESET} from "jotai/utils"
 
-import type {Evaluator, EvaluatorConfig, Variant} from "@/oss/lib/Types"
+import type {Evaluator, SimpleEvaluator, Variant} from "@/oss/lib/Types"
 import {stringStorage} from "@/oss/state/utils/stringStorage"
 
 // ================================================================
@@ -84,7 +84,7 @@ export const playgroundIsCloneModeAtom = atom((get) => get(playgroundSessionAtom
  * - In edit mode: loaded from existing config
  * - In clone mode: copied from source config (with cleared name)
  */
-export const playgroundEditValuesAtom = atomWithReset<EvaluatorConfig | null>(null)
+export const playgroundEditValuesAtom = atomWithReset<SimpleEvaluator | null>(null)
 
 // ================================================================
 // FORM STATE
@@ -95,7 +95,7 @@ export const playgroundEditValuesAtom = atomWithReset<EvaluatorConfig | null>(nu
  * Allows DebugSection to read form values for running the evaluator
  *
  * This is set by ConfigureEvaluator when the form mounts
- * and read by DebugSection to get current settings_values
+ * and read by DebugSection to get current parameters
  */
 export const playgroundFormRefAtom = atom<FormInstance | null>(null)
 
@@ -179,7 +179,7 @@ export const initPlaygroundAtom = atom(
         set,
         payload: {
             evaluator: Evaluator
-            existingConfig?: EvaluatorConfig | null
+            existingConfig?: SimpleEvaluator | null
             mode?: PlaygroundMode
         },
     ) => {
@@ -226,7 +226,7 @@ export const resetPlaygroundAtom = atom(null, (get, set) => {
  *
  * @param savedConfig - The config returned from the API
  */
-export const commitPlaygroundAtom = atom(null, (get, set, savedConfig: EvaluatorConfig) => {
+export const commitPlaygroundAtom = atom(null, (get, set, savedConfig: SimpleEvaluator) => {
     // Update edit values with saved config
     set(playgroundEditValuesAtom, savedConfig)
 
@@ -280,7 +280,7 @@ export const openEvaluatorDrawerAtom = atom(
         set,
         payload: {
             evaluator: Evaluator
-            existingConfig?: EvaluatorConfig | null
+            existingConfig?: SimpleEvaluator | null
             mode?: PlaygroundMode
         },
     ) => {
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx
index 0ac235b386..c30bb3c1f1 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx
@@ -5,11 +5,11 @@ import {Modal, Space, theme, Typography} from "antd"
 import {createUseStyles} from "react-jss"
 
 import {checkIfResourceValidForDeletion} from "@/oss/lib/evaluations/legacy"
-import {EvaluatorConfig, JSSTheme} from "@/oss/lib/Types"
+import {JSSTheme, SimpleEvaluator} from "@/oss/lib/Types"
 import {deleteEvaluatorConfig} from "@/oss/services/evaluations/api"
 
 type DeleteModalProps = {
-    selectedEvalConfig: EvaluatorConfig
+    selectedEvalConfig: SimpleEvaluator
     onSuccess: () => void
 } & React.ComponentProps<typeof Modal>
 
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx
index f3c9434a38..72aaf034fc 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx
@@ -7,18 +7,19 @@ import {useAtom} from "jotai"
 import {createUseStyles} from "react-jss"
 
 import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import {formatDay} from "@/oss/lib/helpers/dateTimeHelper"
-import {Evaluator, EvaluatorConfig, JSSTheme} from "@/oss/lib/Types"
+import {Evaluator, JSSTheme, SimpleEvaluator} from "@/oss/lib/Types"
 
 import DeleteModal from "./DeleteModal"
 
 interface EvaluatorCardProps {
-    evaluatorConfigs: EvaluatorConfig[]
+    evaluatorConfigs: SimpleEvaluator[]
     setEditMode: React.Dispatch<React.SetStateAction<boolean>>
     setCloneConfig: React.Dispatch<React.SetStateAction<boolean>>
     setCurrent: React.Dispatch<React.SetStateAction<number>>
     setSelectedEvaluator: React.Dispatch<React.SetStateAction<Evaluator | null>>
-    setEditEvalEditValues: React.Dispatch<React.SetStateAction<EvaluatorConfig | null>>
+    setEditEvalEditValues: React.Dispatch<React.SetStateAction<SimpleEvaluator | null>>
     onSuccess: () => void
 }
 
@@ -88,22 +89,21 @@ const EvaluatorCard = ({
     const classes = useStyles()
     const evaluators = useAtom(evaluatorsAtom)[0]
     const [openDeleteModal, setOpenDeleteModal] = useState(false)
-    const [selectedDelEval, setSelectedDelEval] = useState<EvaluatorConfig | null>(null)
+    const [selectedDelEval, setSelectedDelEval] = useState<SimpleEvaluator | null>(null)
 
     return (
         <div className={classes.container}>
             {evaluatorConfigs.length ? (
                 evaluatorConfigs.map((item) => {
-                    const evaluator = evaluators.find((e) => e.key === item.evaluator_key)
+                    const evaluatorKey = resolveEvaluatorKey(item)
+                    const evaluator = evaluators.find((e) => e.key === evaluatorKey)
 
                     return (
                         <Card
                             key={item.id}
                             className={classes.evaluatorCard}
                             onClick={() => {
-                                const selectedEval = evaluators.find(
-                                    (e) => e.key === item.evaluator_key,
-                                )
+                                const selectedEval = evaluators.find((e) => e.key === evaluatorKey)
                                 if (selectedEval) {
                                     setEditMode(true)
                                     setSelectedEvaluator(selectedEval)
@@ -130,7 +130,7 @@ const EvaluatorCard = ({
                                                 onClick: (e: any) => {
                                                     e.domEvent.stopPropagation()
                                                     const selectedEval = evaluators.find(
-                                                        (e) => e.key === item.evaluator_key,
+                                                        (e) => e.key === evaluatorKey,
                                                     )
                                                     if (selectedEval) {
                                                         setEditMode(true)
@@ -147,7 +147,7 @@ const EvaluatorCard = ({
                                                 onClick: (e: any) => {
                                                     e.domEvent.stopPropagation()
                                                     const selectedEval = evaluators.find(
-                                                        (e) => e.key === item.evaluator_key,
+                                                        (e) => e.key === evaluatorKey,
                                                     )
                                                     if (selectedEval) {
                                                         setCloneConfig(true)
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx
index 2e38bfd1c2..33c03a9f89 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx
@@ -7,17 +7,18 @@ import {ColumnsType} from "antd/es/table"
 import {useAtom} from "jotai"
 
 import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 import DeleteModal from "./DeleteModal"
 
 interface EvaluatorListProps {
-    evaluatorConfigs: EvaluatorConfig[]
+    evaluatorConfigs: SimpleEvaluator[]
     setEditMode: React.Dispatch<React.SetStateAction<boolean>>
     setCloneConfig: React.Dispatch<React.SetStateAction<boolean>>
     setCurrent: React.Dispatch<React.SetStateAction<number>>
     setSelectedEvaluator: React.Dispatch<React.SetStateAction<Evaluator | null>>
-    setEditEvalEditValues: React.Dispatch<React.SetStateAction<EvaluatorConfig | null>>
+    setEditEvalEditValues: React.Dispatch<React.SetStateAction<SimpleEvaluator | null>>
     onSuccess: () => void
 }
 
@@ -32,9 +33,9 @@ const EvaluatorList = ({
 }: EvaluatorListProps) => {
     const evaluators = useAtom(evaluatorsAtom)[0]
     const [openDeleteModal, setOpenDeleteModal] = useState(false)
-    const [selectedDelEval, setSelectedDelEval] = useState<EvaluatorConfig | null>(null)
+    const [selectedDelEval, setSelectedDelEval] = useState<SimpleEvaluator | null>(null)
 
-    const columns: ColumnsType<EvaluatorConfig> = [
+    const columns: ColumnsType<SimpleEvaluator> = [
         // {
         //     title: "Version",
         //     dataIndex: "version",
@@ -56,7 +57,8 @@ const EvaluatorList = ({
             dataIndex: "type",
             key: "type",
             render: (_, record) => {
-                const evaluator = evaluators.find((item) => item.key === record.evaluator_key)
+                const evaluatorKey = resolveEvaluatorKey(record)
+                const evaluator = evaluators.find((item) => item.key === evaluatorKey)
                 return <Tag color={record.color}>{evaluator?.name}</Tag>
             },
         },
@@ -84,8 +86,9 @@ const EvaluatorList = ({
                                     icon: <Note size={16} />,
                                     onClick: (e: any) => {
                                         e.domEvent.stopPropagation()
+                                        const evaluatorKey = resolveEvaluatorKey(record)
                                         const selectedEval = evaluators.find(
-                                            (e) => e.key === record.evaluator_key,
+                                            (e) => e.key === evaluatorKey,
                                         )
                                         if (selectedEval) {
                                             setEditMode(true)
@@ -101,8 +104,9 @@ const EvaluatorList = ({
                                     icon: <Copy size={16} />,
                                     onClick: (e: any) => {
                                         e.domEvent.stopPropagation()
+                                        const evaluatorKey = resolveEvaluatorKey(record)
                                         const selectedEval = evaluators.find(
-                                            (e) => e.key === record.evaluator_key,
+                                            (e) => e.key === evaluatorKey,
                                         )
                                         if (selectedEval) {
                                             setCloneConfig(true)
@@ -151,7 +155,8 @@ const EvaluatorList = ({
                 onRow={(record) => ({
                     style: {cursor: "pointer"},
                     onClick: () => {
-                        const selectedEval = evaluators.find((e) => e.key === record.evaluator_key)
+                        const evaluatorKey = resolveEvaluatorKey(record)
+                        const selectedEval = evaluators.find((e) => e.key === evaluatorKey)
                         if (selectedEval) {
                             setEditMode(true)
                             setSelectedEvaluator(selectedEval)
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx
index 60569766c2..564bc38df9 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx
@@ -8,21 +8,22 @@ import {createUseStyles} from "react-jss"
 
 import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation"
 import {getEvaluatorTags} from "@/oss/lib/evaluations/legacy"
-import {Evaluator, EvaluatorConfig, JSSTheme} from "@/oss/lib/Types"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
+import {Evaluator, JSSTheme, SimpleEvaluator} from "@/oss/lib/Types"
 import {nonArchivedEvaluatorsAtom} from "@/oss/state/evaluators"
 
 import EvaluatorCard from "./EvaluatorCard"
 import EvaluatorList from "./EvaluatorList"
 
 interface EvaluatorsProps {
-    evaluatorConfigs: EvaluatorConfig[]
+    evaluatorConfigs: SimpleEvaluator[]
     handleOnCancel: () => void
     setCurrent: React.Dispatch<React.SetStateAction<number>>
     setSelectedEvaluator: React.Dispatch<React.SetStateAction<Evaluator | null>>
     fetchingEvalConfigs: boolean
     setEditMode: React.Dispatch<React.SetStateAction<boolean>>
     setCloneConfig: React.Dispatch<React.SetStateAction<boolean>>
-    setEditEvalEditValues: React.Dispatch<React.SetStateAction<EvaluatorConfig | null>>
+    setEditEvalEditValues: React.Dispatch<React.SetStateAction<SimpleEvaluator | null>>
     onSuccess: () => void
     setEvaluatorsDisplay: any
     evaluatorsDisplay: string
@@ -95,10 +96,13 @@ const Evaluators = ({
 
     const updatedEvaluatorConfigs = useMemo(() => {
         return evaluatorConfigs.map((config) => {
-            const matchingEvaluator = evaluators.find(
-                (evaluator) => evaluator.key === config.evaluator_key,
+            const evaluatorKey = resolveEvaluatorKey(config)
+            const matchingEvaluator = evaluators.find((evaluator) => evaluator.key === evaluatorKey)
+            if (!matchingEvaluator) return config
+            const nextTags = Array.from(
+                new Set([...(config.tags || []), ...(matchingEvaluator.tags || [])]),
             )
-            return matchingEvaluator ? {...config, tags: matchingEvaluator.tags} : config
+            return {...config, tags: nextTags}
         })
     }, [evaluatorConfigs, evaluators])
 
@@ -111,7 +115,7 @@ const Evaluators = ({
 
         if (searchTerm) {
             filtered = filtered.filter((item) =>
-                item.name.toLowerCase().includes(searchTerm.toLowerCase()),
+                (item.name || "").toLowerCase().includes(searchTerm.toLowerCase()),
             )
         }
 
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx
index b89da2ee19..c06202394c 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx
+++ b/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx
@@ -9,7 +9,7 @@ import EnhancedModal from "@/oss/components/EnhancedUIs/Modal"
 import {useAppId} from "@/oss/hooks/useAppId"
 import {evaluatorConfigsAtom} from "@/oss/lib/atoms/evaluation"
 import useFetchEvaluatorsData from "@/oss/lib/hooks/useFetchEvaluatorsData"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 import ConfigureEvaluator from "./ConfigureEvaluator"
 import {initPlaygroundAtom, resetPlaygroundAtom} from "./ConfigureEvaluator/state/atoms"
@@ -39,7 +39,7 @@ const EvaluatorsModal = ({
         useFetchEvaluatorsData({appId: appId ?? ""})
     const [editMode, setEditMode] = useState(false)
     const [cloneConfig, setCloneConfig] = useState(false)
-    const [editEvalEditValues, setEditEvalEditValues] = useState<EvaluatorConfig | null>(null)
+    const [editEvalEditValues, setEditEvalEditValues] = useState<SimpleEvaluator | null>(null)
     const [evaluatorsDisplay, setEvaluatorsDisplay] = useLocalStorage<"card" | "list">(
         "evaluator_view",
         "list",
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx b/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx
index 9687626c3c..d0281e7539 100644
--- a/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx
+++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx
@@ -63,7 +63,7 @@ const OnlineEvaluationDrawer = ({open, onClose, onCreate}: OnlineEvaluationDrawe
     const filterColumns = useMemo(() => getFilterColumns(), [])
     const [filters, setFilters] = useAtom(onlineEvalFiltersAtom)
     const resetFilters = useSetAtom(resetOnlineEvalFiltersAtom)
-    // Load preview evaluators (with IDs) to map evaluator_config.evaluator_key -> evaluator.id
+    // Load preview evaluators (with IDs) to map config URI key -> evaluator.id
     const previewEvaluatorsSwr = useEvaluators({preview: true, queries: {is_human: false}})
     const baseEvaluators = (baseEvaluatorsSwr.data as Evaluator[] | undefined) ?? []
     const evaluators = useAtomValue(evaluatorConfigsAtom)
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts
index a49787e814..0545163cae 100644
--- a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts
+++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts
@@ -47,18 +47,41 @@ const mergeEvaluatorWithConfig = (
         ...configAny,
     }
 
-    const previewSettings = isPlainObject(evaluatorAny.settings_values)
-        ? (evaluatorAny.settings_values as Record<string, unknown>)
+    const previewData = isPlainObject(evaluatorAny.data)
+        ? (evaluatorAny.data as Record<string, unknown>)
         : undefined
-    const configSettings = isPlainObject(configAny.settings_values)
-        ? (configAny.settings_values as Record<string, unknown>)
+    const configData = isPlainObject(configAny.data)
+        ? (configAny.data as Record<string, unknown>)
         : undefined
+    if (previewData || configData) {
+        const mergedData: Record<string, unknown> = {
+            ...(previewData ?? {}),
+            ...(configData ?? {}),
+        }
 
-    if (previewSettings || configSettings) {
-        merged.settings_values = {
+        const previewParameters = isPlainObject(previewData?.parameters)
+            ? (previewData?.parameters as Record<string, unknown>)
+            : undefined
+        const configParameters = isPlainObject(configData?.parameters)
+            ? (configData?.parameters as Record<string, unknown>)
+            : undefined
+        const previewSettings = isPlainObject(evaluatorAny.settings_values)
+            ? (evaluatorAny.settings_values as Record<string, unknown>)
+            : undefined
+        const configSettings = isPlainObject(configAny.settings_values)
+            ? (configAny.settings_values as Record<string, unknown>)
+            : undefined
+        const mergedParameters = {
+            ...(previewParameters ?? {}),
             ...(previewSettings ?? {}),
+            ...(configParameters ?? {}),
             ...(configSettings ?? {}),
         }
+        if (Object.keys(mergedParameters).length) {
+            mergedData.parameters = mergedParameters
+        }
+
+        merged.data = mergedData
     }
 
     return merged as EvaluatorPreviewDto
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx
index af624b9f83..d5e724dcef 100644
--- a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx
+++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx
@@ -2,8 +2,9 @@ import {useMemo} from "react"
 
 import {SelectProps} from "antd"
 
+import {getEvaluatorParameters, resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import type {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types"
-import type {Evaluator} from "@/oss/lib/Types"
+import type {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 
 import {
     ALLOWED_ONLINE_EVALUATOR_KEYS,
@@ -13,7 +14,7 @@ import {
 import {capitalize, collectEvaluatorCandidates} from "../utils/evaluatorDetails"
 
 interface UseEvaluatorSelectionParams {
-    evaluators: any[]
+    evaluators: SimpleEvaluator[]
     selectedEvaluatorId: string | undefined
     previewEvaluators: EvaluatorPreviewDto[]
     baseEvaluators: Evaluator[]
@@ -21,16 +22,17 @@ interface UseEvaluatorSelectionParams {
 
 interface EvaluatorSelectionResult {
     evaluatorOptions: SelectProps["options"]
-    selectedEvaluatorConfig?: any
+    selectedEvaluatorConfig?: SimpleEvaluator
     matchedPreviewEvaluator?: EvaluatorPreviewDto
     evaluatorTypeLookup: Map<string, {slug: string; label: string}>
 }
 
-const buildEvaluatorOptions = (configs: any[]): SelectProps["options"] =>
+const buildEvaluatorOptions = (configs: SimpleEvaluator[]): SelectProps["options"] =>
     (configs || []).map((cfg: any) => {
         const iconSrc = (cfg?.icon_url && (cfg.icon_url.src || cfg.icon_url)) || undefined
         const displayName = cfg?.name || ""
-        const searchable = [displayName, cfg?.evaluator_key, cfg?.id]
+        const evaluatorKey = resolveEvaluatorKey(cfg)
+        const searchable = [displayName, evaluatorKey, cfg?.id, cfg?.slug, cfg?.data?.uri]
             .map((item) => {
                 if (item === undefined || item === null) return undefined
                 const text = String(item).trim()
@@ -61,6 +63,7 @@ const buildPreviewLookup = (previewEvaluators: EvaluatorPreviewDto[]) => {
     const map = new Map<string, EvaluatorPreviewDto>()
     previewEvaluators.forEach((evaluator) => {
         const rawKey =
+            resolveEvaluatorKey(evaluator as any) ||
             (evaluator as any)?.evaluator_key ||
             (evaluator as any)?.flags?.evaluator_key ||
             (evaluator as any)?.meta?.evaluator_key ||
@@ -122,13 +125,14 @@ export const useEvaluatorSelection = ({
 
     const allowedEvaluators = useMemo(() => {
         if (!evaluators?.length) return []
-        return evaluators.filter((config: any) => {
+        return evaluators.filter((config: SimpleEvaluator) => {
             if (!config) return false
+            const evaluatorKey = resolveEvaluatorKey(config)
             const candidates = collectEvaluatorCandidates(
-                config?.evaluator_key,
-                (config as any)?.slug,
+                evaluatorKey,
+                config?.slug,
                 config?.name,
-                config?.key,
+                (config as any)?.key,
                 config?.meta?.evaluator_key,
                 config?.meta?.key,
             )
@@ -141,13 +145,13 @@ export const useEvaluatorSelection = ({
         if (!allowedEvaluators.length) return []
         if (!ENABLE_CORRECT_ANSWER_KEY_FILTER) return allowedEvaluators
         const requiringKey = evaluatorsRequiringCorrectAnswerKey ?? new Set<string>()
-        return allowedEvaluators.filter((config: any) => {
+        return allowedEvaluators.filter((config: SimpleEvaluator) => {
             if (!config) return false
-            const evaluatorKey = config?.evaluator_key
+            const evaluatorKey = resolveEvaluatorKey(config)
             if (evaluatorKey && requiringKey.has(evaluatorKey)) {
                 return false
             }
-            const settingsValues = config?.settings_values || {}
+            const settingsValues = getEvaluatorParameters(config)
             const requiresCorrectAnswerKey = Object.entries(settingsValues).some(([key, value]) => {
                 if (!key) return false
                 const normalizedKey = key.toLowerCase()
@@ -176,7 +180,7 @@ export const useEvaluatorSelection = ({
     const previewLookup = useMemo(() => buildPreviewLookup(previewEvaluators), [previewEvaluators])
 
     const matchedPreviewEvaluator = useMemo(() => {
-        const key = (selectedEvaluatorConfig as any)?.evaluator_key as string | undefined
+        const key = resolveEvaluatorKey(selectedEvaluatorConfig)
         if (!key) return undefined
         return previewLookup.get(key.toLowerCase())
     }, [selectedEvaluatorConfig, previewLookup])
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts
index fb54e0978b..3cef385d5d 100644
--- a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts
+++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts
@@ -3,6 +3,7 @@ import {useMemo} from "react"
 import {useAtomValue} from "jotai"
 
 import {evaluatorConfigsAtom} from "@/oss/lib/atoms/evaluation"
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import useEvaluatorConfigs from "@/oss/lib/hooks/useEvaluatorConfigs"
 
 import {EVALUATOR_CATEGORY_LABEL_MAP} from "../constants"
@@ -25,6 +26,7 @@ export const useEvaluatorTypeFromConfigs = ({
         }
 
         const candidates = collectEvaluatorCandidates(
+            resolveEvaluatorKey(evaluator as any),
             (evaluator as any)?.slug,
             (evaluator as any)?.key,
             (evaluator as any)?.meta?.evaluator_key,
@@ -32,7 +34,7 @@ export const useEvaluatorTypeFromConfigs = ({
         )
 
         const match = configs.find((cfg) => {
-            const key = (cfg?.evaluator_key || cfg?.name || cfg?.id || "").toString().trim()
+            const key = (resolveEvaluatorKey(cfg) || cfg?.name || cfg?.id || "").toString().trim()
             if (!key) return false
             const lower = key.toLowerCase()
             if (candidates.includes(lower)) return true
@@ -63,7 +65,7 @@ export const useEvaluatorTypeFromConfigs = ({
         // 2) Infer label by scanning evaluator_key/name tokens for known category slugs
         const categorySlugs = Object.keys(EVALUATOR_CATEGORY_LABEL_MAP || {})
         const keyTokens = [
-            (match as any)?.evaluator_key,
+            resolveEvaluatorKey(match),
             (match as any)?.name,
             (evaluator as any)?.key,
             (evaluator as any)?.name,
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts b/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts
index fddb7511af..fdbd26e16f 100644
--- a/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts
+++ b/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts
@@ -1,3 +1,4 @@
+import {resolveEvaluatorKey} from "@/oss/lib/evaluators/utils"
 import type {EvaluatorPreviewDto} from "@/oss/lib/hooks/useEvaluators/types"
 
 import {
@@ -126,6 +127,7 @@ export const extractEvaluatorType = (
     }
 
     const candidates = collectEvaluatorCandidates(
+        resolveEvaluatorKey(evaluator as any),
         (evaluator as any)?.slug,
         (evaluator as any)?.key,
         (evaluator as any)?.name,
@@ -290,8 +292,8 @@ export const extractParameterList = (evaluator?: EvaluatorPreviewDto): Parameter
 
     // Support both simple preview artifacts and workflow evaluators
     const parameterSources = [
-        (evaluator as any)?.settings_values,
         (evaluator as any)?.data?.parameters,
+        (evaluator as any)?.settings_values,
         (evaluator as any)?.data?.service?.configuration?.parameters,
         (evaluator as any)?.data?.configuration?.parameters,
     ]
@@ -359,8 +361,8 @@ export const extractModelName = (evaluator?: EvaluatorPreviewDto) => {
     }
 
     const sources = [
-        (evaluator as any)?.settings_values,
         (evaluator as any)?.data?.parameters,
+        (evaluator as any)?.settings_values,
         (evaluator as any)?.data?.service?.configuration,
         (evaluator as any)?.data?.service?.configuration?.parameters,
         (evaluator as any)?.data?.configuration,
@@ -660,7 +662,8 @@ const normalizeMessageContent = (
 export const extractPromptSections = (evaluator?: EvaluatorPreviewDto): PromptPreviewSection[] => {
     if (!evaluator) return []
     const data = (evaluator as any)?.data ?? {}
-    const settings = (evaluator as any)?.settings_values
+    const parameters = data?.parameters
+    const settings = parameters ?? (evaluator as any)?.settings_values
     const agConfig = data?.parameters?.ag_config ?? data?.parameters?.agConfig
     const messages =
         findFirstMessages(settings) ??
@@ -728,7 +731,6 @@ export const extractPromptSections = (evaluator?: EvaluatorPreviewDto): PromptPr
 
     const promptSources = [
         settings,
-        data?.parameters,
         data?.service?.configuration?.parameters,
         data?.configuration?.parameters,
     ]
diff --git a/web/oss/src/lib/Types.ts b/web/oss/src/lib/Types.ts
index 74f6f31a51..2bd357fc60 100644
--- a/web/oss/src/lib/Types.ts
+++ b/web/oss/src/lib/Types.ts
@@ -870,6 +870,76 @@ export interface Evaluator {
     archived?: boolean
 }
 
+export interface SimpleEvaluatorData {
+    version?: string
+    uri?: string
+    url?: string
+    headers?: Record<string, string>
+    schemas?: Record<string, any>
+    script?: {content?: string; runtime?: string}
+    parameters?: Record<string, any>
+    service?: Record<string, any>
+    configuration?: Record<string, any>
+}
+
+export interface SimpleEvaluatorFlags {
+    is_custom?: boolean
+    is_evaluator?: boolean
+    is_human?: boolean
+    requires_llm_api_keys?: boolean
+    evaluator_key?: string
+    color?: string
+}
+
+export interface SimpleEvaluator {
+    id: string
+    slug: string
+    name?: string
+    description?: string
+    tags?: string[]
+    meta?: Record<string, any>
+    flags?: SimpleEvaluatorFlags
+    data?: SimpleEvaluatorData
+    created_at?: string
+    updated_at?: string
+    deleted_at?: string | null
+    created_by_id?: string
+    updated_by_id?: string
+    deleted_by_id?: string
+    color?: string
+    icon_url?: string | StaticImageData
+}
+
+export interface SimpleEvaluatorCreate {
+    slug: string
+    name?: string
+    description?: string
+    tags?: string[]
+    meta?: Record<string, any>
+    flags?: SimpleEvaluatorFlags
+    data?: SimpleEvaluatorData
+}
+
+export interface SimpleEvaluatorEdit {
+    id: string
+    name?: string
+    description?: string
+    tags?: string[]
+    meta?: Record<string, any>
+    flags?: SimpleEvaluatorFlags
+    data?: SimpleEvaluatorData
+}
+
+export interface SimpleEvaluatorResponse {
+    count: number
+    evaluator: SimpleEvaluator | null
+}
+
+export interface SimpleEvaluatorsResponse {
+    count: number
+    evaluators: SimpleEvaluator[]
+}
+
 export interface EvaluatorConfig {
     id: string
     evaluator_key: string
diff --git a/web/oss/src/lib/atoms/evaluation.ts b/web/oss/src/lib/atoms/evaluation.ts
index 323dde41cb..1fbc0039ad 100644
--- a/web/oss/src/lib/atoms/evaluation.ts
+++ b/web/oss/src/lib/atoms/evaluation.ts
@@ -1,6 +1,6 @@
 import {atom} from "jotai"
 
-import {Evaluation, EvaluationScenario, Evaluator, EvaluatorConfig} from "../Types"
+import {Evaluation, EvaluationScenario, Evaluator, SimpleEvaluator} from "../Types"
 
 export const evaluationAtom = atom<Evaluation | undefined>(undefined)
 
@@ -8,4 +8,4 @@ export const evaluationScenariosAtom = atom<EvaluationScenario[]>([])
 
 export const evaluatorsAtom = atom<Evaluator[]>([])
 
-export const evaluatorConfigsAtom = atom<EvaluatorConfig[]>([])
+export const evaluatorConfigsAtom = atom<SimpleEvaluator[]>([])
diff --git a/web/oss/src/lib/evaluators/utils.ts b/web/oss/src/lib/evaluators/utils.ts
new file mode 100644
index 0000000000..e21d98a62e
--- /dev/null
+++ b/web/oss/src/lib/evaluators/utils.ts
@@ -0,0 +1,114 @@
+import type {SimpleEvaluator, SimpleEvaluatorData} from "@/oss/lib/Types"
+
+/** Lowercases `value` and replaces each non-alphanumeric run with one hyphen. */
+const normalizeSlugBase = (value?: string | null) =>
+    String(value ?? "")
+        .trim()
+        .toLowerCase()
+        .replace(/[^a-z0-9]+/g, "-")
+        .replace(/^-+|-+$/g, "")
+
+/** Removes a trailing `-v<digits>` version marker (`foo-v2` becomes `foo`). */
+const trimVersionSuffix = (value: string) => value.replace(/-v\d+$/i, "")
+
+/**
+ * Derives the evaluator key from a workflow URI.
+ *
+ * Tried in order: the `agenta:builtin:<key>` form, the third colon-separated
+ * segment, then the last slash-separated segment. A trailing `-v<digits>`
+ * suffix is stripped from whichever segment is picked.
+ *
+ * @returns The key, or `undefined` when `uri` is blank or yields no segment.
+ */
+export const extractEvaluatorKeyFromUri = (uri?: string | null): string | undefined => {
+    if (!uri) return undefined
+    const trimmed = uri.trim()
+    if (!trimmed) return undefined
+
+    const builtinMatch = trimmed.match(/^agenta:builtin:([^:]+)(:|$)/i)
+    if (builtinMatch?.[1]) {
+        return trimVersionSuffix(builtinMatch[1])
+    }
+
+    const parts = trimmed.split(":").filter(Boolean)
+    if (parts.length >= 3 && parts[2]) {
+        return trimVersionSuffix(parts[2])
+    }
+
+    const slashParts = trimmed.split("/").filter(Boolean)
+    const lastSegment = slashParts[slashParts.length - 1]
+    if (lastSegment) {
+        return trimVersionSuffix(lastSegment)
+    }
+
+    return undefined
+}
+
+/**
+ * Resolves the evaluator key for a (possibly legacy-shaped) evaluator object.
+ *
+ * Sources are consulted in order: `data.uri`, top-level `evaluator_key`,
+ * `meta.evaluator_key`, `flags.evaluator_key`, and finally top-level `key`.
+ *
+ * @returns The trimmed key, or `undefined` when no source yields a non-blank one.
+ */
+export const resolveEvaluatorKey = (
+    evaluator?: Partial<SimpleEvaluator> | null,
+): string | undefined => {
+    if (!evaluator) return undefined
+
+    const candidate =
+        extractEvaluatorKeyFromUri(evaluator.data?.uri) ||
+        (typeof (evaluator as any)?.evaluator_key === "string"
+            ? (evaluator as any).evaluator_key
+            : undefined) ||
+        (typeof evaluator.meta?.evaluator_key === "string"
+            ? evaluator.meta.evaluator_key
+            : undefined) ||
+        (typeof evaluator.flags?.evaluator_key === "string"
+            ? evaluator.flags.evaluator_key
+            : undefined) ||
+        (typeof (evaluator as any)?.key === "string" ? (evaluator as any).key : undefined)
+
+    // A whitespace-only candidate must not leak out as "".
+    return (candidate ? String(candidate).trim() : "") || undefined
+}
+
+/** Builds the builtin workflow URI `agenta:builtin:<evaluatorKey>:<version>`. */
+export const buildEvaluatorUri = (evaluatorKey: string, version = "v0") =>
+    `agenta:builtin:${evaluatorKey}:${version}`
+
+/**
+ * Builds a slug of at most 50 characters: the normalized `name` (or
+ * `"evaluator"` when that is empty) plus a random six-character base-36 suffix.
+ */
+export const buildEvaluatorSlug = (name?: string | null) => {
+    const base = normalizeSlugBase(name) || "evaluator"
+    // `toString(36)` can produce fewer than six fraction digits (0.5 -> "0.i"), so pad.
+    const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, "0")
+    const maxBaseLength = Math.max(1, 50 - suffix.length - 1)
+    // Truncation may cut right after a hyphen; drop it to avoid a "--" in the slug.
+    const trimmedBase = base.slice(0, maxBaseLength).replace(/-+$/, "")
+    return `${trimmedBase}-${suffix}`
+}
+
+/**
+ * Shallow-merges `updates` over `base`; nested objects such as `parameters`
+ * are replaced wholesale, not merged.
+ *
+ * @returns The merged data, or `undefined` when both arguments are missing.
+ */
+export const mergeEvaluatorData = (
+    base?: SimpleEvaluatorData | null,
+    updates?: Partial<SimpleEvaluatorData> | null,
+): SimpleEvaluatorData | undefined => {
+    if (!base && !updates) return undefined
+    return {
+        ...(base ?? {}),
+        ...(updates ?? {}),
+    }
+}
+
+/** Returns `evaluator.data.parameters`, or an empty object when it is missing. */
+export const getEvaluatorParameters = (evaluator?: Partial<SimpleEvaluator> | null) =>
+    (evaluator?.data?.parameters as Record<string, any>) || {}
diff --git a/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts b/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts
index 3765eb6677..998f65459e 100644
--- a/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts
+++ b/web/oss/src/lib/hooks/useEvaluatorConfigs/index.ts
@@ -6,11 +6,11 @@ import {SWRConfiguration} from "swr"
 import {useAppId} from "@/oss/hooks/useAppId"
 import {evaluatorConfigsQueryAtomFamily} from "@/oss/state/evaluators"
 
-import {EvaluatorConfig} from "../../Types"
+import {SimpleEvaluator} from "../../Types"
 
 type EvaluatorConfigResult<Preview extends boolean> = Preview extends true
     ? undefined
-    : EvaluatorConfig[]
+    : SimpleEvaluator[]
 
 type EvaluatorConfigsOptions<Preview extends boolean> = {
     preview?: Preview
diff --git a/web/oss/src/services/evaluations/api/index.ts b/web/oss/src/services/evaluations/api/index.ts
index 43bfdb3ca8..9702d501b5 100644
--- a/web/oss/src/services/evaluations/api/index.ts
+++ b/web/oss/src/services/evaluations/api/index.ts
@@ -17,7 +17,7 @@ export {
     createEvaluatorConfig,
     updateEvaluatorConfig,
     deleteEvaluatorConfig,
-    type CreateEvaluationConfigData,
+    type CreateEvaluatorConfigData,
 } from "@/oss/services/evaluators"
 
 //Prefix convention:
diff --git a/web/oss/src/services/evaluators/index.ts b/web/oss/src/services/evaluators/index.ts
index 2a9bb15de7..9a85ae7c8c 100644
--- a/web/oss/src/services/evaluators/index.ts
+++ b/web/oss/src/services/evaluators/index.ts
@@ -1,9 +1,21 @@
 import axios from "@/oss/lib/api/assets/axiosConfig"
+import {
+    buildEvaluatorSlug,
+    buildEvaluatorUri,
+    resolveEvaluatorKey,
+} from "@/oss/lib/evaluators/utils"
 import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
 import {getTagColors} from "@/oss/lib/helpers/colors"
 import {isDemo, stringToNumberInRange} from "@/oss/lib/helpers/utils"
 import {EvaluatorResponseDto} from "@/oss/lib/hooks/useEvaluators/types"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {
+    Evaluator,
+    SimpleEvaluator,
+    SimpleEvaluatorCreate,
+    SimpleEvaluatorEdit,
+    SimpleEvaluatorResponse,
+    SimpleEvaluatorsResponse,
+} from "@/oss/lib/Types"
 import aiImg from "@/oss/media/artificial-intelligence.png"
 import bracketCurlyImg from "@/oss/media/bracket-curly.png"
 import codeImg from "@/oss/media/browser.png"
@@ -48,7 +60,7 @@ export const updateEvaluator = async (
     }
 }
 
-export const fetchEvaluatorById = async (evaluatorId: string) => {
+export const fetchEvaluatorById = async (evaluatorId: string): Promise<SimpleEvaluator | null> => {
     const {projectId} = getProjectValues()
     if (!projectId) {
         return null
@@ -59,7 +71,7 @@ export const fetchEvaluatorById = async (evaluatorId: string) => {
     )
     const payload = (response?.data as any)?.evaluator ?? response?.data ?? null
     if (!payload) return null
-    return payload as EvaluatorResponseDto<"response">["evaluator"]
+    return decorateSimpleEvaluator(payload as SimpleEvaluator)
 }
 
 const evaluatorIconsMap = {
@@ -103,58 +115,112 @@ export const fetchAllEvaluators = async (includeArchived = false) => {
 }
 
 // Evaluator Configs
+function decorateSimpleEvaluator(evaluator: SimpleEvaluator) {
+    const tagColors = getTagColors()
+    const evaluatorKey = resolveEvaluatorKey(evaluator)
+    if (!evaluatorKey) return evaluator
+
+    return {
+        ...evaluator,
+        icon_url: evaluatorIconsMap[evaluatorKey as keyof typeof evaluatorIconsMap],
+        color: tagColors[stringToNumberInRange(evaluatorKey, 0, tagColors.length - 1)],
+    }
+}
+
 export const fetchAllEvaluatorConfigs = async (
     appId?: string | null,
     projectIdOverride?: string | null,
-) => {
-    const tagColors = getTagColors()
+): Promise<SimpleEvaluator[]> => {
     const {projectId: projectIdFromStore} = getProjectValues()
     const projectId = projectIdOverride ?? projectIdFromStore
+    void appId
 
     if (!projectId) {
-        return [] as EvaluatorConfig[]
+        return [] as SimpleEvaluator[]
     }
 
-    const response = await axios.get("/evaluators/configs", {
-        params: {
-            project_id: projectId,
-            ...(appId ? {app_id: appId} : {}),
+    const response = await axios.post<SimpleEvaluatorsResponse>(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/query?project_id=${projectId}`,
+        {
+            evaluator: {
+                flags: {
+                    is_evaluator: true,
+                    is_human: false,
+                },
+            },
+            include_archived: false,
         },
-    })
-    const evaluatorConfigs = (response.data || []).map((item: EvaluatorConfig) => ({
-        ...item,
-        icon_url: evaluatorIconsMap[item.evaluator_key as keyof typeof evaluatorIconsMap],
-        color: tagColors[stringToNumberInRange(item.evaluator_key, 0, tagColors.length - 1)],
-    })) as EvaluatorConfig[]
-    return evaluatorConfigs
+    )
+
+    const evaluators = response.data?.evaluators ?? []
+    return evaluators.filter((item) => !item.deleted_at).map(decorateSimpleEvaluator)
+}
+
+export interface CreateEvaluatorConfigData {
+    name: string
+    evaluator_key: string
+    parameters: Record<string, any>
+    tags?: string[]
+    description?: string
 }
 
-export type CreateEvaluationConfigData = Omit<EvaluatorConfig, "id" | "created_at">
 export const createEvaluatorConfig = async (
     _appId: string | null | undefined,
-    config: CreateEvaluationConfigData,
-) => {
+    config: CreateEvaluatorConfigData,
+): Promise<SimpleEvaluator> => {
     const {projectId} = getProjectValues()
     void _appId
 
-    return axios.post(`/evaluators/configs?project_id=${projectId}`, {
-        ...config,
-    })
+    const payload: SimpleEvaluatorCreate = {
+        slug: buildEvaluatorSlug(config.name),
+        name: config.name,
+        description: config.description,
+        tags: config.tags,
+        flags: {is_evaluator: true, is_human: false},
+        data: {
+            uri: buildEvaluatorUri(config.evaluator_key),
+            parameters: config.parameters,
+        },
+    }
+
+    const response = await axios.post<SimpleEvaluatorResponse>(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/?project_id=${projectId}`,
+        {evaluator: payload},
+    )
+
+    const evaluator = response.data?.evaluator
+    if (!evaluator) {
+        throw new Error("Failed to create evaluator")
+    }
+
+    return decorateSimpleEvaluator(evaluator)
 }
 
 export const updateEvaluatorConfig = async (
     configId: string,
-    config: Partial<CreateEvaluationConfigData>,
-) => {
+    config: SimpleEvaluatorEdit,
+): Promise<SimpleEvaluator> => {
     const {projectId} = getProjectValues()
 
-    return axios.put(`/evaluators/configs/${configId}?project_id=${projectId}`, config)
+    const response = await axios.put<SimpleEvaluatorResponse>(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/${configId}?project_id=${projectId}`,
+        {evaluator: {...config, id: configId}},
+    )
+
+    const evaluator = response.data?.evaluator
+    if (!evaluator) {
+        throw new Error("Failed to update evaluator")
+    }
+
+    return decorateSimpleEvaluator(evaluator)
 }
 
 export const deleteEvaluatorConfig = async (configId: string) => {
     const {projectId} = getProjectValues()
 
-    return axios.delete(`/evaluators/configs/${configId}?project_id=${projectId}`)
+    return axios.post(
+        `${getAgentaApiUrl()}/preview/simple/evaluators/${configId}/archive?project_id=${projectId}`,
+    )
 }
 
 export const deleteHumanEvaluator = async (evaluatorId: string) => {
diff --git a/web/oss/src/state/evaluators/atoms.ts b/web/oss/src/state/evaluators/atoms.ts
index 24f390e884..5c6b34c9e2 100644
--- a/web/oss/src/state/evaluators/atoms.ts
+++ b/web/oss/src/state/evaluators/atoms.ts
@@ -5,6 +5,7 @@ import {atomWithQuery} from "jotai-tanstack-query"
 import {getMetricsFromEvaluator} from "@/oss/components/SharedDrawers/AnnotateDrawer/assets/transforms"
 import axios from "@/oss/lib/api/assets/axiosConfig"
 import {evaluatorsAtom} from "@/oss/lib/atoms/evaluation"
+import {extractEvaluatorKeyFromUri} from "@/oss/lib/evaluators/utils"
 import {transformApiData} from "@/oss/lib/hooks/useAnnotations/assets/transformer"
 import {
     EvaluatorDto,
@@ -13,7 +14,7 @@ import {
     EvaluatorRevisionsResponseDto,
     EvaluatorsResponseDto,
 } from "@/oss/lib/hooks/useEvaluators/types"
-import {Evaluator, EvaluatorConfig} from "@/oss/lib/Types"
+import {Evaluator, SimpleEvaluator} from "@/oss/lib/Types"
 import {fetchAllEvaluatorConfigs, fetchAllEvaluators} from "@/oss/services/evaluators"
 import {selectedAppIdAtom} from "@/oss/state/app"
 import {selectedOrgAtom} from "@/oss/state/org"
@@ -26,16 +27,15 @@ import {EvaluatorConfigsParams, EvaluatorsParams} from "./types"
 
 const extractKeyFromUri = (uri: unknown): string | undefined => {
     if (typeof uri !== "string") return undefined
-    const match = uri.match(/[:/](auto_[a-z0-9_]+)/i)
-    if (match?.[1]) return match[1]
-    const parts = uri.split(":").filter(Boolean)
-    if (parts.length) {
-        const candidate = parts[parts.length - 1]
-        if (candidate) {
-            return candidate.replace(/-v\d+$/i, "")
-        }
-    }
-    return undefined
+    return (
+        extractEvaluatorKeyFromUri(uri) ||
+        uri.match(/[:/](auto_[a-z0-9_]+)/i)?.[1] ||
+        uri
+            .split(":")
+            .filter(Boolean)
+            .slice(-1)[0]
+            ?.replace(/-v\d+$/i, "")
+    )
 }
 
 const isPlainObject = (value: unknown): value is Record<string, any> => {
@@ -102,7 +102,7 @@ const extractRequiresLlmApiKeys = (source: unknown): boolean | undefined => {
 
 export const evaluatorConfigsQueryAtomFamily = atomFamily(
     ({projectId: overrideProjectId, appId: overrideAppId, preview}: EvaluatorConfigsParams = {}) =>
-        atomWithQuery<EvaluatorConfig[]>((get) => {
+        atomWithQuery<SimpleEvaluator[]>((get) => {
             const projectId = overrideProjectId || get(projectIdAtom)
             const appId = overrideAppId || get(selectedAppIdAtom)
             const user = get(userAtom) as {id?: string} | null