From d89c7b731a03fb7ecbfba61be47ea10ea8547c88 Mon Sep 17 00:00:00 2001
From: majiayu000 <1835304752@qq.com>
Date: Tue, 30 Dec 2025 16:56:18 +0800
Subject: [PATCH 1/2] fix: resolve duplicate MCP route registration causing
 50% failure rate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #7772

The issue was caused by duplicate registration of the MCP endpoint
/mcp/v1/chat/completions in both openai.go and localai.go, leading to
a race condition where requests would randomly hit different handlers
with incompatible behaviors.

Changes:
- Removed duplicate MCP route registration from openai.go
- Kept the localai.MCPStreamEndpoint as the canonical handler
- Added all three MCP route patterns for backward compatibility:
  * /v1/mcp/chat/completions
  * /mcp/v1/chat/completions
  * /mcp/chat/completions
- Added comments to clarify route ownership and prevent future conflicts
- Fixed formatting in ui_api.go

The localai.MCPStreamEndpoint handler is more feature-complete as it
supports both streaming and non-streaming modes, while the removed
openai.MCPCompletionEndpoint only supported synchronous requests.

This eliminates the ~50% failure rate where the cogito library would
receive "Invalid http method" errors when internal HTTP requests were
routed to the wrong handler.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5
Signed-off-by: majiayu000 <1835304752@qq.com>
---
 core/http/routes/localai.go |  4 +++-
 core/http/routes/openai.go  | 19 ++-----------------
 core/http/routes/ui_api.go  |  2 +-
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 32e030bf34f0..f7db61b0eceb 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -137,7 +137,8 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
 		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
 
-	// MCP Stream endpoint
+	// MCP endpoint - supports both streaming and non-streaming modes
+	// Note: These are the canonical MCP routes (not duplicated in openai.go)
 	if evaluator != nil {
 		mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
 		mcpStreamMiddleware := []echo.MiddlewareFunc{
@@ -154,6 +155,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		}
 		router.POST("/v1/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
 		router.POST("/mcp/v1/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
+		router.POST("/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
 	}
 
 	// Agent job routes
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
index 93fed71dbb59..e61e48a050e3 100644
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -79,23 +79,8 @@ func RegisterOpenAIRoutes(app *echo.Echo,
 	app.POST("/completions", completionHandler, completionMiddleware...)
 	app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
 
-	// MCPcompletion
-	mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
-	mcpCompletionMiddleware := []echo.MiddlewareFunc{
-		traceMiddleware,
-		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
-		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
-		func(next echo.HandlerFunc) echo.HandlerFunc {
-			return func(c echo.Context) error {
-				if err := re.SetOpenAIRequest(c); err != nil {
-					return err
-				}
-				return next(c)
-			}
-		},
-	}
-	app.POST("/mcp/v1/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
-	app.POST("/mcp/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
+	// Note: MCP endpoints are registered in localai.go to avoid route conflicts
+	// The localai.MCPStreamEndpoint handler supports both streaming and non-streaming modes
 
 	// embeddings
 	embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go
index 84af2e32fe57..78b19468f612 100644
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -954,7 +954,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
 		if !appConfig.EnableTracing {
 			return c.JSON(503, map[string]any{
 				"error": "Tracing disabled",
-				})
+			})
 		}
 		traces := middleware.GetTraces()
 		return c.JSON(200, map[string]interface{}{

From 2adddef5fe266037b6c53c644cab06e7346edba8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 2 Jan 2026 21:34:23 +0100
Subject: [PATCH 2/2] Address feedback from review

Signed-off-by: Ettore Di Giacinto
---
 core/http/endpoints/localai/mcp.go |   4 +-
 core/http/endpoints/openai/mcp.go  | 148 -----------------------------
 core/http/routes/localai.go        |   4 +-
 core/http/routes/openai.go         |   3 -
 4 files changed, 4 insertions(+), 155 deletions(-)
 delete mode 100644 core/http/endpoints/openai/mcp.go

diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go
index a2367fbc3ccb..721f97a69e81 100644
--- a/core/http/endpoints/localai/mcp.go
+++ b/core/http/endpoints/localai/mcp.go
@@ -53,12 +53,12 @@ type MCPErrorEvent struct {
 	Message string `json:"message"`
 }
 
-// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
+// MCPEndpoint is the endpoint for MCP chat completions. It supports an SSE mode, but that mode is not compatible with the OpenAI APIs.
 // @Summary Stream MCP chat completions with reasoning, tool calls, and results
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/mcp/chat/completions [post]
-func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		ctx := c.Request().Context()
 		created := int(time.Now().Unix())
diff --git a/core/http/endpoints/openai/mcp.go b/core/http/endpoints/openai/mcp.go
deleted file mode 100644
index e9987cd54039..000000000000
--- a/core/http/endpoints/openai/mcp.go
+++ /dev/null
@@ -1,148 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net"
-	"time"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
-	"github.com/mudler/LocalAI/core/http/middleware"
-
-	"github.com/google/uuid"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/templates"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/cogito"
-	"github.com/mudler/xlog"
-)
-
-// MCPCompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
-// @Summary Generate completions for a given prompt and model.
-// @Param request body schema.OpenAIRequest true "query params"
-// @Success 200 {object} schema.OpenAIResponse "Response"
-// @Router /mcp/v1/completions [post]
-func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	// We do not support streaming mode (Yet?)
-	return func(c echo.Context) error {
-		created := int(time.Now().Unix())
-
-		ctx := c.Request().Context()
-
-		// Handle Correlation
-		id := c.Request().Header.Get("X-Correlation-ID")
-		if id == "" {
-			id = uuid.New().String()
-		}
-
-		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
-		if !ok || input.Model == "" {
-			return echo.ErrBadRequest
-		}
-
-		config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
-		if !ok || config == nil {
-			return echo.ErrBadRequest
-		}
-
-		if config.MCP.Servers == "" && config.MCP.Stdio == "" {
-			return fmt.Errorf("no MCP servers configured")
-		}
-
-		// Get MCP config from model config
-		remote, stdio, err := config.MCP.MCPConfigFromYAML()
-		if err != nil {
-			return fmt.Errorf("failed to get MCP config: %w", err)
-		}
-
-		// Check if we have tools in cache, or we have to have an initial connection
-		sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
-		if err != nil {
-			return fmt.Errorf("failed to get MCP sessions: %w", err)
-		}
-
-		if len(sessions) == 0 {
-			return fmt.Errorf("no working MCP servers found")
-		}
-
-		fragment := cogito.NewEmptyFragment()
-
-		for _, message := range input.Messages {
-			fragment = fragment.AddMessage(message.Role, message.StringContent)
-		}
-
-		_, port, err := net.SplitHostPort(appConfig.APIAddress)
-		if err != nil {
-			return err
-		}
-
-		apiKey := ""
-		if appConfig.ApiKeys != nil {
-			apiKey = appConfig.ApiKeys[0]
-		}
-
-		ctxWithCancellation, cancel := context.WithCancel(ctx)
-		defer cancel()
-
-		// TODO: instead of connecting to the API, we should just wire this internally
-		// and act like completion.go.
-		// We can do this as cogito expects an interface and we can create one that
-		// we satisfy to just call internally ComputeChoices
-		defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
-
-		// Build cogito options using the consolidated method
-		cogitoOpts := config.BuildCogitoOptions()
-
-		cogitoOpts = append(
-			cogitoOpts,
-			cogito.WithContext(ctxWithCancellation),
-			cogito.WithMCPs(sessions...),
-			cogito.WithStatusCallback(func(s string) {
-				xlog.Debug("[model agent] Status", "model", config.Name, "status", s)
-			}),
-			cogito.WithReasoningCallback(func(s string) {
-				xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s)
-			}),
-			cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision {
-				xlog.Debug("[model agent] Tool call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments)
-				return cogito.ToolCallDecision{
-					Approved: true,
-				}
-			}),
-			cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
-				xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments)
-			}),
-		)
-
-		f, err := cogito.ExecuteTools(
-			defaultLLM, fragment,
-			cogitoOpts...,
-		)
-		if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
-			return err
-		}
-
-		f, err = defaultLLM.Ask(ctx, f)
-		if err != nil {
-			return err
-		}
-
-		resp := &schema.OpenAIResponse{
-			ID:      id,
-			Created: created,
-			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
-			Object:  "text_completion",
-		}
-
-		jsonResult, _ := json.Marshal(resp)
-		xlog.Debug("Response", "response", string(jsonResult))
-
-		// Return the prediction in the response body
-		return c.JSON(200, resp)
-	}
-}
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index f7db61b0eceb..f70a44b2109c 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -138,9 +138,9 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
 
 	// MCP endpoint - supports both streaming and non-streaming modes
-	// Note: These are the canonical MCP routes (not duplicated in openai.go)
+	// Note: streaming mode is NOT compatible with the OpenAI APIs; it streams a custom event set with additional states (reasoning, tool calls, results)
 	if evaluator != nil {
-		mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
+		mcpStreamHandler := localai.MCPEndpoint(cl, ml, evaluator, appConfig)
 		mcpStreamMiddleware := []echo.MiddlewareFunc{
 			requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
 			requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
index e61e48a050e3..2d62859f317f 100644
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -79,9 +79,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
 	app.POST("/completions", completionHandler, completionMiddleware...)
 	app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
 
-	// Note: MCP endpoints are registered in localai.go to avoid route conflicts
-	// The localai.MCPStreamEndpoint handler supports both streaming and non-streaming modes
-
 	// embeddings
 	embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
 	embeddingMiddleware := []echo.MiddlewareFunc{
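
---

Reviewer note (not part of either patch): a minimal regression-test sketch for the routing contract the patches establish. The test name, stub handler, and suggested location are illustrative; only the three alias paths and the one-canonical-handler rule come from the patches. It registers a single handler on the three MCP route aliases, as core/http/routes/localai.go now does, and asserts that a POST to each alias reaches that same handler, so a reintroduced duplicate registration with a diverging handler would surface here as a failing alias.

package routes_test

import (
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/labstack/echo/v4"
)

// TestMCPRouteAliases checks that all three MCP route aliases are served
// by one and the same handler, mirroring the registration now done in
// RegisterLocalAIRoutes. The stub stands in for localai.MCPEndpoint.
func TestMCPRouteAliases(t *testing.T) {
	aliases := []string{
		"/v1/mcp/chat/completions",
		"/mcp/v1/chat/completions",
		"/mcp/chat/completions",
	}

	e := echo.New()
	stub := func(c echo.Context) error {
		// In production this would be localai.MCPEndpoint(cl, ml, evaluator, appConfig).
		return c.String(http.StatusOK, "mcp-canonical")
	}
	for _, p := range aliases {
		e.POST(p, stub) // single canonical handler, registered once per alias
	}

	for _, p := range aliases {
		req := httptest.NewRequest(http.MethodPost, p, strings.NewReader(`{}`))
		rec := httptest.NewRecorder()
		e.ServeHTTP(rec, req)
		if rec.Code != http.StatusOK || rec.Body.String() != "mcp-canonical" {
			t.Fatalf("alias %s: got status %d, body %q", p, rec.Code, rec.Body.String())
		}
	}
}

Something along these lines, placed next to the route registration code, would pin down the behavior issue #7772 depended on: every MCP alias resolving to the same endpoint regardless of registration order.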