From d89c7b731a03fb7ecbfba61be47ea10ea8547c88 Mon Sep 17 00:00:00 2001
From: majiayu000 <1835304752@qq.com>
Date: Tue, 30 Dec 2025 16:56:18 +0800
Subject: [PATCH 1/2] fix: resolve duplicate MCP route registration causing
 50% failure rate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #7772

The issue was caused by duplicate registration of the MCP endpoint
/mcp/v1/chat/completions in both openai.go and localai.go, leading to
a race condition where requests would randomly hit different handlers
with incompatible behaviors.

Changes:
- Removed duplicate MCP route registration from openai.go
- Kept the localai.MCPStreamEndpoint as the canonical handler
- Added all three MCP route patterns for backward compatibility:
  * /v1/mcp/chat/completions
  * /mcp/v1/chat/completions
  * /mcp/chat/completions
- Added comments to clarify route ownership and prevent future conflicts
- Fixed formatting in ui_api.go

The localai.MCPStreamEndpoint handler is more feature-complete as it
supports both streaming and non-streaming modes, while the removed
openai.MCPCompletionEndpoint only supported synchronous requests.

This eliminates the ~50% failure rate where the cogito library would
receive "Invalid http method" errors when internal HTTP requests were
routed to the wrong handler.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Sonnet 4.5
Signed-off-by: majiayu000 <1835304752@qq.com>
---
 core/http/routes/localai.go |  4 +++-
 core/http/routes/openai.go  | 19 ++-----------------
 core/http/routes/ui_api.go  |  2 +-
 3 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index 32e030bf34f0..f7db61b0eceb 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -137,7 +137,8 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_TOKENIZE)),
 		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
 
-	// MCP Stream endpoint
+	// MCP endpoint - supports both streaming and non-streaming modes
+	// Note: These are the canonical MCP routes (not duplicated in openai.go)
 	if evaluator != nil {
 		mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
 		mcpStreamMiddleware := []echo.MiddlewareFunc{
@@ -154,6 +155,7 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		}
 		router.POST("/v1/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
 		router.POST("/mcp/v1/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
+		router.POST("/mcp/chat/completions", mcpStreamHandler, mcpStreamMiddleware...)
 	}
 
 	// Agent job routes
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
index 93fed71dbb59..e61e48a050e3 100644
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -79,23 +79,8 @@ func RegisterOpenAIRoutes(app *echo.Echo,
 	app.POST("/completions", completionHandler, completionMiddleware...)
 	app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
 
-	// MCPcompletion
-	mcpCompletionHandler := openai.MCPCompletionEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.TemplatesEvaluator(), application.ApplicationConfig())
-	mcpCompletionMiddleware := []echo.MiddlewareFunc{
-		traceMiddleware,
-		re.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
-		re.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
-		func(next echo.HandlerFunc) echo.HandlerFunc {
-			return func(c echo.Context) error {
-				if err := re.SetOpenAIRequest(c); err != nil {
-					return err
-				}
-				return next(c)
-			}
-		},
-	}
-	app.POST("/mcp/v1/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
-	app.POST("/mcp/chat/completions", mcpCompletionHandler, mcpCompletionMiddleware...)
+	// Note: MCP endpoints are registered in localai.go to avoid route conflicts
+	// The localai.MCPStreamEndpoint handler supports both streaming and non-streaming modes
 
 	// embeddings
 	embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
diff --git a/core/http/routes/ui_api.go b/core/http/routes/ui_api.go
index 84af2e32fe57..78b19468f612 100644
--- a/core/http/routes/ui_api.go
+++ b/core/http/routes/ui_api.go
@@ -954,7 +954,7 @@ func RegisterUIAPIRoutes(app *echo.Echo, cl *config.ModelConfigLoader, ml *model
 		if !appConfig.EnableTracing {
 			return c.JSON(503, map[string]any{
 				"error": "Tracing disabled",
-				})
+			})
 		}
 		traces := middleware.GetTraces()
 		return c.JSON(200, map[string]interface{}{

From 2adddef5fe266037b6c53c644cab06e7346edba8 Mon Sep 17 00:00:00 2001
From: Ettore Di Giacinto
Date: Fri, 2 Jan 2026 21:34:23 +0100
Subject: [PATCH 2/2] Address feedback from review

Signed-off-by: Ettore Di Giacinto
---
 core/http/endpoints/localai/mcp.go |   4 +-
 core/http/endpoints/openai/mcp.go  | 148 -----------------------------
 core/http/routes/localai.go        |   4 +-
 core/http/routes/openai.go         |   3 -
 4 files changed, 4 insertions(+), 155 deletions(-)
 delete mode 100644 core/http/endpoints/openai/mcp.go

diff --git a/core/http/endpoints/localai/mcp.go b/core/http/endpoints/localai/mcp.go
index a2367fbc3ccb..721f97a69e81 100644
--- a/core/http/endpoints/localai/mcp.go
+++ b/core/http/endpoints/localai/mcp.go
@@ -53,12 +53,12 @@ type MCPErrorEvent struct {
 	Message string `json:"message"`
 }
 
-// MCPStreamEndpoint is the SSE streaming endpoint for MCP chat completions
+// MCPEndpoint is the endpoint for MCP chat completions. It supports an SSE mode, but that mode is not compatible with the OpenAI APIs.
 // @Summary Stream MCP chat completions with reasoning, tool calls, and results
 // @Param request body schema.OpenAIRequest true "query params"
 // @Success 200 {object} schema.OpenAIResponse "Response"
 // @Router /v1/mcp/chat/completions [post]
-func MCPStreamEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
+func MCPEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
 	return func(c echo.Context) error {
 		ctx := c.Request().Context()
 		created := int(time.Now().Unix())
diff --git a/core/http/endpoints/openai/mcp.go b/core/http/endpoints/openai/mcp.go
deleted file mode 100644
index e9987cd54039..000000000000
--- a/core/http/endpoints/openai/mcp.go
+++ /dev/null
@@ -1,148 +0,0 @@
-package openai
-
-import (
-	"context"
-	"encoding/json"
-	"errors"
-	"fmt"
-	"net"
-	"time"
-
-	"github.com/labstack/echo/v4"
-	"github.com/mudler/LocalAI/core/config"
-	mcpTools "github.com/mudler/LocalAI/core/http/endpoints/mcp"
-	"github.com/mudler/LocalAI/core/http/middleware"
-
-	"github.com/google/uuid"
-	"github.com/mudler/LocalAI/core/schema"
-	"github.com/mudler/LocalAI/core/templates"
-	"github.com/mudler/LocalAI/pkg/model"
-	"github.com/mudler/cogito"
-	"github.com/mudler/xlog"
-)
-
-// MCPCompletionEndpoint is the OpenAI Completion API endpoint https://platform.openai.com/docs/api-reference/completions
-// @Summary Generate completions for a given prompt and model.
-// @Param request body schema.OpenAIRequest true "query params"
-// @Success 200 {object} schema.OpenAIResponse "Response"
-// @Router /mcp/v1/completions [post]
-func MCPCompletionEndpoint(cl *config.ModelConfigLoader, ml *model.ModelLoader, evaluator *templates.Evaluator, appConfig *config.ApplicationConfig) echo.HandlerFunc {
-	// We do not support streaming mode (Yet?)
-	return func(c echo.Context) error {
-		created := int(time.Now().Unix())
-
-		ctx := c.Request().Context()
-
-		// Handle Correlation
-		id := c.Request().Header.Get("X-Correlation-ID")
-		if id == "" {
-			id = uuid.New().String()
-		}
-
-		input, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_LOCALAI_REQUEST).(*schema.OpenAIRequest)
-		if !ok || input.Model == "" {
-			return echo.ErrBadRequest
-		}
-
-		config, ok := c.Get(middleware.CONTEXT_LOCALS_KEY_MODEL_CONFIG).(*config.ModelConfig)
-		if !ok || config == nil {
-			return echo.ErrBadRequest
-		}
-
-		if config.MCP.Servers == "" && config.MCP.Stdio == "" {
-			return fmt.Errorf("no MCP servers configured")
-		}
-
-		// Get MCP config from model config
-		remote, stdio, err := config.MCP.MCPConfigFromYAML()
-		if err != nil {
-			return fmt.Errorf("failed to get MCP config: %w", err)
-		}
-
-		// Check if we have tools in cache, or we have to have an initial connection
-		sessions, err := mcpTools.SessionsFromMCPConfig(config.Name, remote, stdio)
-		if err != nil {
-			return fmt.Errorf("failed to get MCP sessions: %w", err)
-		}
-
-		if len(sessions) == 0 {
-			return fmt.Errorf("no working MCP servers found")
-		}
-
-		fragment := cogito.NewEmptyFragment()
-
-		for _, message := range input.Messages {
-			fragment = fragment.AddMessage(message.Role, message.StringContent)
-		}
-
-		_, port, err := net.SplitHostPort(appConfig.APIAddress)
-		if err != nil {
-			return err
-		}
-
-		apiKey := ""
-		if appConfig.ApiKeys != nil {
-			apiKey = appConfig.ApiKeys[0]
-		}
-
-		ctxWithCancellation, cancel := context.WithCancel(ctx)
-		defer cancel()
-
-		// TODO: instead of connecting to the API, we should just wire this internally
-		// and act like completion.go.
-		// We can do this as cogito expects an interface and we can create one that
-		// we satisfy to just call internally ComputeChoices
-		defaultLLM := cogito.NewOpenAILLM(config.Name, apiKey, "http://127.0.0.1:"+port)
-
-		// Build cogito options using the consolidated method
-		cogitoOpts := config.BuildCogitoOptions()
-
-		cogitoOpts = append(
-			cogitoOpts,
-			cogito.WithContext(ctxWithCancellation),
-			cogito.WithMCPs(sessions...),
-			cogito.WithStatusCallback(func(s string) {
-				xlog.Debug("[model agent] Status", "model", config.Name, "status", s)
-			}),
-			cogito.WithReasoningCallback(func(s string) {
-				xlog.Debug("[model agent] Reasoning", "model", config.Name, "reasoning", s)
-			}),
-			cogito.WithToolCallBack(func(t *cogito.ToolChoice, state *cogito.SessionState) cogito.ToolCallDecision {
-				xlog.Debug("[model agent] Tool call", "model", config.Name, "tool", t.Name, "reasoning", t.Reasoning, "arguments", t.Arguments)
-				return cogito.ToolCallDecision{
-					Approved: true,
-				}
-			}),
-			cogito.WithToolCallResultCallback(func(t cogito.ToolStatus) {
-				xlog.Debug("[model agent] Tool call result", "model", config.Name, "tool", t.Name, "result", t.Result, "tool_arguments", t.ToolArguments)
-			}),
-		)
-
-		f, err := cogito.ExecuteTools(
-			defaultLLM, fragment,
-			cogitoOpts...,
-		)
-		if err != nil && !errors.Is(err, cogito.ErrNoToolSelected) {
-			return err
-		}
-
-		f, err = defaultLLM.Ask(ctx, f)
-		if err != nil {
-			return err
-		}
-
-		resp := &schema.OpenAIResponse{
-			ID:      id,
-			Created: created,
-			Model:   input.Model, // we have to return what the user sent here, due to OpenAI spec.
-			Choices: []schema.Choice{{Message: &schema.Message{Role: "assistant", Content: &f.LastMessage().Content}}},
-			Object:  "text_completion",
-		}
-
-		jsonResult, _ := json.Marshal(resp)
-		xlog.Debug("Response", "response", string(jsonResult))
-
-		// Return the prediction in the response body
-		return c.JSON(200, resp)
-	}
-}
diff --git a/core/http/routes/localai.go b/core/http/routes/localai.go
index f7db61b0eceb..f70a44b2109c 100644
--- a/core/http/routes/localai.go
+++ b/core/http/routes/localai.go
@@ -138,9 +138,9 @@ func RegisterLocalAIRoutes(router *echo.Echo,
 		requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.TokenizeRequest) }))
 
 	// MCP endpoint - supports both streaming and non-streaming modes
-	// Note: These are the canonical MCP routes (not duplicated in openai.go)
+	// Note: streaming mode is NOT compatible with the OpenAI APIs; it streams a custom event set with additional states (reasoning, tool calls, results)
 	if evaluator != nil {
-		mcpStreamHandler := localai.MCPStreamEndpoint(cl, ml, evaluator, appConfig)
+		mcpStreamHandler := localai.MCPEndpoint(cl, ml, evaluator, appConfig)
 		mcpStreamMiddleware := []echo.MiddlewareFunc{
 			requestExtractor.BuildFilteredFirstAvailableDefaultModel(config.BuildUsecaseFilterFn(config.FLAG_CHAT)),
 			requestExtractor.SetModelAndConfig(func() schema.LocalAIRequest { return new(schema.OpenAIRequest) }),
diff --git a/core/http/routes/openai.go b/core/http/routes/openai.go
index e61e48a050e3..2d62859f317f 100644
--- a/core/http/routes/openai.go
+++ b/core/http/routes/openai.go
@@ -79,9 +79,6 @@ func RegisterOpenAIRoutes(app *echo.Echo,
 	app.POST("/completions", completionHandler, completionMiddleware...)
 	app.POST("/v1/engines/:model/completions", completionHandler, completionMiddleware...)
 
-	// Note: MCP endpoints are registered in localai.go to avoid route conflicts
-	// The localai.MCPStreamEndpoint handler supports both streaming and non-streaming modes
-
 	// embeddings
 	embeddingHandler := openai.EmbeddingsEndpoint(application.ModelConfigLoader(), application.ModelLoader(), application.ApplicationConfig())
 	embeddingMiddleware := []echo.MiddlewareFunc{
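
---

Reviewer note (not part of either patch): a minimal regression-test sketch for the routing contract the patches establish. The test name, stub handler, and suggested location are illustrative; only the three alias paths and the one-canonical-handler rule come from the patches. It registers a single handler on the three MCP route aliases, as core/http/routes/localai.go now does, and asserts that a POST to each alias reaches that same handler, so a reintroduced duplicate registration with a diverging handler would surface here as a failing alias.

package routes_test

import (
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"

	"github.com/labstack/echo/v4"
)

// TestMCPRouteAliases checks that all three MCP route aliases are served
// by one and the same handler, mirroring the registration now done in
// RegisterLocalAIRoutes. The stub stands in for localai.MCPEndpoint.
func TestMCPRouteAliases(t *testing.T) {
	aliases := []string{
		"/v1/mcp/chat/completions",
		"/mcp/v1/chat/completions",
		"/mcp/chat/completions",
	}

	e := echo.New()
	stub := func(c echo.Context) error {
		// In production this would be localai.MCPEndpoint(cl, ml, evaluator, appConfig).
		return c.String(http.StatusOK, "mcp-canonical")
	}
	for _, p := range aliases {
		e.POST(p, stub) // single canonical handler, registered once per alias
	}

	for _, p := range aliases {
		req := httptest.NewRequest(http.MethodPost, p, strings.NewReader(`{}`))
		rec := httptest.NewRecorder()
		e.ServeHTTP(rec, req)
		if rec.Code != http.StatusOK || rec.Body.String() != "mcp-canonical" {
			t.Fatalf("alias %s: got status %d, body %q", p, rec.Code, rec.Body.String())
		}
	}
}

Something along these lines, placed next to the route registration code, would pin down the behavior issue #7772 depended on: every MCP alias resolving to the same endpoint regardless of registration order.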