diff --git a/cortex/cli.py b/cortex/cli.py index ea8976d1..a155ed47 100644 --- a/cortex/cli.py +++ b/cortex/cli.py @@ -1,8 +1,10 @@ import argparse +import json import logging import os import sys import time +import urllib.request from datetime import datetime from pathlib import Path from typing import TYPE_CHECKING, Any @@ -36,11 +38,248 @@ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) +def _is_interactive() -> bool: + """Check if stdin is connected to a terminal (interactive mode). + + Returns: + True if running in interactive terminal, False if piped or redirected. + """ + return sys.stdin.isatty() + + class CortexCLI: def __init__(self, verbose: bool = False): self.spinner_chars = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"] self.spinner_idx = 0 self.verbose = verbose + self.stdin_data = None + if not sys.stdin.isatty(): + try: + self.stdin_data = sys.stdin.read() + except Exception: + # Silently ignore any stdin reading errors (OSError, UnicodeDecodeError, etc.) + # stdin_data remains None and will be handled gracefully downstream + pass + + def _build_prompt_with_stdin(self, user_prompt: str) -> str: + """ + Combine optional stdin context with user prompt. 
+ """ + stdin_data = getattr(self, "stdin_data", None) + if stdin_data: + return ( + "Context (from stdin):\n" f"{stdin_data}\n\n" "User instruction:\n" f"{user_prompt}" + ) + return user_prompt + + def _get_confidence_level(self, intent: dict | None) -> str: + """Determine confidence level: 'high', 'medium', or 'low'.""" + if not intent: + return "low" + + domain = intent.get("domain", "unknown") + try: + confidence = float(intent.get("confidence", 0.0)) + except (TypeError, ValueError): + confidence = 0.0 + + # If domain is unknown, it's always low confidence + if domain == "unknown": + return "low" + + # High: domain is known AND confidence >= 0.7 + if confidence >= 0.7: + return "high" + + # Medium: domain is known AND confidence >= 0.5 (even if specifics are vague) + if confidence >= 0.5: + return "medium" + + # Low: domain is known but confidence is very low (< 0.5) + # Ask for clarifying questions rather than complete re-spec + return "medium" + + def _call_llm_for_text( + self, + interpreter: CommandInterpreter, + prompt: str, + system_message: str = "You are a helpful assistant.", + temperature: float = 0.7, + max_tokens: int = 100, + fallback: str = "Unable to generate response", + ) -> str: + """ + Helper method to call LLM with unified provider handling. 
+ Args: + interpreter: CommandInterpreter instance with provider and client + prompt: User prompt to send to LLM + system_message: System message for the LLM + temperature: Sampling temperature + max_tokens: Maximum tokens in response + fallback: Default response if LLM unavailable + Returns: + LLM-generated text or fallback message + """ + try: + if interpreter.provider.value == "openai": # compare the enum value (lowercase id), not .name (uppercase member name) + response = interpreter.client.chat.completions.create( + model=interpreter.model, + messages=[ + {"role": "system", "content": system_message}, + {"role": "user", "content": prompt}, + ], + temperature=temperature, + max_tokens=max_tokens, + ) + return response.choices[0].message.content.strip() + elif interpreter.provider.value == "claude": + response = interpreter.client.messages.create( + model=interpreter.model, + max_tokens=max_tokens, + temperature=temperature, + system=system_message, + messages=[{"role": "user", "content": prompt}], + ) + return response.content[0].text.strip() + elif interpreter.provider.value == "ollama": + # Defensive check: ollama_url only exists if provider is OLLAMA + if not hasattr(interpreter, "ollama_url"): + return fallback + + full_prompt = f"System: {system_message}\n\nUser: {prompt}" + data = json.dumps( + { + "model": interpreter.model, + "prompt": full_prompt, + "stream": False, + "options": {"temperature": temperature, "num_predict": max_tokens}, + } + ).encode("utf-8") + req = urllib.request.Request( + f"{interpreter.ollama_url}/api/generate", + data=data, + headers={"Content-Type": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=30) as response: + result = json.loads(response.read().decode("utf-8")) + return result.get("response", "").strip() + elif interpreter.provider.value == "fake": + return fallback + except Exception: + pass + + return fallback + + def _generate_understanding_message( + self, interpreter: CommandInterpreter, intent: dict, user_input: str + ) -> str: + """Generate a natural language message showing what we understood from 
the user's request.""" + action = intent.get("action", "install") + domain = intent.get("domain", "general") + + prompt = f"User said: '{user_input}'\nInternal understanding: action={action}, domain={domain}\n\nGenerate a natural, friendly response showing what you understood. Be concise (1-2 sentences). Respond with just the message:" + + # Generate fallback based on domain + if domain == "unknown" or domain == "general": + fallback = "Got it! I'm ready to help you install what you need. Let me set that up." + else: + fallback = ( + f"I understand you're looking for {domain} tools. Let me prepare the installation." + ) + + return self._call_llm_for_text( + interpreter, + prompt, + system_message="You are a helpful assistant. Respond naturally and concisely.", + temperature=0.5, + max_tokens=100, + fallback=fallback, + ) + + def _generate_clarifying_questions( + self, interpreter: CommandInterpreter, intent: dict, user_input: str + ) -> str: + """Generate natural clarifying questions for medium-confidence intents.""" + domain = intent.get("domain", "unknown") + + prompt = f"User said: '{user_input}'\nDomain: {domain}\n\nGenerate 1-2 natural, conversational clarifying questions to help narrow down what they specifically need. Ask about specific tools, frameworks, or use cases within this domain.\n\nRespond with just the questions:" + + fallback = ( + f"Would you like to specify which tools or frameworks in {domain}?" + if domain != "unknown" + else "Could you provide more details about what you'd like to install?" + ) + + return self._call_llm_for_text( + interpreter, + prompt, + system_message="You are a helpful assistant. 
Generate clarifying questions naturally and conversationally.", + temperature=0.7, + max_tokens=150, + fallback=fallback, + ) + + def _generate_clarification_request( + self, interpreter: CommandInterpreter, user_input: str, intent: dict | None = None + ) -> str: + """Generate a natural request for clarification when intent is completely unclear.""" + prompt = f"User said: '{user_input}'\n\nGenerate a friendly, natural message asking them to clarify what they want to install. Be conversational and helpful.\n\nRespond with just the message:" + + return self._call_llm_for_text( + interpreter, + prompt, + system_message="You are a helpful assistant. Be natural and friendly when asking for clarification.", + temperature=0.7, + max_tokens=100, + fallback="Could you be more specific about what you'd like to install?", + ) + + def _generate_suggestions( + self, interpreter: CommandInterpreter, user_input: str, intent: dict | None = None + ) -> list[str]: + """Generate suggestion alternatives for ambiguous requests.""" + domain_hint = "" + if intent and intent.get("domain") not in (None, "", "unknown"): + domain_hint = f" in the {intent['domain']} domain" + + prompt = f"Suggest 3 clearer, more specific installation requests similar to: '{user_input}'{domain_hint}.\n\nFormat your response as:\n1. suggestion one\n2. suggestion two\n3. suggestion three" + + content = self._call_llm_for_text( + interpreter, + prompt, + system_message="You are a helpful assistant that suggests installation requests. Be specific and relevant.", + temperature=0.3, + max_tokens=200, + fallback="", + ) + + # Parse numbered list from content + if not content: + # Return default suggestions if LLM failed + return [ + "machine learning tools for Python", + "web server for static sites", + "database for small projects", + ] + + suggestions = [] + lines = content.split("\n") + for line in lines: + line = line.strip() + if line and line[0].isdigit() and line[1:3] in [". ", ") "]: + suggestion = line[3:].strip() # drop only the leading "N. " / "N) " marker; keep the rest intact
+ if suggestion: + suggestions.append(suggestion) + + if len(suggestions) >= 3: + return suggestions[:3] + + # Fallback suggestions + return [ + "machine learning tools for Python", + "web server for static sites", + "database for small projects", + ] # Define a method to handle Docker-specific permission repairs def docker_permissions(self, args: argparse.Namespace) -> int: @@ -638,6 +877,11 @@ def install( execute: bool = False, dry_run: bool = False, parallel: bool = False, + api_key: str | None = None, + provider: str | None = None, + skip_clarification: bool = False, + max_retries: int = 3, + retry_count: int = 0, ): # Validate input first is_valid, error = validate_install_request(software) @@ -658,11 +902,13 @@ def install( "pip3 install jupyter numpy pandas" ) - api_key = self._get_api_key() + api_key = api_key if api_key is not None else self._get_api_key() if not api_key: return 1 - provider = self._get_provider() + provider = provider if provider is not None else self._get_provider() + + # --------------------------------------------------- self._debug(f"Using provider: {provider}") self._debug(f"API key: {api_key[:10]}...{api_key[-4:]}") @@ -675,6 +921,85 @@ def install( self._print_status("🧠", "Understanding request...") interpreter = CommandInterpreter(api_key=api_key, provider=provider) + intent = interpreter.extract_intent(software) + + # Determine confidence level: high, medium, or low + confidence_level = self._get_confidence_level(intent) + + # If user has already clarified, skip asking again + if skip_clarification: + confidence_level = "high" + + if confidence_level == "low": + # Low confidence: ask user to clarify what they want + if retry_count >= max_retries: + # Max retries exceeded, show suggestions + if _is_interactive(): + suggestions = self._generate_suggestions(interpreter, software, intent) + print(f"\nHere are some suggestions:\n{suggestions}\n") + self._print_error( + "Unable to determine 
installation requirements after multiple clarifications." + ) + return 1 + + if _is_interactive(): + clarification_msg = self._generate_clarification_request( + interpreter, software, intent + ) + print(f"\n{clarification_msg}\n") + clarified = input("What would you like to install? ").strip() + if clarified: + return self.install( + clarified, + execute, + dry_run, + parallel, + api_key, + provider, + skip_clarification=True, + max_retries=max_retries, + retry_count=retry_count + 1, + ) + return 1 + else: + return 1 + + elif confidence_level == "medium": + # Medium confidence: ask clarifying questions + if _is_interactive(): + # Show what we understood first + understanding_msg = self._generate_understanding_message( + interpreter, intent, software + ) + print(f"\n{understanding_msg}") + + # Ask clarifying questions + clarifying_qs = self._generate_clarifying_questions( + interpreter, intent, software + ) + clarified = input(f"\n{clarifying_qs}\n> ").strip() + if clarified: + return self.install( + clarified, + execute, + dry_run, + parallel, + api_key, + provider, + skip_clarification=True, + ) + return 1 + # In non-interactive mode, proceed with current intent + + # High confidence: proceed directly + # Generate natural understanding message + if _is_interactive(): + understanding_msg = self._generate_understanding_message( + interpreter, intent, software + ) + print(f"\n{understanding_msg}\n") + + install_mode = intent.get("install_mode", "system") self._print_status("📦", "Planning installation...") @@ -682,8 +1007,21 @@ def install( self._animate_spinner("Analyzing system requirements...") self._clear_line() - commands = interpreter.parse(f"install {software}") + # ---------- Build command-generation prompt ---------- + if install_mode == "python": + base_prompt = ( + f"install {software}. " + "Use pip and Python virtual environments. " + "Do NOT use sudo or system package managers." 
+ ) + else: + base_prompt = f"install {software}" + + prompt = self._build_prompt_with_stdin(base_prompt) + # --------------------------------------------------- + # Parse commands from prompt + commands = interpreter.parse(prompt) if not commands: self._print_error( "No commands generated. Please try again with a different request." @@ -700,10 +1038,64 @@ def install( ) self._print_status("⚙️", f"Installing {software}...") + # Create summary of what we're doing + description = intent.get("description", software) + domain = intent.get("domain", "general") + print(f"\nPlan: {description} in {domain} domain") print("\nGenerated commands:") for i, cmd in enumerate(commands, 1): print(f" {i}. {cmd}") + # ---------- User confirmation ---------- + if execute: + if not _is_interactive(): + # Non-interactive mode (pytest / CI) → auto-approve + cx_print("⚠️ Auto-approving in non-interactive mode", "warning") + choice = "y" + else: + print("\nDo you want to proceed with these commands?") + print(" [y] Yes, execute") + print(" [e] Edit commands") + print(" [n] No, cancel") + choice = input("Enter choice [y/e/n]: ").strip().lower() + + if choice == "n": + print("❌ Installation cancelled by user.") + return 0 + + elif choice == "e": + if not _is_interactive(): + self._print_error("Cannot edit commands in non-interactive mode") + return 1 + + print("Enter commands (one per line, empty line to finish):") + edited_commands = [] + while True: + line = input("> ").strip() + if not line: + break + edited_commands.append(line) + + if not edited_commands: + print("❌ No commands provided. Cancelling.") + return 1 + + commands = edited_commands + + print("\n✅ Updated commands:") + for i, cmd in enumerate(commands, 1): + print(f" {i}. {cmd}") + + confirm = input("\nExecute edited commands? [y/n]: ").strip().lower() + if confirm != "y": + print("❌ Installation cancelled.") + return 0 + + elif choice != "y": + print("❌ Invalid choice. 
Cancelling.") + return 1 + # ------------------------------------- + if dry_run: print("\n(Dry run mode - commands not executed)") if install_id: diff --git a/cortex/llm/interpreter.py b/cortex/llm/interpreter.py index 069771b8..06a8e799 100644 --- a/cortex/llm/interpreter.py +++ b/cortex/llm/interpreter.py @@ -41,6 +41,9 @@ def __init__( """ self.api_key = api_key self.provider = APIProvider(provider.lower()) + # ✅ Defensive Ollama base URL initialization + if self.provider == APIProvider.OLLAMA: + self.ollama_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434") if cache is None: try: @@ -106,75 +109,280 @@ def _initialize_client(self): # Fake provider uses predefined commands from environment self.client = None # No client needed for fake provider - def _get_system_prompt(self, simplified: bool = False) -> str: + def _get_system_prompt( + self, simplified: bool = False, domain: str | None = None, install_mode: str | None = None + ) -> str: """Get system prompt for command interpretation. Args: simplified: If True, return a shorter prompt optimized for local models + domain: Optional domain context to guide command generation + install_mode: Type of installation ('system', 'python', 'mixed') """ + domain_context = "" + if domain and domain != "unknown": + domain_context = ( + f"\n\nDomain: {domain}\n" + f"Generate commands specific to this domain only. " + f"Avoid installing unrelated packages." + ) + + # Add install mode constraint + install_mode_constraint = "" + if install_mode == "python": + install_mode_constraint = "\n⚠️ CONSTRAINT: This is a Python package installation. ALWAYS use 'pip install' or 'pip3 install'. NEVER use 'apt install'." + elif install_mode == "mixed": + install_mode_constraint = "\n⚠️ CONSTRAINT: Use 'pip install' for Python packages and 'apt install' for system-level tools only." 
+ + common_rules = """ + Execution environment: + - Target OS: Linux (Ubuntu/Debian family) + - Commands will be executed by a non-interactive POSIX-compatible shell + + Rules: + - Respond with ONLY valid JSON + - The response MUST be a JSON array wrapped in an object under the "commands" key + - No explanations, no markdown, no code blocks + - Commands must be POSIX-compliant (portable across Linux shells) + - Do NOT assume interactive shells + - Avoid shell-specific features that are not POSIX-compliant + - Commands should be safe, atomic, and sequential + - Avoid destructive operations unless explicitly requested + - Use apt for system packages on Debian/Ubuntu + - Add sudo only for system-level commands + ⚠️ CRITICAL RULE - PYTHON PACKAGES MUST USE PIP: + - When the request involves Python libraries, packages, or tools (e.g., numpy, pandas, + Flask, Django, TensorFlow, or anything installed via PyPI): use "pip install" or "pip3 install" + - NEVER use "apt install python3-*" for any Python package or library + - Use apt ONLY for system-level tools (like git, curl, build-essential, etc.) + - If unsure whether something is a Python package: use pip + """ + if simplified: - return """You must respond with ONLY a JSON object. No explanations, no markdown, no code blocks. + return f"""{common_rules} -Format: {"commands": ["command1", "command2"]} + Format: + {{"commands": ["command1", "command2"]}} -Example input: install nginx -Example output: {"commands": ["sudo apt update", "sudo apt install -y nginx"]} + Example input: install nginx + Example output: + {{"commands": ["sudo apt update", "sudo apt install -y nginx"]}} + {domain_context}{install_mode_constraint} + """ -Rules: -- Use apt for Ubuntu packages -- Add sudo for system commands -- Return ONLY the JSON object""" + return f"""You are a Linux system command expert. + Convert natural language requests into safe, executable bash commands. - return """You are a Linux system command expert. 
Convert natural language requests into safe, validated bash commands. + {common_rules} -Rules: -1. Return ONLY a JSON array of commands -2. Each command must be a safe, executable bash command -3. Commands should be atomic and sequential -4. Avoid destructive operations without explicit user confirmation -5. Use package managers appropriate for Debian/Ubuntu systems (apt) -6. Include necessary privilege escalation (sudo) when required -7. Validate command syntax before returning + Additional guidance: + - If virtual environments are needed, prefer invoking tools directly + (e.g., venv/bin/pip install ...) instead of relying on shell activation. + - Validate command syntax before returning. + + Format: + {{"commands": ["command1", "command2", ...]}} + + Example request: + "install docker with nvidia support" -Format: -{"commands": ["command1", "command2", ...]} + Example response: + {{"commands": [ + "sudo apt update", + "sudo apt install -y docker.io", + "sudo apt install -y nvidia-docker2", + "sudo systemctl restart docker" + ]}} + {domain_context}{install_mode_constraint} + """ + + def _extract_intent_ollama(self, user_input: str) -> dict: + import urllib.error + import urllib.request -Example request: "install docker with nvidia support" -Example response: {"commands": ["sudo apt update", "sudo apt install -y docker.io", "sudo apt install -y nvidia-docker2", "sudo systemctl restart docker"]}""" + prompt = f""" + {self._get_intent_prompt()} + + User request: + {user_input} + """ + + data = json.dumps( + { + "model": self.model, + "prompt": prompt, + "stream": False, + "options": { + "temperature": 0.2, + "num_predict": 300, + }, # num_predict is max_tokens equivalent for Ollama + } + ).encode("utf-8") + + req = urllib.request.Request( + f"{self.ollama_url}/api/generate", + data=data, + headers={"Content-Type": "application/json"}, + ) - def _call_openai(self, user_input: str) -> list[str]: + try: + with urllib.request.urlopen(req, timeout=60) as response: + raw = 
json.loads(response.read().decode("utf-8")) + text = raw.get("response", "") + return self._parse_intent_from_text(text) + + except Exception: + # True failure → unknown intent + return { + "action": "unknown", + "domain": "unknown", + "description": "Failed to extract intent", + "ambiguous": True, + "confidence": 0.0, + "install_mode": "system", + } + + def _get_intent_prompt(self) -> str: + return """You are an intent extraction engine for a Linux package manager. + +Extract the user's intent as JSON. Score confidence based on whether the domain (not specific package names) can be identified. + +Key principle: If the domain is identifiable, confidence should be >= 0.5 even if specific details are vague. + +Confidence scoring: +- HIGH (0.8-1.0): Domain is clearly mentioned or obvious from context +- MEDIUM (0.5-0.8): Domain is recognizable but specifics are vague or need clarification +- LOW (0.0-0.5): Domain cannot be identified or request is completely unclear + +Rules: +- Do NOT suggest commands or list packages +- Focus on domain identification, not package specificity +- If a domain keyword appears, confidence should be at least 0.6+ +- Set ambiguous=true if specifics need clarification but domain is recognized +- Respond ONLY in valid JSON + +Install mode: system (apt/system packages), python (pip/virtualenv), mixed (both) + +Response format (JSON only): +{ + "action": "install", + "domain": "...", + "install_mode": "...", + "description": "brief explanation", + "ambiguous": true/false, + "confidence": 0.0 +} +""" + + def _call_openai( + self, user_input: str, domain: str | None = None, install_mode: str | None = None + ) -> list[str]: try: response = self.client.chat.completions.create( model=self.model, messages=[ - {"role": "system", "content": self._get_system_prompt()}, + { + "role": "system", + "content": self._get_system_prompt( + domain=domain, install_mode=install_mode + ), + }, {"role": "user", "content": user_input}, ], temperature=0.3, 
max_tokens=1000, ) - content = response.choices[0].message.content.strip() + if not response.choices: + raise RuntimeError("OpenAI returned empty response") + content = (response.choices[0].message.content or "").strip() return self._parse_commands(content) except Exception as e: raise RuntimeError(f"OpenAI API call failed: {str(e)}") - def _call_claude(self, user_input: str) -> list[str]: + def _extract_intent_openai(self, user_input: str) -> dict: + try: + response = self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": self._get_intent_prompt()}, + {"role": "user", "content": user_input}, + ], + temperature=0.2, + max_tokens=300, + ) + + if not response.choices: + raise RuntimeError("OpenAI returned empty response") + content = (response.choices[0].message.content or "").strip() + return self._parse_intent_from_text(content) + except Exception as e: + return { + "action": "unknown", + "domain": "unknown", + "description": f"Failed to extract intent: {str(e)}", + "ambiguous": True, + "confidence": 0.0, + "install_mode": "system", + } + + def _parse_intent_from_text(self, text: str) -> dict: + """ + Extract intent JSON from loose LLM output. + No semantic assumptions. 
+ """ + # Try to locate JSON block + try: + start = text.find("{") + end = text.rfind("}") + if start != -1 and end != -1: + parsed = json.loads(text[start : end + 1]) + + # Minimal validation (structure only) + for key in ["action", "domain", "install_mode", "ambiguous", "confidence"]: + if key not in parsed: + raise ValueError("Missing intent field") + + # Ensure description has a default if missing + if "description" not in parsed: + parsed["description"] = "" + + return parsed + except Exception: + pass + + # If parsing fails, do NOT guess meaning + return { + "action": "unknown", + "domain": "unknown", + "description": "Unstructured intent output", + "ambiguous": True, + "confidence": 0.0, + "install_mode": "system", + } + + def _call_claude( + self, user_input: str, domain: str | None = None, install_mode: str | None = None + ) -> list[str]: try: response = self.client.messages.create( model=self.model, max_tokens=1000, temperature=0.3, - system=self._get_system_prompt(), + system=self._get_system_prompt(domain=domain, install_mode=install_mode), messages=[{"role": "user", "content": user_input}], ) - content = response.content[0].text.strip() + if not response.content: + raise RuntimeError("Claude returned empty response") + content = (response.content[0].text or "").strip() return self._parse_commands(content) except Exception as e: raise RuntimeError(f"Claude API call failed: {str(e)}") - def _call_ollama(self, user_input: str) -> list[str]: + def _call_ollama( + self, user_input: str, domain: str | None = None, install_mode: str | None = None + ) -> list[str]: """Call local Ollama instance using OpenAI-compatible API.""" try: # For local models, be extremely explicit in the user message @@ -186,7 +394,12 @@ def _call_ollama(self, user_input: str) -> list[str]: response = self.client.chat.completions.create( model=self.model, messages=[ - {"role": "system", "content": self._get_system_prompt(simplified=True)}, + { + "role": "system", + "content": 
self._get_system_prompt( + simplified=True, domain=domain, install_mode=install_mode + ), + }, {"role": "user", "content": enhanced_input}, ], temperature=0.1, # Lower temperature for more focused responses @@ -232,6 +445,10 @@ def _repair_json(self, content: str) -> str: return content.strip() def _parse_commands(self, content: str) -> list[str]: + """ + Robust command parser. + Handles strict JSON (OpenAI/Claude) and loose output (Ollama). + """ try: # Strip markdown code blocks if "```json" in content: @@ -254,7 +471,17 @@ def _parse_commands(self, content: str) -> list[str]: # Try to repair common JSON issues content = self._repair_json(content) - data = json.loads(content) + # Attempt to isolate JSON + start = content.find("{") + end = content.rfind("}") + if start != -1 and end != -1: + json_blob = content[start : end + 1] + else: + # No braces found - content is not valid JSON + raise ValueError("No JSON object found in response") + + # First attempt: strict JSON + data = json.loads(json_blob) commands = data.get("commands", []) if not isinstance(commands, list): @@ -267,13 +494,13 @@ def _parse_commands(self, content: str) -> list[str]: for cmd in commands: if isinstance(cmd, str): # Direct string - if cmd: - result.append(cmd) + if cmd.strip(): + result.append(cmd.strip()) elif isinstance(cmd, dict): # Object with "command" key cmd_str = cmd.get("command", "") - if cmd_str: - result.append(cmd_str) + if cmd_str and isinstance(cmd_str, str) and cmd_str.strip(): + result.append(cmd_str.strip()) return result except (json.JSONDecodeError, ValueError) as e: @@ -302,12 +529,13 @@ def _validate_commands(self, commands: list[str]) -> list[str]: return validated - def parse(self, user_input: str, validate: bool = True) -> list[str]: + def parse(self, user_input: str, validate: bool = True, domain: str | None = None) -> list[str]: """Parse natural language input into shell commands. 
Args: user_input: Natural language description of desired action validate: If True, validate commands for dangerous patterns + domain: Optional domain context (e.g., 'database', 'web_server') to guide command generation Returns: List of shell commands to execute @@ -319,8 +547,20 @@ def parse(self, user_input: str, validate: bool = True) -> list[str]: if not user_input or not user_input.strip(): raise ValueError("User input cannot be empty") + # Extract intent first to determine install_mode constraint + try: + intent = self.extract_intent(user_input) + install_mode = intent.get("install_mode", "system") + if not domain: + domain = intent.get("domain", "unknown") + if domain == "unknown": + domain = None + except Exception: + install_mode = "system" + cache_system_prompt = ( - self._get_system_prompt() + f"\n\n[cortex-cache-validate={bool(validate)}]" + self._get_system_prompt(domain=domain, install_mode=install_mode) + + f"\n\n[cortex-cache-validate={bool(validate)}]" ) if self.cache is not None: @@ -334,11 +574,11 @@ def parse(self, user_input: str, validate: bool = True) -> list[str]: return cached if self.provider == APIProvider.OPENAI: - commands = self._call_openai(user_input) + commands = self._call_openai(user_input, domain=domain, install_mode=install_mode) elif self.provider == APIProvider.CLAUDE: - commands = self._call_claude(user_input) + commands = self._call_claude(user_input, domain=domain, install_mode=install_mode) elif self.provider == APIProvider.OLLAMA: - commands = self._call_ollama(user_input) + commands = self._call_ollama(user_input, domain=domain, install_mode=install_mode) elif self.provider == APIProvider.FAKE: commands = self._call_fake(user_input) else: @@ -371,3 +611,84 @@ def parse_with_context( enriched_input = user_input + context return self.parse(enriched_input, validate=validate) + + def _extract_intent_claude(self, user_input: str) -> dict: + """Extract intent from user input using Claude API. 
+ + Args: + user_input: Natural language description of desired action + + Returns: + Dict with keys: action, domain, install_mode, description, ambiguous, confidence + """ + try: + response = self.client.messages.create( + model=self.model, + max_tokens=300, + temperature=0.2, + system=self._get_intent_prompt(), + messages=[{"role": "user", "content": user_input}], + ) + + if not response.content: + raise RuntimeError("Claude returned empty response") + content = (response.content[0].text or "").strip() + return self._parse_intent_from_text(content) + except Exception as e: + return { + "action": "unknown", + "domain": "unknown", + "description": f"Failed to extract intent: {str(e)}", + "ambiguous": True, + "confidence": 0.0, + "install_mode": "system", + } + + def extract_intent(self, user_input: str) -> dict: + """Extract intent from natural language input. + + Analyzes the user's request to determine: + - action: Type of operation (install, remove, etc.) + - domain: Category of request (machine learning, web development, etc.) 
+ - install_mode: Installation type (system, python, or mixed) + - confidence: Confidence level (0.0-1.0) + - ambiguous: Whether clarification is needed + + Args: + user_input: Natural language description of desired action + + Returns: + Dict with keys: action, domain, install_mode, description, ambiguous, confidence + + Raises: + ValueError: If input is empty + """ + if not user_input or not user_input.strip(): + raise ValueError("User input cannot be empty") + + if self.provider == APIProvider.OPENAI: + return self._extract_intent_openai(user_input) + elif self.provider == APIProvider.CLAUDE: + return self._extract_intent_claude(user_input) + elif self.provider == APIProvider.OLLAMA: + return self._extract_intent_ollama(user_input) + elif self.provider == APIProvider.FAKE: + # Check for configurable fake intent from environment + fake_intent_env = os.environ.get("CORTEX_FAKE_INTENT") + if fake_intent_env: + try: + return json.loads(fake_intent_env) + except json.JSONDecodeError: + pass # Fall back to default + + # Return realistic intent for testing (not ambiguous) + return { + "action": "install", + "domain": "general", + "install_mode": "system", + "description": user_input, + "ambiguous": False, + "confidence": 0.8, + } + else: + raise ValueError(f"Unsupported provider: {self.provider}") diff --git a/docs/docs/nl-installer.md b/docs/docs/nl-installer.md new file mode 100644 index 00000000..9867b733 --- /dev/null +++ b/docs/docs/nl-installer.md @@ -0,0 +1,18 @@ +# Natural Language Installer (NL Installer) + +Cortex supports installing software using natural language instead of +explicit package names. + +Example: +```bash +cortex install "something for machine learning" +``` +The request is converted into shell commands using the CommandInterpreter. +By default, commands are generated and printed (dry-run). +Execution only happens when `--execute` is explicitly provided. 
+ +```bash +cortex install "something for machine learning" --execute +``` + +The NL installer is validated using unit tests in `tests/test_nl_installer.py`. \ No newline at end of file diff --git a/tests/test_nl_parser_cases.py b/tests/test_nl_parser_cases.py new file mode 100644 index 00000000..803972ba --- /dev/null +++ b/tests/test_nl_parser_cases.py @@ -0,0 +1,159 @@ +import json + +import pytest + +from cortex.llm.interpreter import CommandInterpreter + + +@pytest.fixture +def fake_interpreter(monkeypatch: pytest.MonkeyPatch) -> CommandInterpreter: + """Fixture providing a CommandInterpreter configured with fake provider for testing. + + Sets CORTEX_FAKE_COMMANDS environment variable with predefined test commands + and returns an interpreter instance that bypasses external API calls. + """ + monkeypatch.setenv( + "CORTEX_FAKE_COMMANDS", + '{"commands": ["echo install step 1", "echo install step 2"]}', + ) + return CommandInterpreter(api_key="fake", provider="fake") + + +def test_install_machine_learning(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of machine learning installation requests.""" + commands = fake_interpreter.parse("install something for machine learning") + assert len(commands) > 0 + + +def test_install_web_server(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of web server installation requests.""" + commands = fake_interpreter.parse("I need a web server") + assert isinstance(commands, list) + + +def test_python_dev_environment(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of Python development environment setup requests.""" + commands = fake_interpreter.parse("set up python development environment") + assert commands + + +def test_install_docker_kubernetes(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of containerization installation requests.""" + commands = fake_interpreter.parse("install containerization tools") + assert len(commands) >= 1 + + +def 
test_ambiguous_request(fake_interpreter: CommandInterpreter) -> None: + """Test handling of ambiguous installation requests.""" + commands = fake_interpreter.parse("install something") + assert commands # ambiguity handled, not crash + + +def test_typo_tolerance(fake_interpreter: CommandInterpreter) -> None: + """Test tolerance for typos in installation requests.""" + commands = fake_interpreter.parse("instal psycpg2") + assert commands + + +def test_unknown_request(fake_interpreter: CommandInterpreter) -> None: + """Test handling of unknown/unexpected installation requests.""" + commands = fake_interpreter.parse("do something cool") + assert isinstance(commands, list) + + +def test_multiple_tools_request(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of requests for multiple tools.""" + commands = fake_interpreter.parse("install tools for video editing") + assert commands + + +def test_short_query(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of very short installation queries.""" + commands = fake_interpreter.parse("nginx") + assert commands + + +def test_sentence_style_query(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of polite, sentence-style installation requests.""" + commands = fake_interpreter.parse("can you please install a database for me") + assert commands + + +def test_fake_intent_extraction_default_is_not_ambiguous( + fake_interpreter: CommandInterpreter, +) -> None: + """Test that fake intent extraction defaults to non-ambiguous.""" + intent = fake_interpreter.extract_intent("install something") + assert intent["ambiguous"] is False + assert intent["domain"] == "general" + + +def test_install_database(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of database installation requests.""" + commands = fake_interpreter.parse("I need a database") + assert isinstance(commands, list) + + +def test_install_containerization(fake_interpreter: CommandInterpreter) -> None: + """Test 
parsing of containerization tools installation requests.""" + commands = fake_interpreter.parse("set up containerization tools") + assert commands + + +def test_install_ml_tools(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of machine learning tools installation requests.""" + commands = fake_interpreter.parse("machine learning libraries") + assert commands + + +def test_install_web_dev(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of web development stack installation requests.""" + commands = fake_interpreter.parse("web development stack") + assert commands + + +def test_install_with_typos(fake_interpreter: CommandInterpreter) -> None: + """Test parsing of installation requests with typos.""" + commands = fake_interpreter.parse("instll pytorch") + assert commands + + +def test_install_unknown(fake_interpreter: CommandInterpreter) -> None: + """Test graceful handling of unknown software installation requests.""" + commands = fake_interpreter.parse("install unicorn software") + assert isinstance(commands, list) # should handle gracefully + + +def test_intent_low_confidence( + fake_interpreter: CommandInterpreter, monkeypatch: pytest.MonkeyPatch +) -> None: + """Test intent extraction with low confidence scores.""" + fake_intent = { + "action": "install", + "domain": "unknown", + "install_mode": "system", + "description": "something vague", + "ambiguous": True, + "confidence": 0.3, + } + monkeypatch.setenv("CORTEX_FAKE_INTENT", json.dumps(fake_intent)) + intent = fake_interpreter.extract_intent("vague request") + assert intent["confidence"] < 0.5 + + +def test_intent_high_confidence( + fake_interpreter: CommandInterpreter, monkeypatch: pytest.MonkeyPatch +) -> None: + """Test intent extraction with high confidence scores.""" + fake_intent = { + "action": "install", + "domain": "machine_learning", + "install_mode": "python", + "description": "pytorch", + "ambiguous": False, + "confidence": 0.9, + } + 
monkeypatch.setenv("CORTEX_FAKE_INTENT", json.dumps(fake_intent)) + intent = fake_interpreter.extract_intent("install pytorch") + assert intent["confidence"] >= 0.5 + assert intent["domain"] == "machine_learning"