Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 51 additions & 2 deletions agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import asyncio
import io
import os
import re
import sys
from datetime import datetime, timedelta
Expand All @@ -16,6 +17,8 @@

from claude_agent_sdk import ClaudeSDKClient

from structured_logging import get_logger

# Fix Windows console encoding for Unicode characters (emoji, etc.)
# Without this, print() crashes when Claude outputs emoji like ✅
if sys.platform == "win32":
Expand Down Expand Up @@ -53,6 +56,7 @@ async def run_agent_session(
client: ClaudeSDKClient,
message: str,
project_dir: Path,
logger=None,
) -> tuple[str, str]:
"""
Run a single agent session using Claude Agent SDK.
Expand All @@ -61,13 +65,16 @@ async def run_agent_session(
client: Claude SDK client
message: The prompt to send
project_dir: Project directory path
logger: Optional structured logger for this session; when omitted (None), no structured log entries are written

Returns:
(status, response_text) where status is:
- "continue" if agent should continue working
- "error" if an error occurred
"""
print("Sending prompt to Claude Agent SDK...\n")
if logger:
logger.info("Starting agent session", prompt_length=len(message))

try:
# Send the query
Expand All @@ -94,6 +101,8 @@ async def run_agent_session(
print(f" Input: {input_str[:200]}...", flush=True)
else:
print(f" Input: {input_str}", flush=True)
if logger:
logger.debug("Tool used", tool_name=block.name, input_size=len(str(getattr(block, "input", ""))))

# Handle UserMessage (tool results)
elif msg_type == "UserMessage" and hasattr(msg, "content"):
Expand All @@ -107,20 +116,28 @@ async def run_agent_session(
# Check if command was blocked by security hook
if "blocked" in str(result_content).lower():
print(f" [BLOCKED] {result_content}", flush=True)
if logger:
logger.error("Security: command blocked", content=str(result_content)[:200])
elif is_error:
# Show errors (truncated)
error_str = str(result_content)[:500]
print(f" [Error] {error_str}", flush=True)
if logger:
logger.error("Tool execution error", error=error_str[:200])
else:
# Tool succeeded - just show brief confirmation
print(" [Done]", flush=True)

print("\n" + "-" * 70 + "\n")
if logger:
logger.info("Agent session completed", response_length=len(response_text))
return "continue", response_text

except Exception as e:
error_str = str(e)
print(f"Error during agent session: {error_str}")
if logger:
logger.error("Agent session error", error_type=type(e).__name__, message=error_str[:200])

# Detect rate limit errors from exception message
if is_rate_limit_error(error_str):
Expand Down Expand Up @@ -159,6 +176,27 @@ async def run_autonomous_agent(
testing_feature_id: For testing agents, the pre-claimed feature ID to test (legacy single mode)
testing_feature_ids: For testing agents, list of feature IDs to batch test
"""
# Initialize structured logger for this agent session
# Agent ID format: "testing-<pid>", "coding-<feature_id>", "initializer", or "coding-main" as the fallback
if agent_type == "testing":
log_agent_id = f"testing-{os.getpid()}"
elif feature_id:
log_agent_id = f"coding-{feature_id}"
elif agent_type == "initializer":
log_agent_id = "initializer"
else:
log_agent_id = "coding-main"

logger = get_logger(project_dir, agent_id=log_agent_id, console_output=False)
logger.info(
"Autonomous agent started",
agent_type=agent_type or "auto-detect",
model=model,
yolo_mode=yolo_mode,
max_iterations=max_iterations,
feature_id=feature_id,
)

print("\n" + "=" * 70)
print(" AUTONOMOUS CODING AGENT")
print("=" * 70)
Expand Down Expand Up @@ -224,6 +262,7 @@ async def run_autonomous_agent(
if not is_initializer and iteration == 1:
passing, in_progress, total = count_passing_tests(project_dir)
if total > 0 and passing == total:
logger.info("Project complete on startup", passing=passing, total=total)
print("\n" + "=" * 70)
print(" ALL FEATURES ALREADY COMPLETE!")
print("=" * 70)
Expand All @@ -241,7 +280,6 @@ async def run_autonomous_agent(

# Create client (fresh context)
# Pass agent_id for browser isolation in multi-agent scenarios
import os
if agent_type == "testing":
agent_id = f"testing-{os.getpid()}" # Unique ID for testing agents
elif feature_ids and len(feature_ids) > 1:
Expand Down Expand Up @@ -272,9 +310,10 @@ async def run_autonomous_agent(
# Wrap in try/except to handle MCP server startup failures gracefully
try:
async with client:
status, response = await run_agent_session(client, prompt, project_dir)
status, response = await run_agent_session(client, prompt, project_dir, logger=logger)
except Exception as e:
print(f"Client/MCP server error: {e}")
logger.error("Client/MCP server error", error_type=type(e).__name__, message=str(e)[:200])
# Don't crash - return error status so the loop can retry
status, response = "error", str(e)

Expand All @@ -299,6 +338,7 @@ async def run_autonomous_agent(
if is_rate_limit_error(response):
print("Claude Agent SDK indicated rate limit reached.")
reset_rate_limit_retries = False
logger.warn("Rate limit signal in response")

# Try to extract retry-after from response text first
retry_seconds = parse_retry_after(response)
Expand Down Expand Up @@ -416,6 +456,7 @@ async def run_autonomous_agent(
delay_seconds = calculate_error_backoff(error_retries)
print("\nSession encountered an error")
print(f"Will retry in {delay_seconds}s (attempt #{error_retries})...")
logger.error("Session error, retrying", delay_seconds=delay_seconds, attempt=error_retries)
await asyncio.sleep(delay_seconds)

# Small delay between sessions
Expand All @@ -424,6 +465,14 @@ async def run_autonomous_agent(
await asyncio.sleep(1)

# Final summary
passing, in_progress, total = count_passing_tests(project_dir)
logger.info(
"Agent session complete",
iterations=iteration,
passing=passing,
in_progress=in_progress,
total=total,
)
print("\n" + "=" * 70)
print(" SESSION COMPLETE")
print("=" * 70)
Expand Down
14 changes: 14 additions & 0 deletions autonomous_agent_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@

from agent import run_autonomous_agent
from registry import DEFAULT_MODEL, get_effective_sdk_env, get_project_path
from structured_logging import get_logger


def parse_args() -> argparse.Namespace:
Expand Down Expand Up @@ -237,6 +238,17 @@ def main() -> None:
if migrated:
print(f"Migrated project files to .autoforge/: {', '.join(migrated)}", flush=True)

# Initialize logger now that project_dir is resolved
logger = get_logger(project_dir, agent_id="entry-point", console_output=False)
logger.info(
"Script started",
input_path=project_dir_input,
resolved_path=str(project_dir),
agent_type=args.agent_type,
concurrency=args.concurrency,
yolo_mode=args.yolo,
)

# Parse batch testing feature IDs (comma-separated string -> list[int])
testing_feature_ids: list[int] | None = None
if args.testing_feature_ids:
Expand Down Expand Up @@ -305,8 +317,10 @@ def main() -> None:
except KeyboardInterrupt:
print("\n\nInterrupted by user")
print("To resume, run the same command again")
logger.info("Interrupted by user")
except Exception as e:
print(f"\nFatal error: {e}")
logger.error("Fatal error", error_type=type(e).__name__, message=str(e)[:200])
raise


Expand Down
17 changes: 17 additions & 0 deletions client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from dotenv import load_dotenv

from security import SENSITIVE_DIRECTORIES, bash_security_hook
from structured_logging import get_logger

# Load environment variables from .env file if present
load_dotenv()
Expand Down Expand Up @@ -309,6 +310,9 @@ def create_client(
Note: Authentication is handled by start.bat/start.sh before this runs.
The Claude SDK auto-detects credentials from the Claude CLI configuration
"""
# Initialize logger for client configuration events
logger = get_logger(project_dir, agent_id="client", console_output=False)

# Select the feature MCP tools appropriate for this agent type
feature_tools_map = {
"coding": CODING_AGENT_TOOLS,
Expand Down Expand Up @@ -388,6 +392,7 @@ def create_client(
with open(settings_file, "w") as f:
json.dump(security_settings, f, indent=2)

logger.info("Settings file written", file_path=str(settings_file))
print(f"Created security settings at {settings_file}")
print(" - Sandbox enabled (OS-level bash isolation)")
print(f" - Filesystem restricted to: {project_dir.resolve()}")
Expand Down Expand Up @@ -463,6 +468,7 @@ def create_client(
model = convert_model_for_vertex(model)
if sdk_env:
print(f" - API overrides: {', '.join(sdk_env.keys())}")
logger.info("API overrides configured", is_ollama=is_ollama, overrides=list(sdk_env.keys()))
if is_vertex:
project_id = sdk_env.get("ANTHROPIC_VERTEX_PROJECT_ID", "unknown")
region = sdk_env.get("CLOUD_ML_REGION", "unknown")
Expand Down Expand Up @@ -552,6 +558,17 @@ async def pre_compact_hook(
}
)

# Log client creation
logger.info(
"Client created",
model=model,
yolo_mode=yolo_mode,
agent_id=agent_id,
agent_type=agent_type,
is_alternative_api=is_alternative_api,
max_turns=max_turns,
)

# PROMPT CACHING: The Claude Code CLI applies cache_control breakpoints internally.
# Our system_prompt benefits from automatic caching without explicit configuration.
# If explicit cache_control is needed, the SDK would need to accept content blocks
Expand Down
19 changes: 19 additions & 0 deletions parallel_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from api.dependency_resolver import are_dependencies_satisfied, compute_scheduling_scores
from progress import has_features
from server.utils.process_utils import kill_process_tree
from structured_logging import get_logger

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -224,6 +225,16 @@ def __init__(
# Database session for this orchestrator
self._engine, self._session_maker = create_database(project_dir)

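# Structured logger for persistent logs (saved to {project_dir}/.autocoder/logs.db)
# NOTE(review): other code in this change refers to the ".autoforge/" directory - confirm this path is current
# Uses console_output=False because the orchestrator already emits its own print statements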
self._logger = get_logger(project_dir, agent_id="orchestrator", console_output=False)
self._logger.info(
"Orchestrator initialized",
max_concurrency=self.max_concurrency,
yolo_mode=yolo_mode,
testing_agent_ratio=testing_agent_ratio,
)

def get_session(self):
"""Return a new database session created by this orchestrator's session maker."""
return self._session_maker()
Expand Down Expand Up @@ -854,6 +865,7 @@ def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]:
proc = subprocess.Popen(cmd, **popen_kwargs)
except Exception as e:
# Reset in_progress on failure
self._logger.error("Spawn coding agent failed", feature_id=feature_id, error=str(e)[:200])
session = self.get_session()
try:
feature = session.query(Feature).filter(Feature.id == feature_id).first()
Expand All @@ -879,6 +891,7 @@ def _spawn_coding_agent(self, feature_id: int) -> tuple[bool, str]:
self.on_status(feature_id, "running")

print(f"Started coding agent for feature #{feature_id}", flush=True)
self._logger.info("Spawned coding agent", feature_id=feature_id, pid=proc.pid)
return True, f"Started feature {feature_id}"

def _spawn_coding_agent_batch(self, feature_ids: list[int]) -> tuple[bool, str]:
Expand Down Expand Up @@ -1242,6 +1255,10 @@ def _on_agent_complete(
# feature_id is required for coding agents (always passed from start_feature)
assert feature_id is not None, "feature_id must not be None for coding agents"

# Record the coding agent's outcome in the structured log ("success" when return_code is 0, otherwise "failed")
agent_status = "success" if return_code == 0 else "failed"
self._logger.info("Coding agent completed", feature_id=feature_id, status=agent_status, return_code=return_code)

# Coding agent completion - handle both single and batch features
batch_ids = None
with self._lock:
Expand Down Expand Up @@ -1288,6 +1305,7 @@ def _on_agent_complete(
print(f"Feature #{fid} has failed {failure_count} times, will not retry", flush=True)
debug_log.log("COMPLETE", f"Feature #{fid} exceeded max retries",
failure_count=failure_count)
self._logger.warn("Feature exceeded max retries", feature_id=fid, failure_count=failure_count)

status = "completed" if return_code == 0 else "failed"
if self.on_status is not None:
Expand Down Expand Up @@ -1584,6 +1602,7 @@ async def run_loop(self):

except Exception as e:
print(f"Orchestrator error: {e}", flush=True)
self._logger.error("Orchestrator loop error", error_type=type(e).__name__, message=str(e)[:200])
await self._wait_for_agent_completion()

# Wait for remaining agents to complete
Expand Down
Loading
Loading