Building AI Agents: A Complete Code Review Assistant from Scratch

Hands-on tutorial building a production-ready AI agent. Create a code review assistant with tool use, error handling, caching, and GitHub integration.

What We’re Building

A code review agent that:
  • Analyzes code changes in a PR
  • Identifies bugs, security issues, and style problems
  • Suggests specific improvements with code examples
  • Explains reasoning for each suggestion
  • Uses tools to gather context (file contents, git history, documentation)

Architecture Overview

[Figure: Code Review Agent Architecture]

Agent Execution Flow

[Figure: Agent Execution Loop]

Step 1: Define the Tools

Tools are Python functions that the agent can call to interact with the world:
import subprocess
import json
from typing import Any, Dict, Optional

def read_file(filepath: str, start_line: int = 1, end_line: Optional[int] = None) -> Dict[str, Any]:
    """Read file contents with line numbers."""
    try:
        with open(filepath, 'r') as f:
            lines = f.readlines()
        
        if end_line is None:
            end_line = len(lines)
        
        content = ''.join(lines[start_line-1:end_line])
        
        return {
            'success': True,
            'filepath': filepath,
            'lines': f"{start_line}-{end_line}",
            'content': content,
            'total_lines': len(lines)
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}

def git_blame(filepath: str, line_number: int) -> Dict[str, Any]:
    """Get git blame info for specific line."""
    try:
        result = subprocess.run(
            ['git', 'blame', '-L', f'{line_number},{line_number}', filepath],
            capture_output=True,
            text=True
        )
        
        if result.returncode == 0:
            blame_info = result.stdout.strip()
            parts = blame_info.split()
            
            return {
                'success': True,
                'commit': parts[0],
                'author': ' '.join(parts[1:-1]),
                'line': line_number,
                'filepath': filepath
            }
        else:
            return {'success': False, 'error': result.stderr}
    except Exception as e:
        return {'success': False, 'error': str(e)}

def search_codebase(pattern: str, file_extension: str = None) -> Dict[str, Any]:
    """Search for pattern in codebase."""
    try:
        cmd = ['rg', pattern, '--json']
        if file_extension:
            cmd.extend(['--type', file_extension])
        
        result = subprocess.run(cmd, capture_output=True, text=True)
        
        matches = []
        for line in result.stdout.split('\n'):
            if line:
                try:
                    data = json.loads(line)
                    if data.get('type') == 'match':
                    matches.append({
                        'path': data['data']['path']['text'],
                        'line_number': data['data']['line_number'],
                        'content': data['data']['lines']['text']
                    })
                except json.JSONDecodeError:
                    continue
        
        return {
            'success': True,
            'pattern': pattern,
            'matches': matches[:10]  # Limit results
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}

def get_dependencies(filepath: str) -> Dict[str, Any]:
    """Extract dependencies from Python file."""
    try:
        with open(filepath, 'r') as f:
            content = f.read()
        
        imports = []
        for line in content.split('\n'):
            if line.strip().startswith(('import ', 'from ')):
                imports.append(line.strip())
        
        return {
            'success': True,
            'filepath': filepath,
            'imports': imports,
            'count': len(imports)
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
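
Each tool can be smoke-tested on its own before the model ever sees it, for example (the file path here is hypothetical):

print(read_file('app.py', start_line=1, end_line=20))
print(get_dependencies('app.py'))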

Step 2: Tool Definitions for OpenAI

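The model never calls the Python functions directly; it only sees JSON Schema descriptions of them. Each entry below tells the model a tool's name, what it does, and which arguments it accepts:
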
tools = [
    {
        "type": "function",
        "function": {
            "name": "read_file",
            "description": "Read the contents of a file with optional line range",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file to read"
                    },
                    "start_line": {
                        "type": "integer",
                        "description": "Starting line number (1-indexed)"
                    },
                    "end_line": {
                        "type": "integer",
                        "description": "Ending line number (inclusive)"
                    }
                },
                "required": ["filepath"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "git_blame",
            "description": "Get git blame information for a specific line",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file"
                    },
                    "line_number": {
                        "type": "integer",
                        "description": "Line number to blame"
                    }
                },
                "required": ["filepath", "line_number"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_codebase",
            "description": "Search for a pattern across the codebase",
            "parameters": {
                "type": "object",
                "properties": {
                    "pattern": {
                        "type": "string",
                        "description": "Pattern to search for (regex supported)"
                    },
                    "file_extension": {
                        "type": "string",
                        "description": "Filter by file extension (e.g., 'py', 'js')"
                    }
                },
                "required": ["pattern"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_dependencies",
            "description": "Extract import statements from a Python file",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the Python file"
                    }
                },
                "required": ["filepath"]
            }
        }
    }
]

# Tool dispatcher
AVAILABLE_FUNCTIONS = {
    "read_file": read_file,
    "git_blame": git_blame,
    "search_codebase": search_codebase,
    "get_dependencies": get_dependencies
}

Step 3: The Agent Core

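The core is a loop: send the conversation to the model, execute whatever tool calls it returns, append the results, and repeat until the model replies without requesting a tool:
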
from openai import OpenAI
import json

client = OpenAI()

def run_agent(user_message: str, max_iterations: int = 10):
    """Run the agent loop."""
    messages = [
        {
            "role": "system",
            "content": """You are an expert code reviewer. Analyze code changes
            and provide detailed, actionable feedback. Use available tools to
            gather context. Focus on:
            - Security vulnerabilities
            - Logic errors and bugs
            - Performance issues
            - Code style and best practices
            Suggest specific improvements with code examples."""
        },
        {"role": "user", "content": user_message}
    ]
    
    for iteration in range(max_iterations):
        print(f"Iteration {{iteration + 1}}/{{max_iterations}}")
        
        # Call LLM
        response = client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=messages,
            tools=tools,
            tool_choice="auto"
        )
        
        response_message = response.choices[0].message
        messages.append(response_message)
        
        # Check if done
        if not response_message.tool_calls:
            return response_message.content
        
        # Execute tools
        for tool_call in response_message.tool_calls:
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            
            print(f"Calling: {{function_name}}({{function_args}})")
            
            # Execute the function
            function_to_call = AVAILABLE_FUNCTIONS[function_name]
            function_response = function_to_call(**function_args)
            
            # Add tool response to messages
            messages.append({
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": json.dumps(function_response)
            })
    
    return "Max iterations reached"

Step 4: Adding Structured Output

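Free-form review text is hard to post programmatically. Pydantic models pin the output to a fixed shape that downstream code can validate and iterate over:
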
from pydantic import BaseModel
from typing import List, Literal, Optional

class CodeIssue(BaseModel):
    severity: Literal["critical", "high", "medium", "low"]
    category: Literal["security", "bug", "performance", "style"]
    filepath: str
    line_number: int
    description: str
    suggestion: str
    code_example: Optional[str] = None

class CodeReview(BaseModel):
    summary: str
    issues: List[CodeIssue]
    overall_score: int  # 0-100
    
def run_structured_agent(user_message: str):
    """Run agent with structured output."""
    # ... same tool loop as run_agent, accumulating `messages`;
    # the final call then switches the model into JSON mode:
    
    final_response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=messages,
        # JSON mode requires the prompt itself to also ask for JSON output
        response_format={"type": "json_object"}
    )
    
    review_json = json.loads(final_response.choices[0].message.content)
    return CodeReview(**review_json)
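
JSON mode guarantees syntactically valid JSON, not schema conformance, so it helps to put the schema in the prompt and validate the result. A minimal sketch, assuming Pydantic v2 (model_json_schema) and reusing the client, json, and CodeReview names from above:

from pydantic import ValidationError

def parse_review(raw_json: str) -> CodeReview:
    """Validate the model's JSON against the CodeReview schema."""
    try:
        return CodeReview(**json.loads(raw_json))
    except (json.JSONDecodeError, ValidationError) as e:
        # Surface the failure so the caller can re-prompt or fall back
        raise ValueError(f"Model returned a non-conforming review: {e}")

# Putting the schema in the prompt nudges the model toward the right shape
schema_message = {
    "role": "system",
    "content": "Reply with a JSON object matching this schema:\n"
               + json.dumps(CodeReview.model_json_schema(), indent=2)
}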

Step 5: Integration with GitHub

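With PyGithub, the agent can fetch a PR's changed files, feed them to the review loop, and post each finding back as a review comment:
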
from github import Github
import os

def review_pr(repo_name: str, pr_number: int):
    """Review a GitHub PR."""
    g = Github(os.getenv('GITHUB_TOKEN'))
    repo = g.get_repo(repo_name)
    pr = repo.get_pull(pr_number)
    
    # Get changed files
    files = pr.get_files()
    
    file_contents = []
    for file in files:
        if file.status != 'removed':
            content = repo.get_contents(file.filename, ref=pr.head.sha)
            file_contents.append({
                'filename': file.filename,
                'patch': file.patch,
                'content': content.decoded_content.decode('utf-8')
            })
    
    # Build review prompt
    prompt = f"""Review this PR: {{pr.title}}
    
Files changed: {{len(files)}}
{{json.dumps(file_contents, indent=2)}}

Analyze these changes and provide a detailed code review."""
    
    # Run agent
    review = run_structured_agent(prompt)
    
    # Post review comments
    for issue in review.issues:
        # Build the optional code block first; nesting it inside the f-string
        # below would be a syntax error on Python < 3.12
        code_block = f"```\n{issue.code_example}\n```" if issue.code_example else ""
        pr.create_review_comment(
            body=f"""**{issue.severity.upper()}: {issue.category}**

{issue.description}

**Suggestion:**
{issue.suggestion}

{code_block}""",
            commit=pr.get_commits()[pr.commits - 1],  # last commit in the PR
            path=issue.filepath,
            line=issue.line_number
        )
    
    return review

Production Considerations

Rate Limiting and Caching

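Two cheap safeguards go a long way in production: throttle outgoing LLM calls, and avoid re-reading files that have not changed:
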
import time
from functools import lru_cache
import hashlib

# Simple rate limiter
class RateLimiter:
    def __init__(self, calls_per_minute: int = 60):
        self.calls_per_minute = calls_per_minute
        self.calls = []
    
    def wait_if_needed(self):
        now = time.time()
        # Remove calls older than 1 minute
        self.calls = [t for t in self.calls if now - t < 60]
        
        if len(self.calls) >= self.calls_per_minute:
            sleep_time = 60 - (now - self.calls[0])
            if sleep_time > 0:
                time.sleep(sleep_time)
        
        self.calls.append(time.time())

limiter = RateLimiter(calls_per_minute=50)

# Cache tool results, keyed on a content hash so edits invalidate stale entries
def cached_read_file(filepath: str):
    with open(filepath, 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()
    return _read_file_cached(filepath, file_hash)

@lru_cache(maxsize=1000)
def _read_file_cached(filepath: str, file_hash: str):
    # file_hash is part of the cache key, so a changed file misses the cache
    return read_file(filepath)

Error Handling and Recovery

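Both the LLM call and the tool execution can fail independently, so each gets its own wrapper: retries with exponential backoff for the network call, and a structured error payload for the tools so a failure flows back to the model instead of crashing the loop:
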
import tenacity

@tenacity.retry(
    stop=tenacity.stop_after_attempt(3),
    wait=tenacity.wait_exponential(multiplier=1, min=2, max=10),
    retry=tenacity.retry_if_exception_type(Exception)
)
def safe_llm_call(messages, tools):
    """LLM call with retry logic."""
    try:
        limiter.wait_if_needed()
        return client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=messages,
            tools=tools,
            timeout=30
        )
    except Exception as e:
        print(f"LLM call failed: {{e}}")
        raise

def safe_tool_call(function_name: str, function_args: dict):
    """Execute tool with error handling."""
    try:
        function = AVAILABLE_FUNCTIONS.get(function_name)
        if not function:
            return {{"success": False, "error": f"Unknown function: {{function_name}}"}}'
        
        result = function(**function_args)
        return result
    except Exception as e:
        return {
            "success": False,
            "error": f"Tool execution failed: {str(e)}"
        }
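
To use these wrappers, swap them into the agent loop in place of the direct calls. The two changed lines inside run_agent:

        # Call LLM (was: client.chat.completions.create(...))
        response = safe_llm_call(messages, tools)

        # Execute the function (was: AVAILABLE_FUNCTIONS[function_name](**function_args))
        function_response = safe_tool_call(function_name, function_args)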

Testing the Agent

# Example usage
if __name__ == "__main__":
    # Test 1: Review a specific file
    review = run_agent(
        "Review the file 'app.py' for security issues and bugs"
    )
    print(review)
    
    # Test 2: Review a PR
    pr_review = review_pr("myorg/myrepo", 123)
    print(f"Overall score: {{pr_review.overall_score}}")
    print(f"Found {{len(pr_review.issues)}} issues")
    
    for issue in pr_review.issues:
        print(f"- [{{issue.severity}}] {{issue.description}}")

Key Takeaways

  • Tools make agents powerful: The ability to read files, run git commands, and search code transforms an LLM into an active agent
  • Structured output is essential: Pydantic models ensure consistent, parseable results
  • Error handling matters: Production agents need retries, rate limiting, and graceful failures
  • Context is king: The more context tools provide, the better the review quality
  • Iteration is necessary: Agents often need multiple tool calls to gather sufficient information

What’s Next

  • Add more tools: Static analysis (pylint, eslint), test runners, performance profilers (see the sketch after this list)
  • Memory system: Remember past reviews and learn from feedback
  • Multi-agent collaboration: Separate agents for security, performance, style
  • Human-in-the-loop: Flag uncertain findings for human review
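
As a taste of the first item, here is a hypothetical static-analysis tool in the same shape as the others, assuming pylint is installed (it can emit machine-readable reports via --output-format=json):

def run_pylint(filepath: str) -> Dict[str, Any]:
    """Run pylint on a file and return its findings as structured data."""
    try:
        # pylint exits non-zero whenever it finds issues, so parse stdout
        # instead of checking the return code
        result = subprocess.run(
            ['pylint', filepath, '--output-format=json'],
            capture_output=True,
            text=True
        )
        return {'success': True, 'issues': json.loads(result.stdout or '[]')}
    except Exception as e:
        return {'success': False, 'error': str(e)}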
