What We’re Building
A code review agent that:
- Analyzes code changes in a PR
- Identifies bugs, security issues, and style problems
- Suggests specific improvements with code examples
- Explains reasoning for each suggestion
- Uses tools to gather context (file contents, git history, documentation)
Architecture Overview
Agent Execution Flow
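At a high level, the agent runs a simple loop: the system prompt and user request go to the LLM; if the model responds with tool calls, we execute them locally and append the results to the conversation; the cycle repeats until the model replies in plain text (the final review) or we hit an iteration cap. Step 3 implements this loop directly.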
Step 1: Define the Tools
Tools are Python functions that the agent can call to interact with the world:
import subprocess
import json
from typing import Dict, Any, Optional

def read_file(filepath: str, start_line: int = 1, end_line: Optional[int] = None) -> Dict[str, Any]:
    """Read file contents with line numbers."""
    try:
        with open(filepath, 'r') as f:
            lines = f.readlines()
        if end_line is None:
            end_line = len(lines)
        content = ''.join(lines[start_line - 1:end_line])
        return {
            'success': True,
            'filepath': filepath,
            'lines': f"{start_line}-{end_line}",
            'content': content,
            'total_lines': len(lines)
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
def git_blame(filepath: str, line_number: int) -> Dict[str, Any]:
    """Get git blame info for a specific line."""
    try:
        result = subprocess.run(
            ['git', 'blame', '-L', f'{line_number},{line_number}', filepath],
            capture_output=True,
            text=True
        )
        if result.returncode == 0:
            blame_info = result.stdout.strip()
            parts = blame_info.split()
            return {
                'success': True,
                'commit': parts[0],
                'author': ' '.join(parts[1:-1]),
                'line': line_number,
                'filepath': filepath
            }
        else:
            return {'success': False, 'error': result.stderr}
    except Exception as e:
        return {'success': False, 'error': str(e)}
def search_codebase(pattern: str, file_extension: str = None) -> Dict[str, Any]:
    """Search for a pattern in the codebase using ripgrep."""
    try:
        cmd = ['rg', pattern, '--json']
        if file_extension:
            cmd.extend(['--type', file_extension])
        result = subprocess.run(cmd, capture_output=True, text=True)
        matches = []
        for line in result.stdout.split('\n'):
            if not line:
                continue
            try:
                data = json.loads(line)
                if data.get('type') == 'match':
                    matches.append({
                        'path': data['data']['path']['text'],
                        'line_number': data['data']['line_number'],
                        'content': data['data']['lines']['text']
                    })
            except json.JSONDecodeError:
                continue
        return {
            'success': True,
            'pattern': pattern,
            'matches': matches[:10]  # Limit results to keep context small
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
def get_dependencies(filepath: str) -> Dict[str, Any]:
    """Extract import statements from a Python file."""
    try:
        with open(filepath, 'r') as f:
            content = f.read()
        imports = []
        for line in content.split('\n'):
            if line.strip().startswith(('import ', 'from ')):
                imports.append(line.strip())
        return {
            'success': True,
            'filepath': filepath,
            'imports': imports,
            'count': len(imports)
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
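Before wiring these into the agent, it helps to sanity-check them directly. A quick smoke test (the file path is a placeholder):

# Manual check of the tools before handing them to the agent
print(read_file('app.py', start_line=1, end_line=20))
print(get_dependencies('app.py'))
print(search_codebase('TODO', file_extension='py'))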
Step 2: Tool Definitions for OpenAI
tools = [
    {
        "type": "function",
        "function": {
            "name": "read_file",
            "description": "Read the contents of a file with optional line range",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file to read"
                    },
                    "start_line": {
                        "type": "integer",
                        "description": "Starting line number (1-indexed)"
                    },
                    "end_line": {
                        "type": "integer",
                        "description": "Ending line number (inclusive)"
                    }
                },
                "required": ["filepath"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "git_blame",
            "description": "Get git blame information for a specific line",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file"
                    },
                    "line_number": {
                        "type": "integer",
                        "description": "Line number to blame"
                    }
                },
                "required": ["filepath", "line_number"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_codebase",
            "description": "Search for a pattern across the codebase",
            "parameters": {
                "type": "object",
                "properties": {
                    "pattern": {
                        "type": "string",
                        "description": "Pattern to search for (regex supported)"
                    },
                    "file_extension": {
                        "type": "string",
                        "description": "Filter by file extension (e.g., 'py', 'js')"
                    }
                },
                "required": ["pattern"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_dependencies",
            "description": "Extract import statements from a Python file",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the Python file"
                    }
                },
                "required": ["filepath"]
            }
        }
    }
]

# Tool dispatcher: maps tool names to the Python functions defined in Step 1
AVAILABLE_FUNCTIONS = {
    "read_file": read_file,
    "git_blame": git_blame,
    "search_codebase": search_codebase,
    "get_dependencies": get_dependencies
}
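To confirm the schemas and dispatcher line up, you can simulate what the agent loop will do with a tool call: parse the JSON arguments and dispatch by name. A minimal sketch with hand-written, hypothetical arguments:

# Simulate a tool call the model might emit (arguments are made up)
fake_call = {"name": "read_file", "arguments": '{"filepath": "app.py", "start_line": 1}'}
func = AVAILABLE_FUNCTIONS[fake_call["name"]]
args = json.loads(fake_call["arguments"])
print(func(**args))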
Step 3: The Agent Core
from openai import OpenAI
import json

client = OpenAI()

def run_agent(user_message: str, max_iterations: int = 10):
    """Run the agent loop."""
    messages = [
        {
            "role": "system",
            "content": """You are an expert code reviewer. Analyze code changes
and provide detailed, actionable feedback. Use available tools to
gather context. Focus on:
- Security vulnerabilities
- Logic errors and bugs
- Performance issues
- Code style and best practices
Suggest specific improvements with code examples."""
        },
        {"role": "user", "content": user_message}
    ]

    for iteration in range(max_iterations):
        print(f"Iteration {iteration + 1}/{max_iterations}")

        # Call the LLM with the conversation so far plus the tool schemas
        response = client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=messages,
            tools=tools,
            tool_choice="auto"
        )
        response_message = response.choices[0].message
        messages.append(response_message)

        # No tool calls means the model has produced its final answer
        if not response_message.tool_calls:
            return response_message.content

        # Execute each requested tool and feed the result back to the model
        for tool_call in response_message.tool_calls:
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            print(f"Calling: {function_name}({function_args})")

            function_to_call = AVAILABLE_FUNCTIONS[function_name]
            function_response = function_to_call(**function_args)

            # Add the tool response to the conversation
            messages.append({
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": json.dumps(function_response)
            })

    return "Max iterations reached"
Step 4: Adding Structured Output
from pydantic import BaseModel
from typing import List, Literal, Optional

class CodeIssue(BaseModel):
    severity: Literal["critical", "high", "medium", "low"]
    category: Literal["security", "bug", "performance", "style"]
    filepath: str
    line_number: int
    description: str
    suggestion: str
    code_example: Optional[str] = None

class CodeReview(BaseModel):
    summary: str
    issues: List[CodeIssue]
    overall_score: int  # 0-100

def run_structured_agent(user_message: str):
    """Run the agent with structured output."""
    # ... (same loop as before, but the final call uses response_format)
    final_response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=messages,
        response_format={"type": "json_object"}
    )
    review_json = json.loads(final_response.choices[0].message.content)
    return CodeReview(**review_json)
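One detail worth noting: JSON mode only guarantees syntactically valid JSON. It does not know your schema, and OpenAI requires the word "JSON" to appear somewhere in the messages. A minimal sketch of how you might prime the final call (assumes Pydantic v2 for model_json_schema; the exact wording is up to you):

# Tell the model exactly what JSON shape to return before the final call
schema_prompt = {
    "role": "user",
    "content": "Return your review as JSON matching this schema:\n"
               + json.dumps(CodeReview.model_json_schema())
}
messages.append(schema_prompt)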
Step 5: Integration with GitHub
from github import Github
import os

def review_pr(repo_name: str, pr_number: int):
    """Review a GitHub PR."""
    g = Github(os.getenv('GITHUB_TOKEN'))
    repo = g.get_repo(repo_name)
    pr = repo.get_pull(pr_number)

    # Get changed files (skip deletions, which have no content to fetch)
    files = pr.get_files()
    file_contents = []
    for file in files:
        if file.status != 'removed':
            content = repo.get_contents(file.filename, ref=pr.head.sha)
            file_contents.append({
                'filename': file.filename,
                'patch': file.patch,
                'content': content.decoded_content.decode('utf-8')
            })

    # Build review prompt
    prompt = f"""Review this PR: {pr.title}
Files changed: {pr.changed_files}

{json.dumps(file_contents, indent=2)}

Analyze these changes and provide a detailed code review."""

    # Run agent
    review = run_structured_agent(prompt)

    # Post review comments
    for issue in review.issues:
        example = f"```\n{issue.code_example}\n```" if issue.code_example else ""
        pr.create_review_comment(
            body=f"""**{issue.severity.upper()}: {issue.category}**

{issue.description}

**Suggestion:**
{issue.suggestion}

{example}""",
            commit=pr.get_commits()[pr.commits - 1],
            path=issue.filepath,
            line=issue.line_number
        )

    return review
Production Considerations
Rate Limiting and Caching
import time
from functools import lru_cache
import hashlib

# Simple sliding-window rate limiter
class RateLimiter:
    def __init__(self, calls_per_minute: int = 60):
        self.calls_per_minute = calls_per_minute
        self.calls = []

    def wait_if_needed(self):
        now = time.time()
        # Drop calls older than one minute
        self.calls = [t for t in self.calls if now - t < 60]
        if len(self.calls) >= self.calls_per_minute:
            sleep_time = 60 - (now - self.calls[0])
            if sleep_time > 0:
                time.sleep(sleep_time)
        self.calls.append(time.time())

limiter = RateLimiter(calls_per_minute=50)

# Cache tool results, keyed on file contents so edits invalidate the cache
@lru_cache(maxsize=1000)
def _cached_read_file(filepath: str, file_hash: str):
    return read_file(filepath)

def cached_read_file(filepath: str):
    with open(filepath, 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()
    return _cached_read_file(filepath, file_hash)
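How the cache behaves: every call re-hashes the file (cheap), but the full read_file result is only recomputed when the contents change. A quick illustration with a placeholder path:

info1 = cached_read_file('app.py')  # hashes the file, caches the read_file result
info2 = cached_read_file('app.py')  # hash unchanged, result served from the cache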
Error Handling and Recovery
import tenacity

@tenacity.retry(
    stop=tenacity.stop_after_attempt(3),
    wait=tenacity.wait_exponential(multiplier=1, min=2, max=10),
    retry=tenacity.retry_if_exception_type(Exception)
)
def safe_llm_call(messages, tools):
    """LLM call with rate limiting and retry logic."""
    try:
        limiter.wait_if_needed()
        return client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=messages,
            tools=tools,
            timeout=30
        )
    except Exception as e:
        print(f"LLM call failed: {e}")
        raise

def safe_tool_call(function_name: str, function_args: dict):
    """Execute a tool with error handling."""
    try:
        function = AVAILABLE_FUNCTIONS.get(function_name)
        if not function:
            return {"success": False, "error": f"Unknown function: {function_name}"}
        result = function(**function_args)
        return result
    except Exception as e:
        return {
            "success": False,
            "error": f"Tool execution failed: {str(e)}"
        }
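To put these to work, only the two call sites inside run_agent's loop change. Roughly (a sketch; the surrounding loop stays as in Step 3):

# Inside run_agent's loop, swap the direct calls for the safe wrappers
response = safe_llm_call(messages, tools)  # instead of client.chat.completions.create(...)
# ...
function_response = safe_tool_call(function_name, function_args)  # instead of function_to_call(**function_args)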
Testing the Agent
# Example usage
if __name__ == "__main__":
    # Test 1: Review a specific file
    review = run_agent(
        "Review the file 'app.py' for security issues and bugs"
    )
    print(review)

    # Test 2: Review a PR
    pr_review = review_pr("myorg/myrepo", 123)
    print(f"Overall score: {pr_review.overall_score}")
    print(f"Found {len(pr_review.issues)} issues")
    for issue in pr_review.issues:
        print(f"- [{issue.severity}] {issue.description}")
Key Takeaways
- Tools make agents powerful: The ability to read files, run git commands, and search code transforms an LLM into an active agent
- Structured output is essential: Pydantic models ensure consistent, parseable results
- Error handling matters: Production agents need retries, rate limiting, and graceful failures
- Context is king: The more context tools provide, the better the review quality
- Iteration is necessary: Agents often need multiple tool calls to gather sufficient information
What’s Next
- Add more tools: Static analysis (pylint, eslint), test runners, performance profilers
- Memory system: Remember past reviews and learn from feedback
- Multi-agent collaboration: Separate agents for security, performance, style
- Human-in-the-loop: Flag uncertain findings for human review
References & Further Reading
- 📚 OpenAI Function Calling Guide
- 📚 Pydantic Documentation
- 📚 LangChain for Agent Frameworks
- 📚 “Building LLM Powered Applications” by Valentina Alto