What We’re Building
A code review agent that:
- Analyzes code changes in a PR
- Identifies bugs, security issues, and style problems
- Suggests specific improvements with code examples
- Explains reasoning for each suggestion
- Uses tools to gather context (file contents, git history, documentation)
Architecture Overview
Agent Execution Flow
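At a high level, the agent runs a simple loop: the system prompt and user request go to the LLM; if the model responds with tool calls, we execute them locally and append the results to the conversation; the cycle repeats until the model replies in plain text (the final review) or we hit an iteration cap. Step 3 implements this loop directly.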
Step 1: Define the Tools
Tools are Python functions that the agent can call to interact with the world:
import subprocess
import json
from typing import Dict, Any, Optional

def read_file(filepath: str, start_line: int = 1, end_line: Optional[int] = None) -> Dict[str, Any]:
    """Read file contents with line numbers."""
    try:
        with open(filepath, 'r') as f:
            lines = f.readlines()
        if end_line is None:
            end_line = len(lines)
        content = ''.join(lines[start_line - 1:end_line])
        return {
            'success': True,
            'filepath': filepath,
            'lines': f"{start_line}-{end_line}",
            'content': content,
            'total_lines': len(lines)
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
def git_blame(filepath: str, line_number: int) -> Dict[str, Any]:
    """Get git blame info for a specific line."""
    try:
        result = subprocess.run(
            ['git', 'blame', '-L', f'{line_number},{line_number}', filepath],
            capture_output=True,
            text=True
        )
        if result.returncode == 0:
            blame_info = result.stdout.strip()
            parts = blame_info.split()
            return {
                'success': True,
                'commit': parts[0],
                'author': ' '.join(parts[1:-1]),
                'line': line_number,
                'filepath': filepath
            }
        else:
            return {'success': False, 'error': result.stderr}
    except Exception as e:
        return {'success': False, 'error': str(e)}
def search_codebase(pattern: str, file_extension: str = None) -> Dict[str, Any]:
    """Search for a pattern in the codebase using ripgrep."""
    try:
        cmd = ['rg', pattern, '--json']
        if file_extension:
            cmd.extend(['--type', file_extension])
        result = subprocess.run(cmd, capture_output=True, text=True)
        matches = []
        for line in result.stdout.split('\n'):
            if not line:
                continue
            try:
                data = json.loads(line)
                if data.get('type') == 'match':
                    matches.append({
                        'path': data['data']['path']['text'],
                        'line_number': data['data']['line_number'],
                        'content': data['data']['lines']['text']
                    })
            except json.JSONDecodeError:
                continue
        return {
            'success': True,
            'pattern': pattern,
            'matches': matches[:10]  # Limit results to keep context small
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
def get_dependencies(filepath: str) -> Dict[str, Any]:
    """Extract import statements from a Python file."""
    try:
        with open(filepath, 'r') as f:
            content = f.read()
        imports = []
        for line in content.split('\n'):
            if line.strip().startswith(('import ', 'from ')):
                imports.append(line.strip())
        return {
            'success': True,
            'filepath': filepath,
            'imports': imports,
            'count': len(imports)
        }
    except Exception as e:
        return {'success': False, 'error': str(e)}
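Before wiring these into the agent, it helps to sanity-check them directly. A quick smoke test (the file path is a placeholder):

# Manual check of the tools before handing them to the agent
print(read_file('app.py', start_line=1, end_line=20))
print(get_dependencies('app.py'))
print(search_codebase('TODO', file_extension='py'))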
Step 2: Tool Definitions for OpenAI
tools = [
    {
        "type": "function",
        "function": {
            "name": "read_file",
            "description": "Read the contents of a file with optional line range",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file to read"
                    },
                    "start_line": {
                        "type": "integer",
                        "description": "Starting line number (1-indexed)"
                    },
                    "end_line": {
                        "type": "integer",
                        "description": "Ending line number (inclusive)"
                    }
                },
                "required": ["filepath"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "git_blame",
            "description": "Get git blame information for a specific line",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the file"
                    },
                    "line_number": {
                        "type": "integer",
                        "description": "Line number to blame"
                    }
                },
                "required": ["filepath", "line_number"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "search_codebase",
            "description": "Search for a pattern across the codebase",
            "parameters": {
                "type": "object",
                "properties": {
                    "pattern": {
                        "type": "string",
                        "description": "Pattern to search for (regex supported)"
                    },
                    "file_extension": {
                        "type": "string",
                        "description": "Filter by file extension (e.g., 'py', 'js')"
                    }
                },
                "required": ["pattern"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_dependencies",
            "description": "Extract import statements from a Python file",
            "parameters": {
                "type": "object",
                "properties": {
                    "filepath": {
                        "type": "string",
                        "description": "Path to the Python file"
                    }
                },
                "required": ["filepath"]
            }
        }
    }
]

# Tool dispatcher: maps tool names to the Python functions defined in Step 1
AVAILABLE_FUNCTIONS = {
    "read_file": read_file,
    "git_blame": git_blame,
    "search_codebase": search_codebase,
    "get_dependencies": get_dependencies
}
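To confirm the schemas and dispatcher line up, you can simulate what the agent loop will do with a tool call: parse the JSON arguments and dispatch by name. A minimal sketch with hand-written, hypothetical arguments:

# Simulate a tool call the model might emit (arguments are made up)
fake_call = {"name": "read_file", "arguments": '{"filepath": "app.py", "start_line": 1}'}
func = AVAILABLE_FUNCTIONS[fake_call["name"]]
args = json.loads(fake_call["arguments"])
print(func(**args))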
Step 3: The Agent Core
from openai import OpenAI
import json

client = OpenAI()

def run_agent(user_message: str, max_iterations: int = 10):
    """Run the agent loop."""
    messages = [
        {
            "role": "system",
            "content": """You are an expert code reviewer. Analyze code changes
and provide detailed, actionable feedback. Use available tools to
gather context. Focus on:
- Security vulnerabilities
- Logic errors and bugs
- Performance issues
- Code style and best practices
Suggest specific improvements with code examples."""
        },
        {"role": "user", "content": user_message}
    ]

    for iteration in range(max_iterations):
        print(f"Iteration {iteration + 1}/{max_iterations}")

        # Call the LLM with the conversation so far plus the tool schemas
        response = client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=messages,
            tools=tools,
            tool_choice="auto"
        )
        response_message = response.choices[0].message
        messages.append(response_message)

        # No tool calls means the model has produced its final answer
        if not response_message.tool_calls:
            return response_message.content

        # Execute each requested tool and feed the result back to the model
        for tool_call in response_message.tool_calls:
            function_name = tool_call.function.name
            function_args = json.loads(tool_call.function.arguments)
            print(f"Calling: {function_name}({function_args})")

            function_to_call = AVAILABLE_FUNCTIONS[function_name]
            function_response = function_to_call(**function_args)

            # Add the tool response to the conversation
            messages.append({
                "tool_call_id": tool_call.id,
                "role": "tool",
                "name": function_name,
                "content": json.dumps(function_response)
            })

    return "Max iterations reached"
Step 4: Adding Structured Output
from pydantic import BaseModel
from typing import List, Literal, Optional

class CodeIssue(BaseModel):
    severity: Literal["critical", "high", "medium", "low"]
    category: Literal["security", "bug", "performance", "style"]
    filepath: str
    line_number: int
    description: str
    suggestion: str
    code_example: Optional[str] = None

class CodeReview(BaseModel):
    summary: str
    issues: List[CodeIssue]
    overall_score: int  # 0-100

def run_structured_agent(user_message: str):
    """Run the agent with structured output."""
    # ... (same loop as before, but the final call uses response_format)
    final_response = client.chat.completions.create(
        model="gpt-4-turbo-preview",
        messages=messages,
        response_format={"type": "json_object"}
    )
    review_json = json.loads(final_response.choices[0].message.content)
    return CodeReview(**review_json)
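One detail worth noting: JSON mode only guarantees syntactically valid JSON. It does not know your schema, and OpenAI requires the word "JSON" to appear somewhere in the messages. A minimal sketch of how you might prime the final call (assumes Pydantic v2 for model_json_schema; the exact wording is up to you):

# Tell the model exactly what JSON shape to return before the final call
schema_prompt = {
    "role": "user",
    "content": "Return your review as JSON matching this schema:\n"
               + json.dumps(CodeReview.model_json_schema())
}
messages.append(schema_prompt)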
Step 5: Integration with GitHub
from github import Github
import os

def review_pr(repo_name: str, pr_number: int):
    """Review a GitHub PR."""
    g = Github(os.getenv('GITHUB_TOKEN'))
    repo = g.get_repo(repo_name)
    pr = repo.get_pull(pr_number)

    # Get changed files (skip deletions, which have no content to fetch)
    files = pr.get_files()
    file_contents = []
    for file in files:
        if file.status != 'removed':
            content = repo.get_contents(file.filename, ref=pr.head.sha)
            file_contents.append({
                'filename': file.filename,
                'patch': file.patch,
                'content': content.decoded_content.decode('utf-8')
            })

    # Build review prompt
    prompt = f"""Review this PR: {pr.title}
Files changed: {pr.changed_files}

{json.dumps(file_contents, indent=2)}

Analyze these changes and provide a detailed code review."""

    # Run agent
    review = run_structured_agent(prompt)

    # Post review comments
    for issue in review.issues:
        example = f"```\n{issue.code_example}\n```" if issue.code_example else ""
        pr.create_review_comment(
            body=f"""**{issue.severity.upper()}: {issue.category}**

{issue.description}

**Suggestion:**
{issue.suggestion}

{example}""",
            commit=pr.get_commits()[pr.commits - 1],
            path=issue.filepath,
            line=issue.line_number
        )

    return review
Production Considerations
Rate Limiting and Caching
import time
from functools import lru_cache
import hashlib

# Simple sliding-window rate limiter
class RateLimiter:
    def __init__(self, calls_per_minute: int = 60):
        self.calls_per_minute = calls_per_minute
        self.calls = []

    def wait_if_needed(self):
        now = time.time()
        # Drop calls older than one minute
        self.calls = [t for t in self.calls if now - t < 60]
        if len(self.calls) >= self.calls_per_minute:
            sleep_time = 60 - (now - self.calls[0])
            if sleep_time > 0:
                time.sleep(sleep_time)
        self.calls.append(time.time())

limiter = RateLimiter(calls_per_minute=50)

# Cache tool results, keyed on file contents so edits invalidate the cache
@lru_cache(maxsize=1000)
def _cached_read_file(filepath: str, file_hash: str):
    return read_file(filepath)

def cached_read_file(filepath: str):
    with open(filepath, 'rb') as f:
        file_hash = hashlib.md5(f.read()).hexdigest()
    return _cached_read_file(filepath, file_hash)
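How the cache behaves: every call re-hashes the file (cheap), but the full read_file result is only recomputed when the contents change. A quick illustration with a placeholder path:

info1 = cached_read_file('app.py')  # hashes the file, caches the read_file result
info2 = cached_read_file('app.py')  # hash unchanged, result served from the cache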
Error Handling and Recovery
import tenacity

@tenacity.retry(
    stop=tenacity.stop_after_attempt(3),
    wait=tenacity.wait_exponential(multiplier=1, min=2, max=10),
    retry=tenacity.retry_if_exception_type(Exception)
)
def safe_llm_call(messages, tools):
    """LLM call with rate limiting and retry logic."""
    try:
        limiter.wait_if_needed()
        return client.chat.completions.create(
            model="gpt-4-turbo-preview",
            messages=messages,
            tools=tools,
            timeout=30
        )
    except Exception as e:
        print(f"LLM call failed: {e}")
        raise

def safe_tool_call(function_name: str, function_args: dict):
    """Execute a tool with error handling."""
    try:
        function = AVAILABLE_FUNCTIONS.get(function_name)
        if not function:
            return {"success": False, "error": f"Unknown function: {function_name}"}
        result = function(**function_args)
        return result
    except Exception as e:
        return {
            "success": False,
            "error": f"Tool execution failed: {str(e)}"
        }
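To put these to work, only the two call sites inside run_agent's loop change. Roughly (a sketch; the surrounding loop stays as in Step 3):

# Inside run_agent's loop, swap the direct calls for the safe wrappers
response = safe_llm_call(messages, tools)  # instead of client.chat.completions.create(...)
# ...
function_response = safe_tool_call(function_name, function_args)  # instead of function_to_call(**function_args)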
Testing the Agent
# Example usage
if __name__ == "__main__":
    # Test 1: Review a specific file
    review = run_agent(
        "Review the file 'app.py' for security issues and bugs"
    )
    print(review)

    # Test 2: Review a PR
    pr_review = review_pr("myorg/myrepo", 123)
    print(f"Overall score: {pr_review.overall_score}")
    print(f"Found {len(pr_review.issues)} issues")
    for issue in pr_review.issues:
        print(f"- [{issue.severity}] {issue.description}")
Key Takeaways
- Tools make agents powerful: The ability to read files, run git commands, and search code transforms an LLM into an active agent
- Structured output is essential: Pydantic models ensure consistent, parseable results
- Error handling matters: Production agents need retries, rate limiting, and graceful failures
- Context is king: The more context tools provide, the better the review quality
- Iteration is necessary: Agents often need multiple tool calls to gather sufficient information
What’s Next
- Add more tools: Static analysis (pylint, eslint), test runners, performance profilers
- Memory system: Remember past reviews and learn from feedback
- Multi-agent collaboration: Separate agents for security, performance, style
- Human-in-the-loop: Flag uncertain findings for human review
References & Further Reading
- 📚 OpenAI Function Calling Guide
- 📚 Pydantic Documentation
- 📚 LangChain for Agent Frameworks
- 📚 “Building LLM Powered Applications” by Valentina Alto