AWS Bedrock Enterprise Architecture
The Foundation Model Landscape Has Matured
The most significant evolution in Bedrock has been the expansion of available foundation models. When we started, Claude 2 was the primary option. Today, the platform offers:
- Claude 3 (Haiku, Sonnet, Opus): Different price/performance tiers
- Amazon Titan: Multi-modal models for text and embeddings
- Meta Llama 2/3: Open-source high-performance models
- Mistral AI: European LLM with strong multilingual support
- Cohere Command: Enterprise-focused generation and classification
- Stability AI: Image generation (Stable Diffusion)
Model Selection Strategy
```python
import boto3
import json

bedrock = boto3.client('bedrock-runtime')

# Route requests to a price/performance tier based on query complexity
def route_to_model(query_complexity: str):
    models = {
        'simple': {
            'model_id': 'anthropic.claude-3-haiku-20240307-v1:0',
            'cost_per_1k': 0.00025,  # Input tokens
            'latency': '~500ms'
        },
        'moderate': {
            'model_id': 'anthropic.claude-3-sonnet-20240229-v1:0',
            'cost_per_1k': 0.003,
            'latency': '~1s'
        },
        'complex': {
            'model_id': 'anthropic.claude-3-opus-20240229-v1:0',
            'cost_per_1k': 0.015,
            'latency': '~2s'
        }
    }
    return models.get(query_complexity, models['moderate'])

# Example: customer service query
def invoke_bedrock(prompt: str, complexity: str = 'moderate'):
    model_config = route_to_model(complexity)
    response = bedrock.invoke_model(
        modelId=model_config['model_id'],
        body=json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1000,
            "temperature": 0.7,
            "messages": [
                {"role": "user", "content": prompt}
            ]
        }),
        contentType='application/json',
        accept='application/json'
    )
    return json.loads(response['body'].read())
```
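The Messages API nests generated text under a list of content blocks, so callers need one extraction step. A quick usage example (the prompt is illustrative):

```python
# Example usage: Anthropic's Messages format returns a list of
# content blocks; the generated text lives in the first one.
result = invoke_bedrock("Summarize this support ticket in one sentence.",
                        complexity='simple')
print(result['content'][0]['text'])
```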
Knowledge Bases Changed Everything
Setting Up a Knowledge Base
```python
import boto3

bedrock_agent = boto3.client('bedrock-agent')

# 1. Create Knowledge Base
kb_response = bedrock_agent.create_knowledge_base(
    name='enterprise-docs-kb',
    description='Company policies and procedures',
    roleArn='arn:aws:iam::123456789:role/BedrockKBRole',
    knowledgeBaseConfiguration={
        'type': 'VECTOR',
        'vectorKnowledgeBaseConfiguration': {
            'embeddingModelArn': 'arn:aws:bedrock:us-east-1::foundation-model/amazon.titan-embed-text-v2:0'
        }
    },
    storageConfiguration={
        'type': 'OPENSEARCH_SERVERLESS',
        'opensearchServerlessConfiguration': {
            'collectionArn': 'arn:aws:aoss:us-east-1:123456789:collection/kb-collection',
            'vectorIndexName': 'bedrock-knowledge-base-index',
            'fieldMapping': {
                'vectorField': 'bedrock-knowledge-base-default-vector',
                'textField': 'AMAZON_BEDROCK_TEXT_CHUNK',
                'metadataField': 'AMAZON_BEDROCK_METADATA'
            }
        }
    }
)
kb_id = kb_response['knowledgeBase']['knowledgeBaseId']

# 2. Add Data Source (S3)
ds_response = bedrock_agent.create_data_source(
    knowledgeBaseId=kb_id,
    name='s3-documents',
    dataSourceConfiguration={
        'type': 'S3',
        's3Configuration': {
            'bucketArn': 'arn:aws:s3:::my-enterprise-docs',
            'inclusionPrefixes': ['policies/', 'procedures/']
        }
    },
    vectorIngestionConfiguration={
        'chunkingConfiguration': {
            'chunkingStrategy': 'FIXED_SIZE',
            'fixedSizeChunkingConfiguration': {
                'maxTokens': 300,
                'overlapPercentage': 20
            }
        }
    }
)

# 3. Sync data (ingestion runs asynchronously)
ingestion_job = bedrock_agent.start_ingestion_job(
    knowledgeBaseId=kb_id,
    dataSourceId=ds_response['dataSource']['dataSourceId']
)
```
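Because ingestion is asynchronous, the sync step is easy to miss in testing. A minimal polling sketch (status names per the bedrock-agent GetIngestionJob API; the 15-second interval is arbitrary):

```python
import time

# Sketch: poll the ingestion job until it finishes before querying the KB.
job_id = ingestion_job['ingestionJob']['ingestionJobId']
while True:
    status = bedrock_agent.get_ingestion_job(
        knowledgeBaseId=kb_id,
        dataSourceId=ds_response['dataSource']['dataSourceId'],
        ingestionJobId=job_id
    )['ingestionJob']['status']
    if status in ('COMPLETE', 'FAILED'):
        print(f"Ingestion finished with status: {status}")
        break
    time.sleep(15)  # arbitrary polling interval
```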
Querying the Knowledge Base
```python
bedrock_agent_runtime = boto3.client('bedrock-agent-runtime')

def query_kb(question: str, kb_id: str):
    response = bedrock_agent_runtime.retrieve_and_generate(
        input={
            'text': question
        },
        retrieveAndGenerateConfiguration={
            'type': 'KNOWLEDGE_BASE',
            'knowledgeBaseConfiguration': {
                'knowledgeBaseId': kb_id,
                'modelArn': 'arn:aws:bedrock:us-east-1::foundation-model/anthropic.claude-3-sonnet-20240229-v1:0',
                'retrievalConfiguration': {
                    'vectorSearchConfiguration': {
                        'numberOfResults': 5,
                        'overrideSearchType': 'HYBRID'  # Semantic + keyword
                    }
                }
            }
        }
    )
    # Response includes citations!
    return {
        'answer': response['output']['text'],
        'citations': [
            {
                'content': c['retrievedReferences'][0]['content']['text'],
                'source': c['retrievedReferences'][0]['location']['s3Location']
            }
            for c in response.get('citations', [])
            if c.get('retrievedReferences')  # some citations carry no references
        ]
    }

# Example usage
result = query_kb("What is our refund policy?", "KB123ABC")
print(f"Answer: {result['answer']}")
print("\nSources:")
for cite in result['citations']:
    print(f" - {cite['source']}")
```
Bedrock Agents: From Chatbots to Autonomous Systems
```python
# Define agent action group (tools)
action_group = {
    'actionGroupName': 'order-management',
    'description': 'Manage customer orders',
    'actionGroupExecutor': {
        'lambda': 'arn:aws:lambda:us-east-1:123456789:function:order-tools'
    },
    'apiSchema': {
        's3': {
            'bucketName': 'my-agent-schemas',
            'objectKey': 'order-api-schema.json'
        }
    }
}

# Create agent
agent_response = bedrock_agent.create_agent(
    agentName='customer-service-agent',
    description='Handles customer inquiries and actions',
    foundationModel='anthropic.claude-3-sonnet-20240229-v1:0',
    instruction='''You are a helpful customer service agent.
Use available tools to:
- Check order status
- Process returns
- Update shipping addresses
Always be polite and verify customer identity before making changes.''',
    idleSessionTTLInSeconds=600
)
agent_id = agent_response['agent']['agentId']

# Add action group to agent
bedrock_agent.create_agent_action_group(
    agentId=agent_id,
    agentVersion='DRAFT',
    **action_group
)

# Add knowledge base to agent
bedrock_agent.associate_agent_knowledge_base(
    agentId=agent_id,
    agentVersion='DRAFT',
    knowledgeBaseId=kb_id,
    description='Company knowledge base'
)

# Prepare the agent, then create an alias for invocation
bedrock_agent.prepare_agent(agentId=agent_id)
alias_response = bedrock_agent.create_agent_alias(
    agentId=agent_id,
    agentAliasName='production'
)
alias_id = alias_response['agentAlias']['agentAliasId']
```
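One gotcha: prepare_agent returns before preparation actually completes, so an alias created immediately afterward can point at an unprepared agent. A sketch that polls get_agent between the two calls (status names per the bedrock-agent GetAgent API):

```python
import time

# Sketch: wait for the agent to finish preparing before aliasing it.
def wait_until_prepared(agent_id: str, timeout_s: int = 300):
    deadline = time.time() + timeout_s
    while time.time() < deadline:
        status = bedrock_agent.get_agent(agentId=agent_id)['agent']['agentStatus']
        if status == 'PREPARED':
            return
        if status == 'FAILED':
            raise RuntimeError('Agent preparation failed')
        time.sleep(5)
    raise TimeoutError('Agent never reached PREPARED')
```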
Invoking the Agent
```python
def chat_with_agent(agent_id: str, alias_id: str, session_id: str, prompt: str):
    response = bedrock_agent_runtime.invoke_agent(
        agentId=agent_id,
        agentAliasId=alias_id,
        sessionId=session_id,
        inputText=prompt
    )
    # Stream response
    completion = ""
    for event in response.get('completion'):
        chunk = event.get('chunk')
        if chunk:
            completion += chunk.get('bytes').decode()
    return completion

# Multi-turn conversation: the same sessionId carries context across calls
session_id = "user-12345"
response1 = chat_with_agent(agent_id, alias_id, session_id,
                            "Where is my order #ABC123?")
print(response1)

response2 = chat_with_agent(agent_id, alias_id, session_id,
                            "Can you update the shipping address to 123 Main St?")
print(response2)
```
Guardrails: The Missing Piece for Enterprise Adoption
```python
# Guardrails are managed through the control-plane client, not bedrock-runtime
bedrock_control = boto3.client('bedrock')

# Create Guardrail
guardrail_response = bedrock_control.create_guardrail(
    name='enterprise-ai-guardrail',
    description='Prevent sensitive data leakage and ensure appropriate content',
    # Block sensitive topics
    topicPolicyConfig={
        'topicsConfig': [
            {
                'name': 'Financial Advice',
                'definition': 'Investment recommendations or financial planning',
                'examples': ['Which stocks should I buy?', 'How should I invest?'],
                'type': 'DENY'
            },
            {
                'name': 'Medical Diagnosis',
                'definition': 'Medical diagnoses or treatment recommendations',
                'examples': ['Do I have cancer?', 'What medication should I take?'],
                'type': 'DENY'
            }
        ]
    },
    # PII detection and redaction
    sensitiveInformationPolicyConfig={
        'piiEntitiesConfig': [
            {'type': 'EMAIL', 'action': 'BLOCK'},
            {'type': 'PHONE', 'action': 'ANONYMIZE'},
            {'type': 'US_SOCIAL_SECURITY_NUMBER', 'action': 'BLOCK'},
            {'type': 'CREDIT_DEBIT_CARD_NUMBER', 'action': 'BLOCK'}
        ]
    },
    # Content filters
    contentPolicyConfig={
        'filtersConfig': [
            {'type': 'SEXUAL', 'inputStrength': 'HIGH', 'outputStrength': 'HIGH'},
            {'type': 'VIOLENCE', 'inputStrength': 'MEDIUM', 'outputStrength': 'MEDIUM'},
            {'type': 'HATE', 'inputStrength': 'HIGH', 'outputStrength': 'HIGH'},
            {'type': 'INSULTS', 'inputStrength': 'MEDIUM', 'outputStrength': 'MEDIUM'},
            {'type': 'MISCONDUCT', 'inputStrength': 'HIGH', 'outputStrength': 'HIGH'},
            # Prompt-attack filtering applies to input only
            {'type': 'PROMPT_ATTACK', 'inputStrength': 'HIGH', 'outputStrength': 'NONE'}
        ]
    },
    # Word filters
    wordPolicyConfig={
        'wordsConfig': [
            {'text': 'CompetitorName'},
            {'text': 'InternalCodename'}
        ],
        'managedWordListsConfig': [
            {'type': 'PROFANITY'}
        ]
    },
    blockedInputMessaging='I cannot process requests about that topic.',
    blockedOutputsMessaging='I cannot provide that information.'
)
guardrail_id = guardrail_response['guardrailId']

# Use guardrail with model invocation
def safe_invoke(prompt: str, guardrail_id: str):
    response = bedrock.invoke_model(
        modelId='anthropic.claude-3-sonnet-20240229-v1:0',
        body=json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 1000
        }),
        guardrailIdentifier=guardrail_id,
        guardrailVersion='DRAFT'  # switch to a numbered version once published
    )
    body = json.loads(response['body'].read())
    # With a guardrail attached, the response body carries an
    # amazon-bedrock-guardrailAction field ('INTERVENED' or 'NONE')
    if body.get('amazon-bedrock-guardrailAction') == 'INTERVENED':
        return {'blocked': True, 'content': body}
    return {'blocked': False, 'content': body}

result = safe_invoke("What's your competitor's pricing?", guardrail_id)
```
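Guardrail policies can also be exercised without paying for a model call via the ApplyGuardrail runtime API, which is handy for unit-testing the configuration. A sketch, assuming the runtime client from earlier (source can be 'INPUT' or 'OUTPUT'):

```python
# Sketch: evaluate the guardrail against sample text directly.
def test_guardrail(text: str, guardrail_id: str) -> str:
    response = bedrock.apply_guardrail(
        guardrailIdentifier=guardrail_id,
        guardrailVersion='DRAFT',
        source='INPUT',  # 'OUTPUT' to test model-response filtering
        content=[{'text': {'text': text}}]
    )
    # 'action' is 'GUARDRAIL_INTERVENED' or 'NONE'
    return response['action']

print(test_guardrail("Which stocks should I buy?", guardrail_id))
```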
Architecture Patterns That Work
Pattern 1: Async Processing with Step Functions
```json
{
  "Comment": "Document processing pipeline",
  "StartAt": "ExtractText",
  "States": {
    "ExtractText": {
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "extract-text-from-pdf"
      },
      "Next": "ClassifyDocument"
    },
    "ClassifyDocument": {
      "Type": "Task",
      "Resource": "arn:aws:states:::bedrock:invokeModel",
      "Parameters": {
        "ModelId": "anthropic.claude-3-haiku-20240307-v1:0",
        "Body": {
          "anthropic_version": "bedrock-2023-05-31",
          "messages": [{
            "role": "user",
            "content": "Classify this document into: invoice, contract, receipt"
          }],
          "max_tokens": 100
        }
      },
      "Next": "RouteByType"
    },
    "RouteByType": {
      "Type": "Choice",
      "Choices": [
        {
          "Variable": "$.classification",
          "StringEquals": "invoice",
          "Next": "ProcessInvoice"
        },
        {
          "Variable": "$.classification",
          "StringEquals": "contract",
          "Next": "ProcessContract"
        }
      ],
      "Default": "ProcessGeneric"
    },
    "ProcessInvoice": {
      "Type": "Task",
      "Resource": "arn:aws:states:::bedrock:invokeModel",
      "Parameters": {
        "ModelId": "anthropic.claude-3-sonnet-20240229-v1:0",
        "Body": {
          "anthropic_version": "bedrock-2023-05-31",
          "messages": [{
            "role": "user",
            "content": "Extract: vendor, amount, date, line items"
          }],
          "max_tokens": 500
        }
      },
      "End": true
    },
    "ProcessContract": {
      "Type": "Pass",
      "End": true
    },
    "ProcessGeneric": {
      "Type": "Pass",
      "End": true
    }
  }
}
```
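Starting the pipeline from application code is then a single Step Functions call. A sketch (the state machine ARN and input shape are placeholders for this example):

```python
import json
import boto3

sfn = boto3.client('stepfunctions')

# Sketch: kick off one execution per incoming document.
sfn.start_execution(
    stateMachineArn='arn:aws:states:us-east-1:123456789:stateMachine:doc-pipeline',  # placeholder
    input=json.dumps({'bucket': 'my-enterprise-docs', 'key': 'inbox/invoice-42.pdf'})
)
```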
Cost Management in Production
Generative AI costs can spiral quickly. We’ve reduced costs by 40-60% without sacrificing quality using these strategies:

| Strategy | Impact | Implementation |
|---|---|---|
| Prompt caching | -40% input costs | Cache system prompts (supported by Claude models) |
| Model routing | -30% overall | Haiku for simple queries, Opus for complex ones |
| Response streaming | -15% output costs | Terminate early once the answer is sufficient (see sketch below) |
| Batch processing | -20% throughput cost | Run non-urgent tasks overnight |
| Token limits | -25% overruns | Set max_tokens per use case |
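The streaming row relies on invoke_model_with_response_stream. A minimal sketch of early termination, assuming the runtime client from earlier and an application-defined stop marker (the marker is illustrative, and the saving assumes the stream is closed before generation completes):

```python
def stream_until_sufficient(prompt: str, stop_marker: str = '</answer>'):
    # Sketch: consume the stream and stop once our marker appears.
    response = bedrock.invoke_model_with_response_stream(
        modelId='anthropic.claude-3-haiku-20240307-v1:0',
        body=json.dumps({
            "anthropic_version": "bedrock-2023-05-31",
            "max_tokens": 1000,
            "messages": [{"role": "user", "content": prompt}]
        })
    )
    text = ""
    for event in response['body']:
        chunk = json.loads(event['chunk']['bytes'])
        # Anthropic streaming emits content_block_delta events with text
        if chunk.get('type') == 'content_block_delta':
            text += chunk['delta'].get('text', '')
            if stop_marker in text:
                break  # close the stream early
    return text
```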
Cost Tracking Implementation
```python
import boto3
from datetime import datetime, timezone

cloudwatch = boto3.client('cloudwatch')

def track_bedrock_costs(model_id: str, input_tokens: int, output_tokens: int):
    # On-demand pricing per token (as of 2024)
    pricing = {
        'anthropic.claude-3-haiku-20240307-v1:0': {
            'input': 0.00025 / 1000,
            'output': 0.00125 / 1000
        },
        'anthropic.claude-3-sonnet-20240229-v1:0': {
            'input': 0.003 / 1000,
            'output': 0.015 / 1000
        },
        'anthropic.claude-3-opus-20240229-v1:0': {
            'input': 0.015 / 1000,
            'output': 0.075 / 1000
        }
    }
    model_pricing = pricing.get(model_id, {'input': 0, 'output': 0})
    cost = (input_tokens * model_pricing['input'] +
            output_tokens * model_pricing['output'])

    # Log to CloudWatch
    cloudwatch.put_metric_data(
        Namespace='BedrockCosts',
        MetricData=[
            {
                'MetricName': 'TotalCost',
                'Value': cost,
                'Unit': 'None',
                'Timestamp': datetime.now(timezone.utc),
                'Dimensions': [
                    {'Name': 'ModelId', 'Value': model_id},
                    {'Name': 'Environment', 'Value': 'production'}
                ]
            },
            {
                'MetricName': 'InputTokens',
                'Value': input_tokens,
                'Unit': 'Count'
            },
            {
                'MetricName': 'OutputTokens',
                'Value': output_tokens,
                'Unit': 'Count'
            }
        ]
    )
    return cost
```
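With the TotalCost metric flowing, a budget alert is a single put_metric_alarm call. A sketch (the SNS topic ARN and the $50/hour threshold are placeholders; CloudWatch alarms must match the metric's dimensions exactly, so this one is scoped to a single model):

```python
# Sketch: alarm when hourly spend on one model crosses a threshold.
cloudwatch.put_metric_alarm(
    AlarmName='bedrock-hourly-cost-sonnet',
    Namespace='BedrockCosts',
    MetricName='TotalCost',
    Dimensions=[
        {'Name': 'ModelId', 'Value': 'anthropic.claude-3-sonnet-20240229-v1:0'},
        {'Name': 'Environment', 'Value': 'production'}
    ],
    Statistic='Sum',
    Period=3600,              # one-hour windows
    EvaluationPeriods=1,
    Threshold=50.0,           # placeholder: $50/hour
    ComparisonOperator='GreaterThanThreshold',
    AlarmActions=['arn:aws:sns:us-east-1:123456789:cost-alerts']  # placeholder
)
```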
What I Wish I Knew Earlier
- Start with managed features first: Don’t build custom RAG—use Knowledge Bases
- Model fine-tuning is rarely necessary: Prompt engineering + RAG typically beats fine-tuning
- Plan for model evolution: Abstract model selection behind interfaces (see the sketch after this list)
- Guardrails from day one: Easier than retrofitting compliance later
- Cost tracking is essential: Set budgets and alerts before going live
- Test multi-turn conversations: Session management is harder than it looks
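For the third item, here is a sketch of what "abstract model selection behind interfaces" can look like in practice (the Protocol and class names are illustrative, not from the original post):

```python
import json
from typing import Protocol

import boto3

bedrock = boto3.client('bedrock-runtime')

# Illustrative seam: callers depend on TextModel, never on a model ID.
class TextModel(Protocol):
    def generate(self, prompt: str, max_tokens: int = 1000) -> str: ...

class BedrockAnthropicModel:
    def __init__(self, model_id: str):
        self.model_id = model_id  # the only place a model ID lives

    def generate(self, prompt: str, max_tokens: int = 1000) -> str:
        response = bedrock.invoke_model(
            modelId=self.model_id,
            body=json.dumps({
                "anthropic_version": "bedrock-2023-05-31",
                "max_tokens": max_tokens,
                "messages": [{"role": "user", "content": prompt}]
            })
        )
        payload = json.loads(response['body'].read())
        return payload['content'][0]['text']

# Swapping to a newer model is now configuration, not a refactor.
default_model: TextModel = BedrockAnthropicModel(
    'anthropic.claude-3-sonnet-20240229-v1:0'
)
```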
Looking Forward
AWS continues to invest heavily in Bedrock:
- Model evaluation tools: A/B test different models
- Improved fine-tuning: Lower costs, easier workflows
- Expanded model selection: More providers and model versions over time
- Better observability: Built-in tracing and debugging
- Multi-modal agents: Vision + text in a single agent
Conclusion
For organizations beginning their generative AI journey, Bedrock offers a compelling combination of flexibility, security, and operational simplicity. For those already invested in AWS, the integration benefits alone justify serious consideration. The key is approaching adoption strategically: starting with well-defined use cases, implementing proper governance from the beginning, and building architectures that can evolve as the technology matures.
References
- 📚 AWS Bedrock Official Docs
- 📚 Bedrock Knowledge Bases Guide
- 📚 Bedrock Agents Documentation
- 📚 AWS Bedrock Code Samples