Introduction: Query understanding is the critical first step in building intelligent AI systems that respond appropriately to user requests. Before your system can retrieve relevant documents, call the right tools, or generate helpful responses, it needs to understand what the user actually wants. This involves intent classification (is this a question, command, or conversation?), entity extraction (what specific things are mentioned?), and query expansion (what related terms should we search for?). Poor query understanding leads to irrelevant results and frustrated users; excellent query understanding makes AI systems feel intuitive and helpful. This guide covers practical patterns for building query understanding pipelines: from rule-based classifiers to LLM-powered intent detection, entity extraction techniques, and query expansion strategies that improve retrieval accuracy.

Intent Classification
from dataclasses import dataclass, field
from typing import Any, Optional, List, Dict, Callable
from enum import Enum
import re
class IntentType(Enum):
    """Closed set of intent labels a classifier can assign to a user query.

    Each member's value is the lowercase string form of its name, so a label
    can be recovered from text with ``IntentType("question")``.
    """

    QUESTION = "question"
    COMMAND = "command"
    SEARCH = "search"
    CONVERSATION = "conversation"
    FEEDBACK = "feedback"
    CLARIFICATION = "clarification"
@dataclass
class IntentResult:
    """Result of intent classification.

    Attributes:
        intent: The intent label chosen by the classifier.
        confidence: Score the classifier attached to ``intent``.
        sub_intent: Optional finer-grained label; ``None`` when not provided.
        metadata: Free-form extra information; every instance gets its own
            dict via ``default_factory``.
    """

    intent: IntentType
    confidence: float
    # Annotated as Optional because the default is None, not a str.
    sub_intent: Optional[str] = None
    metadata: dict = field(default_factory=dict)
class RuleBasedClassifier:
    """Rule-based intent classifier.

    Holds an ordered list of ``(predicate, intent, confidence)`` rules.
    ``classify`` reports the intent of the first rule whose predicate accepts
    the query, so registration order is the rule priority.
    """

    def __init__(self):
        # Each rule: (predicate over the raw query, intent to report, confidence).
        self.rules: list[tuple[Callable[[str], bool], IntentType, float]] = []
        self._register_default_rules()

    def _register_default_rules(self):
        """Register the default rules: question, command, search, feedback."""
        # A trailing space is part of each prefix so only whole leading words
        # match ("is it" matches "is ", "island" does not).
        question_prefixes = tuple(
            word + " "
            for word in (
                "what", "who", "where", "when", "why", "how", "which",
                "can", "could", "would", "is", "are", "do", "does",
            )
        )
        command_prefixes = tuple(
            word + " "
            for word in (
                "create", "delete", "update", "add", "remove", "set", "change",
                "make", "build", "generate", "show", "list", "find",
            )
        )
        search_phrases = ("search for", "find me", "look up")
        feedback_words = (
            "thanks", "thank you", "great", "good", "bad", "wrong",
            "correct", "yes", "no",
        )
        # The word boundary keeps "no" from matching "north" and "good" from
        # matching "goodbye", while still accepting "thanks!" or "yes, please".
        feedback_pattern = re.compile(
            r"(?:" + "|".join(re.escape(word) for word in feedback_words) + r")\b"
        )

        def is_question(text: str) -> bool:
            normalized = text.lower().strip()
            # Test the stripped text so trailing whitespace cannot hide the "?".
            return normalized.endswith("?") or normalized.startswith(question_prefixes)

        def is_command(text: str) -> bool:
            return text.lower().strip().startswith(command_prefixes)

        def is_feedback(text: str) -> bool:
            return feedback_pattern.match(text.lower().strip()) is not None

        def is_search(text: str) -> bool:
            lowered = text.lower()
            if any(phrase in lowered for phrase in search_phrases):
                return True
            # Short queries are often searches, but this fallback runs before
            # the feedback rule, so it must not claim short feedback such as
            # "thanks" or "yes".
            return len(text.split()) <= 5 and not is_feedback(text)

        self.rules.append((is_question, IntentType.QUESTION, 0.9))
        self.rules.append((is_command, IntentType.COMMAND, 0.85))
        self.rules.append((is_search, IntentType.SEARCH, 0.7))
        self.rules.append((is_feedback, IntentType.FEEDBACK, 0.8))

    def classify(self, query: str) -> IntentResult:
        """Classify query intent.

        Args:
            query: Raw user query text.

        Returns:
            The intent and confidence of the first matching rule, or
            ``IntentType.CONVERSATION`` with confidence 0.5 when no rule matches.
        """
        for rule, intent, confidence in self.rules:
            if rule(query):
                return IntentResult(intent=intent, confidence=confidence)
        # Default to conversation
        return IntentResult(intent=IntentType.CONVERSATION, confidence=0.5)
class EmbeddingClassifier:
    """Embedding-based intent classifier.

    Each intent is represented by the mean embedding of its examples; a query
    is assigned the intent whose mean embedding has the highest cosine
    similarity to the query embedding.

    ``embedding_model`` must provide awaitable ``embed(text)`` and
    ``embed_batch(texts)`` methods.
    """

    def __init__(self, embedding_model: Any):
        self.embedding_model = embedding_model
        self.intent_embeddings: dict[IntentType, list[float]] = {}
        self.examples: dict[IntentType, list[str]] = {}

    def add_examples(self, intent: IntentType, examples: list[str]):
        """Set the training examples for ``intent``, replacing earlier ones."""
        self.examples[intent] = examples

    async def train(self):
        """Compute one averaged embedding per intent from its examples."""
        import numpy as np

        for intent, examples in self.examples.items():
            if not examples:
                # The mean of an empty batch is NaN; skip intents without data.
                continue
            embeddings = await self.embedding_model.embed_batch(examples)
            # Average embeddings for intent
            self.intent_embeddings[intent] = np.mean(embeddings, axis=0).tolist()

    async def classify(self, query: str) -> IntentResult:
        """Classify query using embeddings.

        Returns:
            The best-matching intent with its cosine similarity as confidence.
            When no intent has been trained, ``IntentType.CONVERSATION`` with
            confidence 0.0 is returned instead of a result whose intent is None.
        """
        if not self.intent_embeddings:
            return IntentResult(intent=IntentType.CONVERSATION, confidence=0.0)

        query_embedding = await self.embedding_model.embed(query)
        best_intent = None
        # Start below any possible cosine value so the first intent always wins
        # the initial comparison.
        best_similarity = float("-inf")
        for intent, intent_embedding in self.intent_embeddings.items():
            similarity = self._cosine_similarity(query_embedding, intent_embedding)
            if similarity > best_similarity:
                best_similarity = similarity
                best_intent = intent
        return IntentResult(intent=best_intent, confidence=best_similarity)

    def _cosine_similarity(self, a: list, b: list) -> float:
        """Return the cosine similarity of ``a`` and ``b`` as a Python float.

        A zero-length vector has no direction, so 0.0 is returned rather than
        dividing by zero.
        """
        import numpy as np

        vec_a = np.asarray(a, dtype=float)
        vec_b = np.asarray(b, dtype=float)
        denominator = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        if denominator == 0:
            return 0.0
        return float(np.dot(vec_a, vec_b) / denominator)
class LLMClassifier:
    """LLM-based intent classifier.

    ``llm_client`` must provide an awaitable ``generate(prompt)`` that returns
    the model's text response.
    """

    def __init__(self, llm_client: Any):
        self.llm = llm_client
        self.intents: list[dict] = []

    def register_intent(
        self,
        intent: str,
        description: str,
        examples: list[str]
    ):
        """Register an intent that will be listed in the classification prompt."""
        self.intents.append({
            "intent": intent,
            "description": description,
            "examples": examples
        })

    async def classify(self, query: str) -> IntentResult:
        """Classify query using LLM.

        Returns:
            The parsed intent, confidence and reasoning. A label that is not an
            ``IntentType`` value maps to ``IntentType.CONVERSATION``. If the
            response cannot be parsed at all, CONVERSATION with confidence 0.5
            is returned.
        """
        import json

        intent_descriptions = "\n".join([
            f"- {i['intent']}: {i['description']} (e.g., {', '.join(i['examples'][:2])})"
            for i in self.intents
        ])
        # The placeholders show the model what belongs in each JSON field.
        prompt = f"""Classify the following user query into one of these intents:
{intent_descriptions}
Query: "{query}"
Respond with JSON:
{{"intent": "<intent_name>", "confidence": <0.0-1.0>, "reasoning": "<brief explanation>"}}
JSON:"""
        response = await self.llm.generate(prompt)
        try:
            data = json.loads(response)
            try:
                intent = IntentType(data["intent"])
            except (ValueError, TypeError):
                # Label is not one of the known IntentType values.
                intent = IntentType.CONVERSATION
            # Coerce and clamp: the model may return a string or an
            # out-of-range number.
            confidence = min(1.0, max(0.0, float(data.get("confidence", 0.8))))
            return IntentResult(
                intent=intent,
                confidence=confidence,
                metadata={"reasoning": data.get("reasoning", "")}
            )
        except (KeyError, TypeError, ValueError, AttributeError):
            # Malformed or non-JSON response (JSONDecodeError is a ValueError).
            return IntentResult(
                intent=IntentType.CONVERSATION,
                confidence=0.5
            )
Entity Extraction
from dataclasses import dataclass
from typing import Any, Optional, List
import re
@dataclass
class Entity:
    """An extracted entity.

    Attributes:
        text: The matched surface text.
        entity_type: Label of the entity category (e.g. ``"DATE"``).
        start: Index of the first character of the match in the source text.
        end: Index one past the last character of the match.
        confidence: Score assigned by the extractor.
        normalized: Optional canonical form; ``None`` when not provided.
    """

    text: str
    entity_type: str
    start: int
    end: int
    confidence: float
    # Annotated as Optional because the default is None, not a str.
    normalized: Optional[str] = None
class PatternEntityExtractor:
    """Pattern-based entity extraction.

    Keeps a mapping from entity type to compiled regular expressions and
    reports every match as an ``Entity`` with confidence 0.9.
    """

    def __init__(self):
        self.patterns: dict[str, list[re.Pattern]] = {}
        self._register_default_patterns()

    def _register_default_patterns(self):
        """Register default entity patterns (DATE, TIME, NUMBER, EMAIL, URL)."""
        # Date patterns
        self.patterns["DATE"] = [
            re.compile(r'\b\d{1,2}/\d{1,2}/\d{2,4}\b'),
            re.compile(r'\b\d{4}-\d{2}-\d{2}\b'),
            re.compile(r'\b(today|tomorrow|yesterday)\b', re.I),
            re.compile(r'\b(monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b', re.I),
            re.compile(r'\b(january|february|march|april|may|june|july|august|september|october|november|december)\s+\d{1,2}(,?\s+\d{4})?\b', re.I)
        ]
        # Time patterns
        self.patterns["TIME"] = [
            re.compile(r'\b\d{1,2}:\d{2}(\s*(am|pm))?\b', re.I),
            re.compile(r'\b\d{1,2}\s*(am|pm)\b', re.I)
        ]
        # Number patterns
        self.patterns["NUMBER"] = [
            re.compile(r'\b\d+(\.\d+)?\b'),
            re.compile(r'\$\d+(\.\d{2})?'),
            # No trailing \b: "%" is a non-word character, so a boundary after
            # it only exists when a word character follows, which made "50%"
            # unmatched before a space or at the end of the text.
            re.compile(r'\b\d+(?:\.\d+)?%')
        ]
        # Email patterns
        self.patterns["EMAIL"] = [
            re.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
        ]
        # URL patterns
        self.patterns["URL"] = [
            re.compile(r'https?://\S+')
        ]

    def extract(self, text: str) -> list[Entity]:
        """Extract entities from text.

        Returns:
            Non-overlapping entities ordered by start position. When matches
            overlap, the earliest one wins, and among matches starting at the
            same position the longest wins.
        """
        entities = [
            Entity(
                text=match.group(),
                entity_type=entity_type,
                start=match.start(),
                end=match.end(),
                confidence=0.9
            )
            for entity_type, patterns in self.patterns.items()
            for pattern in patterns
            for match in pattern.finditer(text)
        ]
        return self._remove_overlaps(entities)

    def _remove_overlaps(self, entities: list[Entity]) -> list[Entity]:
        """Return a new list without overlapping entities.

        The input list is left untouched.
        """
        # Sort by start position, then by length (descending)
        ordered = sorted(entities, key=lambda e: (e.start, -(e.end - e.start)))
        kept: list[Entity] = []
        for entity in ordered:
            # Keep an entity only if it begins at or after the end of the last kept one.
            if not kept or entity.start >= kept[-1].end:
                kept.append(entity)
        return kept
class LLMEntityExtractor:
    """LLM-based entity extraction.

    ``llm_client`` must provide an awaitable ``generate(prompt)`` that returns
    the model's text response.
    """

    def __init__(self, llm_client: Any):
        self.llm = llm_client
        self.entity_types: list[dict] = []

    def register_entity_type(
        self,
        name: str,
        description: str,
        examples: list[str]
    ):
        """Register an entity type that will be listed in the extraction prompt."""
        self.entity_types.append({
            "name": name,
            "description": description,
            "examples": examples
        })

    async def extract(self, text: str) -> list[Entity]:
        """Extract entities using LLM.

        Returns:
            Entities whose text occurs in ``text``, with confidence 0.8.
            Malformed items, and items whose text cannot be located, are
            skipped. An unparseable or non-list response yields ``[]``.
        """
        import json

        type_descriptions = "\n".join([
            f"- {t['name']}: {t['description']} (e.g., {', '.join(t['examples'][:3])})"
            for t in self.entity_types
        ])
        # The placeholders show the model what belongs in each JSON field.
        prompt = f"""Extract entities from the following text.
Entity types to extract:
{type_descriptions}
Text: "{text}"
Respond with JSON array:
[{{"text": "<entity text>", "type": "<entity type>", "normalized": "<normalized value>"}}]
JSON:"""
        response = await self.llm.generate(prompt)
        try:
            data = json.loads(response)
        except (TypeError, ValueError):
            # Non-string or non-JSON response (JSONDecodeError is a ValueError).
            return []
        if not isinstance(data, list):
            return []

        entities = []
        # Where to resume searching for each surface string, so a repeated
        # mention maps to its next occurrence instead of the first one again.
        search_from: dict[str, int] = {}
        for item in data:
            if not isinstance(item, dict):
                continue
            surface = item.get("text")
            entity_type = item.get("type")
            if not isinstance(surface, str) or not surface or not isinstance(entity_type, str):
                continue
            # Find position in text
            start = text.find(surface, search_from.get(surface, 0))
            if start < 0:
                continue
            end = start + len(surface)
            search_from[surface] = end
            entities.append(Entity(
                text=surface,
                entity_type=entity_type,
                start=start,
                end=end,
                confidence=0.8,
                normalized=item.get("normalized")
            ))
        return entities
class HybridEntityExtractor:
    """Combine pattern and LLM extraction."""

    def __init__(
        self,
        pattern_extractor: PatternEntityExtractor,
        llm_extractor: LLMEntityExtractor
    ):
        self.pattern = pattern_extractor
        self.llm = llm_extractor

    async def extract(self, text: str) -> list[Entity]:
        """Extract entities using both methods.

        Pattern results come first, followed by LLM results that do not
        duplicate an entity already seen.

        Returns:
            Entities that are unique by ``(start, end, entity_type)``. Keying
            on the span rather than the text keeps repeated mentions of the
            same text at different positions.
        """
        # Get pattern-based entities (fast, high precision)
        pattern_entities = self.pattern.extract(text)
        # Get LLM entities (slower, better recall)
        llm_entities = await self.llm.extract(text)

        seen: set[tuple[int, int, str]] = set()
        unique: list[Entity] = []
        for entity in pattern_entities + llm_entities:
            key = (entity.start, entity.end, entity.entity_type)
            if key in seen:
                continue
            seen.add(key)
            unique.append(entity)
        return unique
Query Expansion
from dataclasses import dataclass
from typing import Any, Optional, List
@dataclass
class ExpandedQuery:
    """Container for a query together with its expansion data.

    Attributes:
        original: The query as it was passed to the expander.
        expanded: The expanded query string.
        synonyms: Synonym terms collected for the query.
        related_terms: Related terms collected for the query.
        reformulations: Alternative phrasings of the query.
    """

    original: str
    expanded: str
    synonyms: list[str]
    related_terms: list[str]
    reformulations: list[str]
class SynonymExpander:
    """Produce query variants by swapping one word at a time for a synonym."""

    def __init__(self):
        self.synonyms: dict[str, list[str]] = {}
        self._load_default_synonyms()

    def _load_default_synonyms(self):
        """Replace ``self.synonyms`` with the built-in word-to-synonyms table."""
        defaults = (
            ("create", ["make", "build", "generate", "produce"]),
            ("delete", ["remove", "erase", "destroy", "eliminate"]),
            ("update", ["modify", "change", "edit", "revise"]),
            ("find", ["search", "locate", "discover", "lookup"]),
            ("show", ["display", "present", "reveal", "list"]),
            ("fast", ["quick", "rapid", "speedy", "swift"]),
            ("slow", ["sluggish", "delayed", "gradual"]),
            ("big", ["large", "huge", "massive", "enormous"]),
            ("small", ["tiny", "little", "compact", "miniature"]),
            ("good", ["excellent", "great", "quality", "superior"]),
            ("bad", ["poor", "inferior", "subpar", "deficient"]),
        )
        self.synonyms = dict(defaults)

    def expand(self, query: str) -> list[str]:
        """Return the original query followed by its single-word synonym variants.

        Variants are built from the lowercased, whitespace-split query, so they
        are lowercase and single-space separated. The first element is always
        ``query`` unchanged.
        """
        tokens = query.lower().split()
        variants = [
            " ".join(tokens[:position] + [alternative] + tokens[position + 1:])
            for position, token in enumerate(tokens)
            for alternative in self.synonyms.get(token, ())
        ]
        return [query, *variants]
class LLMQueryExpander:
    """LLM-based query expansion.

    ``llm_client`` must provide an awaitable ``generate(prompt)`` that returns
    the model's text response.
    """

    def __init__(self, llm_client: Any):
        self.llm = llm_client

    async def expand(self, query: str) -> ExpandedQuery:
        """Expand query using LLM.

        Returns:
            An ``ExpandedQuery`` built from the model's JSON response. If the
            response cannot be parsed or has an unexpected shape, an
            ``ExpandedQuery`` that mirrors ``query`` with empty term lists is
            returned.
        """
        import json

        prompt = f"""Expand this search query to improve retrieval.
Original query: "{query}"
Generate:
1. 3-5 synonyms for key terms
2. 3-5 related terms that might appear in relevant documents
3. 2-3 alternative ways to phrase this query
Respond with JSON:
{{
"synonyms": ["term1", "term2", ...],
"related_terms": ["term1", "term2", ...],
"reformulations": ["query1", "query2", ...]
}}
JSON:"""
        response = await self.llm.generate(prompt)
        try:
            data = json.loads(response)
            return ExpandedQuery(
                original=query,
                expanded=self._build_expanded_query(query, data),
                synonyms=data.get("synonyms", []),
                related_terms=data.get("related_terms", []),
                reformulations=data.get("reformulations", [])
            )
        except (TypeError, ValueError, AttributeError, KeyError):
            # Covers non-JSON output (JSONDecodeError is a ValueError), a
            # top-level value that is not an object, and term fields that are
            # not lists of strings.
            return ExpandedQuery(
                original=query,
                expanded=query,
                synonyms=[],
                related_terms=[],
                reformulations=[]
            )

    def _build_expanded_query(self, original: str, data: dict) -> str:
        """Join the original query with up to 3 synonyms and 2 related terms."""
        terms = [original]
        terms.extend(data.get("synonyms", [])[:3])
        terms.extend(data.get("related_terms", [])[:2])
        return " ".join(terms)
class HypotheticalDocumentExpander:
    """HyDE expander: asks the LLM for a passage that would answer the query."""

    def __init__(self, llm_client: Any):
        self.llm = llm_client

    async def expand(self, query: str) -> str:
        """Return the LLM's hypothetical answer passage for ``query``.

        The result is whatever ``self.llm.generate`` returns for the prompt.
        """
        hyde_prompt = f"""Write a short passage (2-3 sentences) that would be a perfect answer to this query.
Write as if you're writing a document that contains the answer, not answering directly.
Query: "{query}"
Passage:"""
        passage = await self.llm.generate(hyde_prompt)
        return passage
class QueryRewriter:
    """Rewrite queries for better retrieval.

    ``llm_client`` must provide an awaitable ``generate(prompt)`` that returns
    the model's text response.
    """

    def __init__(self, llm_client: Any):
        self.llm = llm_client

    async def rewrite(
        self,
        query: str,
        context: Optional[str] = None
    ) -> str:
        """Rewrite query for retrieval.

        Args:
            query: The query to rewrite.
            context: Optional conversation context; when truthy it is added to
                the prompt.

        Returns:
            The text returned by the LLM.
        """
        context_section = f"\nConversation context: {context}" if context else ""
        prompt = f"""Rewrite this query to be more specific and searchable.
Remove pronouns and ambiguous references.
Make the query self-contained.{context_section}
Original query: "{query}"
Rewritten query:"""
        return await self.llm.generate(prompt)

    async def decompose(self, query: str) -> list[str]:
        """Decompose complex query into sub-queries.

        Returns:
            The sub-queries parsed from the LLM response. ``[query]`` is
            returned when the response is not a non-empty JSON array of strings.
        """
        import json

        prompt = f"""Break down this complex query into simpler sub-queries.
Each sub-query should be answerable independently.
Query: "{query}"
Respond with JSON array of sub-queries:
["sub-query 1", "sub-query 2", ...]
JSON:"""
        response = await self.llm.generate(prompt)
        try:
            parsed = json.loads(response)
        except (TypeError, ValueError):
            # Non-string or non-JSON response (JSONDecodeError is a ValueError).
            return [query]
        # Valid JSON can still be an object, a number or a bare string; only a
        # non-empty list of strings honours the declared return type.
        if isinstance(parsed, list) and parsed and all(isinstance(part, str) for part in parsed):
            return parsed
        return [query]
Query Understanding Pipeline
from dataclasses import dataclass
from typing import Any, Optional, List
@dataclass
class QueryUnderstanding:
    """Aggregated output of the query understanding pipeline.

    Attributes:
        original_query: The query exactly as received.
        intent: Intent classification result.
        entities: Entities extracted from the query.
        expanded_query: Expansion data for the query.
        rewritten_query: The rewritten query string.
        sub_queries: Sub-queries for the query.
        metadata: Additional information about the run.
    """

    original_query: str
    intent: IntentResult
    entities: list[Entity]
    expanded_query: ExpandedQuery
    rewritten_query: str
    sub_queries: list[str]
    metadata: dict
class QueryUnderstandingPipeline:
    """Complete query understanding pipeline.

    Combines an intent classifier, an entity extractor, a query expander and a
    query rewriter. The classifier's ``classify`` and the extractor's
    ``extract`` may be synchronous or asynchronous; the expander's ``expand``
    and the rewriter's ``rewrite``/``decompose`` are awaited directly.
    """

    def __init__(
        self,
        intent_classifier: Any,
        entity_extractor: Any,
        query_expander: Any,
        query_rewriter: Any
    ):
        self.classifier = intent_classifier
        self.extractor = entity_extractor
        self.expander = query_expander
        self.rewriter = query_rewriter

    async def understand(
        self,
        query: str,
        context: Optional[str] = None
    ) -> QueryUnderstanding:
        """Run full query understanding pipeline.

        Args:
            query: Raw user query.
            context: Optional conversation context used when rewriting.

        Returns:
            A ``QueryUnderstanding`` with intent, entities, expansion,
            rewritten query, sub-queries and run metadata.
        """
        import asyncio

        # Run classification and extraction in parallel
        intent, entities = await asyncio.gather(
            self._classify_intent(query),
            self._extract_entities(query)
        )

        # Rewrite query if needed
        rewritten = query
        if context or intent.intent == IntentType.CLARIFICATION:
            rewritten = await self.rewriter.rewrite(query, context)

        # Expand query for search intents
        if intent.intent in [IntentType.SEARCH, IntentType.QUESTION]:
            expanded = await self.expander.expand(rewritten)
        else:
            expanded = ExpandedQuery(
                original=query,
                expanded=query,
                synonyms=[],
                related_terms=[],
                reformulations=[]
            )

        # Decompose complex queries
        sub_queries = [query]
        if len(query.split()) > 10:
            sub_queries = await self.rewriter.decompose(query)

        return QueryUnderstanding(
            original_query=query,
            intent=intent,
            entities=entities,
            expanded_query=expanded,
            rewritten_query=rewritten,
            sub_queries=sub_queries,
            metadata={
                "has_context": context is not None,
                "entity_count": len(entities)
            }
        )

    async def _classify_intent(self, query: str) -> IntentResult:
        """Classify intent with fallback.

        Any failure in the classifier is logged and replaced with
        ``IntentType.CONVERSATION`` at confidence 0.5.
        """
        # Imported locally: this method previously referenced an ``asyncio``
        # name that was only bound inside ``understand``. The resulting
        # NameError was swallowed, so the fallback was always returned.
        import inspect
        import logging

        try:
            result = self.classifier.classify(query)
            if inspect.isawaitable(result):
                result = await result
            return result
        except Exception:
            # Best-effort by design; Exception does not cover task cancellation.
            logging.getLogger(__name__).exception(
                "Intent classification failed; using fallback intent"
            )
            return IntentResult(
                intent=IntentType.CONVERSATION,
                confidence=0.5
            )

    async def _extract_entities(self, query: str) -> list[Entity]:
        """Extract entities with fallback.

        Any failure in the extractor is logged and replaced with ``[]``.
        """
        import inspect
        import logging

        try:
            result = self.extractor.extract(query)
            if inspect.isawaitable(result):
                result = await result
            return result
        except Exception:
            # Best-effort by design; Exception does not cover task cancellation.
            logging.getLogger(__name__).exception(
                "Entity extraction failed; returning no entities"
            )
            return []
class QueryRouter:
    """Route queries based on understanding.

    Maps each ``IntentType`` to at most one handler. A handler receives the
    ``QueryUnderstanding`` and may be synchronous or asynchronous.
    """

    def __init__(self):
        self.routes: dict[IntentType, Callable] = {}

    def register_route(
        self,
        intent: IntentType,
        handler: Callable
    ):
        """Register route handler, replacing any earlier handler for ``intent``."""
        self.routes[intent] = handler

    async def route(
        self,
        understanding: QueryUnderstanding
    ) -> Any:
        """Route query to appropriate handler.

        Returns:
            The handler's result (awaited when it is awaitable), or a default
            ``{"action": "default", "query": ...}`` dict when no handler is
            registered for the intent.
        """
        # Imported locally: ``asyncio`` was never bound in this scope, so the
        # previous ``asyncio.iscoroutine`` check raised NameError whenever a
        # handler was registered.
        import inspect

        handler = self.routes.get(understanding.intent.intent)
        if not handler:
            # Default handler
            return {
                "action": "default",
                "query": understanding.original_query
            }
        result = handler(understanding)
        if inspect.isawaitable(result):
            return await result
        return result
Production Query Service
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from typing import Optional, List, Dict
# ASGI application object; the route decorators further down attach to it.
app = FastAPI()
class QueryRequest(BaseModel):
    # Request body for POST /v1/understand.
    # (Documented with comments: a class docstring would appear in the
    # generated schema.)
    query: str  # text to analyse
    context: Optional[str] = None  # optional context; not read by the handler
    expand: bool = True  # include synonym expansions in the response
    extract_entities: bool = True  # include pattern-extracted entities
class IntentRequest(BaseModel):
    # Request body for POST /v1/classify.
    query: str  # text whose intent is classified
class EntityRequest(BaseModel):
    # Request body for POST /v1/entities.
    text: str  # text to extract entities from
    entity_types: Optional[List[str]] = None  # keep only these types when given
# Module-level components, created once at import time and shared by the
# request handlers.
rule_classifier = RuleBasedClassifier()
pattern_extractor = PatternEntityExtractor()
synonym_expander = SynonymExpander()
@app.post("/v1/understand")
async def understand_query(request: QueryRequest) -> dict:
    """Full query understanding."""
    # Intent is always computed; entities and expansions are opt-out via the
    # request flags and stay at their empty defaults when disabled.
    detected = rule_classifier.classify(request.query)

    entity_payload = []
    if request.extract_entities:
        entity_payload = [
            {
                "text": found.text,
                "type": found.entity_type,
                "start": found.start,
                "end": found.end
            }
            for found in pattern_extractor.extract(request.query)
        ]

    expansion_payload = None
    if request.expand:
        variants = synonym_expander.expand(request.query)
        expansion_payload = {
            "queries": variants,
            "count": len(variants)
        }

    return {
        "original_query": request.query,
        "intent": {
            "type": detected.intent.value,
            "confidence": detected.confidence
        },
        "entities": entity_payload,
        "expanded": expansion_payload
    }
@app.post("/v1/classify")
async def classify_intent(request: IntentRequest) -> dict:
    """Classify query intent."""
    # Uses the shared rule-based classifier; the enum is flattened to its
    # string value for the response.
    outcome = rule_classifier.classify(request.query)
    payload = {
        "query": request.query,
        "intent": outcome.intent.value,
        "confidence": outcome.confidence,
        "metadata": outcome.metadata
    }
    return payload
@app.post("/v1/entities")
async def extract_entities(request: EntityRequest) -> list[dict]:
    """Extract entities from text."""
    found = pattern_extractor.extract(request.text)
    # Filter by type if specified
    wanted_types = request.entity_types
    if wanted_types:
        found = [item for item in found if item.entity_type in wanted_types]

    response = []
    for item in found:
        response.append({
            "text": item.text,
            "type": item.entity_type,
            "start": item.start,
            "end": item.end,
            "confidence": item.confidence,
            "normalized": item.normalized
        })
    return response
@app.post("/v1/expand")
async def expand_query(query: str) -> dict:
    """Expand query with synonyms."""
    # NOTE(review): unlike the other POST endpoints this one takes a bare
    # ``query: str`` parameter instead of a request model; confirm that is
    # the intended request shape.
    variants = synonym_expander.expand(query)
    variant_count = len(variants)
    return {
        "original": query,
        "expansions": variants,
        "count": variant_count
    }
@app.get("/health")
async def health():
    # Static liveness response; no component is checked.
    status_payload = {"status": "healthy"}
    return status_payload
References
- spaCy NER: https://spacy.io/usage/linguistic-features#named-entities
- HuggingFace NER: https://huggingface.co/tasks/token-classification
- Query Expansion: https://en.wikipedia.org/wiki/Query_expansion
- HyDE Paper: https://arxiv.org/abs/2212.10496
- LangChain Query Analysis: https://python.langchain.com/docs/use_cases/query_analysis/
Conclusion
Query understanding is the foundation of intelligent AI systems. Start with rule-based classifiers for common patterns—they’re fast, predictable, and don’t require API calls. Layer in embedding-based classification for nuanced intent detection and LLM-based classification for complex cases. Entity extraction should combine pattern matching for structured data (dates, emails, numbers) with LLM extraction for domain-specific entities. Query expansion significantly improves retrieval: synonyms catch vocabulary mismatches, related terms improve recall, and HyDE generates hypothetical documents that bridge the gap between queries and documents. For conversational systems, query rewriting resolves pronouns and ambiguous references using conversation context. Build your pipeline to run classification and extraction in parallel for better latency. Monitor classification accuracy and entity extraction precision in production—these metrics directly impact downstream retrieval and response quality. The goal is to transform ambiguous user input into structured, searchable queries that your retrieval system can handle effectively.
Discover more from C4: Container, Code, Cloud & Context
Subscribe to get the latest posts sent to your email.