Loading...
Loading...
Build Retrieval-Augmented Generation (RAG) applications that combine LLM capabilities with external knowledge sources. Covers vector databases, embeddings, retrieval strategies, and response generation. Use when building document Q&A systems, knowledge base applications, enterprise search, or combining LLMs with custom data.
npx skill4agent add qodex-ai/ai-agent-skills rag-agent-builder
examples/basic_rag.py
retrieval_strategies.py
agentic_rag.py
scripts/embedding_management.py
vector_db_manager.py
rag_evaluation.py
Documents → Chunks → Embeddings → Vector DB
↓
User Question → Embedding → Retrieval → LLM → Answer
↑ ↓
Vector DB Context
# Simple fixed-size chunks
chunks = split_text(doc, chunk_size=1000, overlap=100)
# Semantic chunks (group by meaning)
chunks = semantic_chunking(doc, max_tokens=512)
# Hierarchical chunks (different levels)
chapters = split_by_heading(doc)
chunks = split_each_chapter(chapters, size=1000)
text-embedding-3-small
text-embedding-3-large
all-MiniLM-L6-v2
all-mpnet-base-v2
# Similarity search (most common)
results = vector_db.query(question_embedding, k=5)
# Hybrid search (keyword + semantic)
keyword_results = bm25.search(question, k=3)
semantic_results = vector_db.query(embedding, k=3)
results = combine_and_rank(keyword_results, semantic_results)
# Reranking (improve relevance)
retrieved = initial_retrieval(query)
reranked = rerank_by_relevance(retrieved, query)
# Fit retrieved documents into context window
def prepare_context(retrieved_docs, max_tokens=3000):
    context = ""
    for doc in retrieved_docs:
        if len(tokenize(context + doc)) <= max_tokens:
            context += doc
        else:
            break
    return context
You are a helpful assistant. Answer the question based on the provided context.
Context:
{retrieved_documents}
Question: {user_question}
Answer:
from langchain.document_loaders import PDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA
# Load documents
loader = PDFLoader("document.pdf")
docs = loader.load()
# Create RAG chain
embeddings = OpenAIEmbeddings()
vectorstore = Pinecone.from_documents(docs, embeddings)
qa = RetrievalQA.from_chain_type(
llm=ChatOpenAI(),
chain_type="stuff",
retriever=vectorstore.as_retriever()
)
answer = qa.run("What is the document about?")
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
# Load documents
documents = SimpleDirectoryReader("./data").load_data()
# Create index
index = GPTVectorStoreIndex.from_documents(documents)
# Query
response = index.as_query_engine().query("What is the main topic?")
from crewai import Agent, Task, Crew
from tools import retrieval_tool
researcher = Agent(
role="Research Assistant",
goal="Research topics using knowledge base",
tools=[retrieval_tool]
)
research_task = Task(
description="Research the topic: {topic}",
agent=researcher
)
# Expand query with related terms
expanded_query = query + " " + synonym_expansion(query)
results = retrieve(expanded_query)
# Compress retrieved docs before passing to LLM
compressed = compress_documents(retrieved_docs, query)
context = format_context(compressed)
# Iteratively refine retrieval based on LLM output
query = user_question
while iterations < max:
    results = retrieve(query)
    answer = generate_with_context(results)
    if answer_complete(answer):
        break
    query = refine_query(answer)
# Retrieve both text and images
text_results = text_retriever.query(question)
image_results = image_retriever.query(question)
context = combine_multimodal(text_results, image_results)