azure-ai-search-python
Compare original and translation side by side
🇺🇸
Original
English
🇨🇳
Translation
Chinese
Azure AI Search Python SDK
Azure AI Search Python SDK
Write clean, idiomatic Python code for Azure AI Search using `azure-search-documents`.
使用`azure-search-documents`为Azure AI Search编写整洁、地道的Python代码。
Authentication Patterns
身份验证模式
Microsoft Entra ID (preferred):
python
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
credential = DefaultAzureCredential()
client = SearchClient(endpoint, index_name, credential)
API Key:
python
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
Microsoft Entra ID(推荐):
python
from azure.identity import DefaultAzureCredential
from azure.search.documents import SearchClient
credential = DefaultAzureCredential()
client = SearchClient(endpoint, index_name, credential)
API密钥:
python
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
client = SearchClient(endpoint, index_name, AzureKeyCredential(api_key))
Client Selection
客户端选择
| Client | Purpose |
|---|---|
| `SearchClient` | Query indexes, upload/update/delete documents |
| `SearchIndexClient` | Create/manage indexes, knowledge sources, knowledge bases |
| `SearchIndexerClient` | Manage indexers, skillsets, data sources |
| `KnowledgeBaseRetrievalClient` | Agentic retrieval with LLM-powered Q&A |
| 客户端 | 用途 |
|---|---|
| `SearchClient` | 查询索引、上传/更新/删除文档 |
| `SearchIndexClient` | 创建/管理索引、知识源、知识库 |
| `SearchIndexerClient` | 管理索引器、技能集、数据源 |
| `KnowledgeBaseRetrievalClient` | 基于大语言模型的问答智能检索 |
Index Creation Pattern
索引创建模式
python
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
SearchIndex, SearchField, VectorSearch, VectorSearchProfile,
HnswAlgorithmConfiguration, AzureOpenAIVectorizer,
AzureOpenAIVectorizerParameters, SemanticSearch,
SemanticConfiguration, SemanticPrioritizedFields, SemanticField
)
index = SearchIndex(
name=index_name,
fields=[
SearchField(name="id", type="Edm.String", key=True),
SearchField(name="content", type="Edm.String", searchable=True),
SearchField(name="embedding", type="Collection(Edm.Single)",
vector_search_dimensions=3072,
vector_search_profile_name="vector-profile"),
],
vector_search=VectorSearch(
profiles=[VectorSearchProfile(
name="vector-profile",
algorithm_configuration_name="hnsw-algo",
vectorizer_name="openai-vectorizer"
)],
algorithms=[HnswAlgorithmConfiguration(name="hnsw-algo")],
vectorizers=[AzureOpenAIVectorizer(
vectorizer_name="openai-vectorizer",
parameters=AzureOpenAIVectorizerParameters(
resource_url=aoai_endpoint,
deployment_name=embedding_deployment,
model_name=embedding_model
)
)]
),
semantic_search=SemanticSearch(
default_configuration_name="semantic-config",
configurations=[SemanticConfiguration(
name="semantic-config",
prioritized_fields=SemanticPrioritizedFields(
content_fields=[SemanticField(field_name="content")]
)
)]
)
)
index_client = SearchIndexClient(endpoint, credential)
index_client.create_or_update_index(index)
python
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
SearchIndex, SearchField, VectorSearch, VectorSearchProfile,
HnswAlgorithmConfiguration, AzureOpenAIVectorizer,
AzureOpenAIVectorizerParameters, SemanticSearch,
SemanticConfiguration, SemanticPrioritizedFields, SemanticField
)
index = SearchIndex(
name=index_name,
fields=[
SearchField(name="id", type="Edm.String", key=True),
SearchField(name="content", type="Edm.String", searchable=True),
SearchField(name="embedding", type="Collection(Edm.Single)",
vector_search_dimensions=3072,
vector_search_profile_name="vector-profile"),
],
vector_search=VectorSearch(
profiles=[VectorSearchProfile(
name="vector-profile",
algorithm_configuration_name="hnsw-algo",
vectorizer_name="openai-vectorizer"
)],
algorithms=[HnswAlgorithmConfiguration(name="hnsw-algo")],
vectorizers=[AzureOpenAIVectorizer(
vectorizer_name="openai-vectorizer",
parameters=AzureOpenAIVectorizerParameters(
resource_url=aoai_endpoint,
deployment_name=embedding_deployment,
model_name=embedding_model
)
)]
),
semantic_search=SemanticSearch(
default_configuration_name="semantic-config",
configurations=[SemanticConfiguration(
name="semantic-config",
prioritized_fields=SemanticPrioritizedFields(
content_fields=[SemanticField(field_name="content")]
)
)]
)
)
index_client = SearchIndexClient(endpoint, credential)
index_client.create_or_update_index(index)
Document Operations
文档操作
python
from azure.search.documents import SearchIndexingBufferedSender
python
from azure.search.documents import SearchIndexingBufferedSender
# Batch upload with automatic batching
# 自动批处理的批量上传
with SearchIndexingBufferedSender(endpoint, index_name, credential) as sender:
sender.upload_documents(documents)
with SearchIndexingBufferedSender(endpoint, index_name, credential) as sender:
sender.upload_documents(documents)
# Direct operations via SearchClient
# 通过SearchClient直接操作
search_client = SearchClient(endpoint, index_name, credential)
search_client.upload_documents(documents) # Add new
search_client.merge_documents(documents) # Update existing
search_client.merge_or_upload_documents(documents) # Upsert
search_client.delete_documents(documents) # Remove
search_client = SearchClient(endpoint, index_name, credential)
search_client.upload_documents(documents) # 添加新文档
search_client.merge_documents(documents) # 更新现有文档
search_client.merge_or_upload_documents(documents) # 插入或更新
search_client.delete_documents(documents) # 删除文档
Search Patterns
搜索模式
python
python
# Basic search
# 基础搜索
results = search_client.search(search_text="query")
results = search_client.search(search_text="query")
# Vector search
# 向量搜索
from azure.search.documents.models import VectorizedQuery
results = search_client.search(
search_text=None,
vector_queries=[VectorizedQuery(
vector=embedding,
k_nearest_neighbors=5,
fields="embedding"
)]
)
from azure.search.documents.models import VectorizedQuery
results = search_client.search(
search_text=None,
vector_queries=[VectorizedQuery(
vector=embedding,
k_nearest_neighbors=5,
fields="embedding"
)]
)
# Hybrid search (vector + keyword)
# 混合搜索(向量+关键词)
results = search_client.search(
search_text="query",
vector_queries=[VectorizedQuery(vector=embedding, k_nearest_neighbors=5, fields="embedding")],
query_type="semantic",
semantic_configuration_name="semantic-config"
)
results = search_client.search(
search_text="query",
vector_queries=[VectorizedQuery(vector=embedding, k_nearest_neighbors=5, fields="embedding")],
query_type="semantic",
semantic_configuration_name="semantic-config"
)
# With filters
# 带过滤器的搜索
results = search_client.search(
search_text="query",
filter="category eq 'technology'",
select=["id", "title", "content"],
top=10
)
results = search_client.search(
search_text="query",
filter="category eq 'technology'",
select=["id", "title", "content"],
top=10
)
Agentic Retrieval (Knowledge Bases)
智能检索(知识库)
For LLM-powered Q&A with answer synthesis, see references/agentic-retrieval.md.
Key concepts:
- Knowledge Source: Points to a search index
- Knowledge Base: Wraps knowledge sources + LLM for query planning and synthesis
- Output modes: `EXTRACTIVE_DATA` (raw chunks) or `ANSWER_SYNTHESIS` (LLM-generated answers)
如需使用大语言模型生成式问答,可参考references/agentic-retrieval.md。
核心概念:
- 知识源:指向搜索索引
- 知识库:封装知识源+大语言模型,用于查询规划和结果生成
- 输出模式:`EXTRACTIVE_DATA`(原始片段)或`ANSWER_SYNTHESIS`(大语言模型生成的答案)
Async Pattern
异步模式
python
from azure.search.documents.aio import SearchClient
async with SearchClient(endpoint, index_name, credential) as client:
results = await client.search(search_text="query")
async for result in results:
print(result["title"])
python
from azure.search.documents.aio import SearchClient
async with SearchClient(endpoint, index_name, credential) as client:
results = await client.search(search_text="query")
async for result in results:
print(result["title"])
Best Practices
最佳实践
- Use environment variables for endpoints, keys, and deployment names
- Prefer `DefaultAzureCredential` over API keys for production
- Use `SearchIndexingBufferedSender` for batch uploads (handles batching/retries)
- Always define semantic configuration for agentic retrieval indexes
- Use `create_or_update_index` for idempotent index creation
- Close clients with context managers or explicit `close()`
- 使用环境变量存储端点、密钥和部署名称
- 生产环境优先使用`DefaultAzureCredential`而非API密钥
- 批量上传使用`SearchIndexingBufferedSender`(自动处理批处理/重试)
- 为智能检索索引始终定义语义配置
- 使用`create_or_update_index`实现幂等性索引创建
- 通过上下文管理器或显式`close()`关闭客户端
Field Types Reference
字段类型参考
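| EDM Type | Python | Notes |
|---|---|---|
| `Edm.String` | str | Searchable text |
| `Edm.Int32` | int | Integer |
| `Edm.Int64` | int | Long integer |
| `Edm.Double` | float | Floating point |
| `Edm.Boolean` | bool | True/False |
| `Edm.DateTimeOffset` | datetime | ISO 8601 |
| `Collection(Edm.Single)` | List[float] | Vector embeddings |
| `Collection(Edm.String)` | List[str] | String arrays |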
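| EDM类型 | Python类型 | 说明 |
|---|---|---|
| `Edm.String` | str | 可搜索文本 |
| `Edm.Int32` | int | 整数 |
| `Edm.Int64` | int | 长整数 |
| `Edm.Double` | float | 浮点数 |
| `Edm.Boolean` | bool | 布尔值(True/False) |
| `Edm.DateTimeOffset` | datetime | ISO 8601格式 |
| `Collection(Edm.Single)` | List[float] | 向量嵌入 |
| `Collection(Edm.String)` | List[str] | 字符串数组 |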
Error Handling
错误处理
python
from azure.core.exceptions import (
HttpResponseError,
ResourceNotFoundError,
ResourceExistsError
)
try:
result = search_client.get_document(key="123")
except ResourceNotFoundError:
print("Document not found")
except HttpResponseError as e:
print(f"Search error: {e.message}")
python
from azure.core.exceptions import (
HttpResponseError,
ResourceNotFoundError,
ResourceExistsError
)
try:
result = search_client.get_document(key="123")
except ResourceNotFoundError:
print("文档不存在")
except HttpResponseError as e:
print(f"搜索错误:{e.message}")