# LangExtract — Usage Guide

This page originally displayed the documentation and its translation side by side; the sections below consolidate each duplicated passage into a single cleaned-up English copy.
## Installation

```bash
git clone https://github.com/google/langextract.git
cd langextract
pip install -e .
```

## API Key Setup

```bash
export LANGEXTRACT_API_KEY="your-gemini-api-key"
export OPENAI_API_KEY="your-openai-api-key"
```
**.env File (Development):**

```bash
# Store keys in a local .env file instead of your shell profile.
# (The original fence content was lost in extraction — verify against upstream docs.)
```

## Quick Start

```python
import langextract as lx
import textwrap
```
# Quickstart: extract characters, emotions, and relationships from text.
# (The original page carried two fused, unindented copies of this script;
# this is a single properly formatted version.)
import langextract as lx
import textwrap  # imported by the original guide; used elsewhere in the docs

prompt = """Extract characters, emotions, and relationships in order of appearance.
Use exact text for extractions. Do not paraphrase or overlap entities."""

# Few-shot examples teach the model the desired output structure.
examples = [
    lx.data.ExampleData(
        text="ROMEO entered the garden, filled with wonder at JULIET's beauty.",
        extractions=[
            lx.data.Extraction(
                extraction_class="character",
                extraction_text="ROMEO",
                attributes={"emotional_state": "wonder"},
            ),
            lx.data.Extraction(
                extraction_class="character",
                extraction_text="JULIET",
                attributes={},
            ),
            lx.data.Extraction(
                extraction_class="relationship",
                extraction_text="ROMEO ... JULIET's beauty",
                attributes={
                    "subject": "ROMEO",
                    "relation": "admires",
                    "object": "JULIET",
                },
            ),
        ],
    )
]

text = """Act 2, Scene 2: The Capulet's orchard.
ROMEO appears beneath JULIET's balcony, gazing upward with longing.
JULIET steps onto the balcony, unaware of ROMEO's presence below."""

result = lx.extract(
    text_or_documents=text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.0-flash-exp",
)

# Each extraction records its exact character offsets in the source text.
extraction = result.extractions[0]
print(f"Text: {extraction.extraction_text}")
print(f"Start: {extraction.start_char}")
print(f"End: {extraction.end_char}")
# Rich attributes turn a single extraction into structured data.
# (Deduplicated: the original page repeated this snippet twice, fused together.)
lx.data.Extraction(
    extraction_class="medication",
    extraction_text="Lisinopril 10mg daily",
    attributes={
        "name": "Lisinopril",
        "dosage": "10mg",
        "frequency": "daily",
        "route": "oral",
        "indication": "hypertension",
    },
)
# Long-document extraction: chunking, multiple passes, and parallelism.
# (Deduplicated; comments consolidated into English.)
result = lx.extract(
    text_or_documents=long_document,  # Any length
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.0-flash-exp",
    extraction_passes=3,    # Multiple passes for better recall
    max_workers=20,         # Parallel processing
    max_char_buffer=1000,   # Chunk overlap for continuity
)
# The same tuning knobs, annotated parameter by parameter.
# (Deduplicated; comments consolidated into English.)
result = lx.extract(
    text_or_documents=documents,
    prompt_description=prompt,
    examples=examples,
    # Multi-pass extraction for better recall
    extraction_passes=3,
    # Parallel processing
    max_workers=20,
    # Chunk size tuning
    max_char_buffer=1000,
    # Model configuration
    model_id="gemini-2.0-flash-exp",
)
# Provider configuration examples. Each call reassigns `result`; use the one
# matching your deployment. (Deduplicated English/Chinese copies.)

# 1. Gemini via Vertex AI.
result = lx.extract(
    text_or_documents=text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.0-flash-exp",
    language_model_params={
        "vertexai": True,
        "project": "your-gcp-project-id",
        "location": "us-central1",
    },
)

# Optional: batch mode for cost-efficient bulk processing —
# language_model_params={"batch": {"enabled": True}}

# 2. OpenAI models.
result = lx.extract(
    text_or_documents=text,
    prompt_description=prompt,
    examples=examples,
    model_id="gpt-4o",
    fence_output=True,              # Required for OpenAI
    use_schema_constraints=False,   # Disable Gemini-specific features
)

# 3. Local models via Ollama.
result = lx.extract(
    text_or_documents=text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemma2:2b",
    model_url="http://localhost:11434",
    use_schema_constraints=False,
)
# Clinical note extraction: diagnoses, medications, and symptoms.
# (Single deduplicated copy of the fused original.)
import langextract as lx

prompt = """Extract diagnoses, symptoms, and medications from clinical notes.
Include ICD-10 codes when available. Use exact medical terminology."""

examples = [
    lx.data.ExampleData(
        text="Patient presents with Type 2 Diabetes Mellitus (E11.9). Started on Metformin 500mg BID. Reports fatigue and increased thirst.",
        extractions=[
            lx.data.Extraction(
                extraction_class="diagnosis",
                extraction_text="Type 2 Diabetes Mellitus (E11.9)",
                attributes={"condition": "Type 2 Diabetes Mellitus", "icd10": "E11.9"},
            ),
            lx.data.Extraction(
                extraction_class="medication",
                extraction_text="Metformin 500mg BID",
                attributes={"name": "Metformin", "dosage": "500mg", "frequency": "BID"},
            ),
            lx.data.Extraction(
                extraction_class="symptom",
                extraction_text="fatigue",
                attributes={"symptom": "fatigue"},
            ),
            lx.data.Extraction(
                extraction_class="symptom",
                extraction_text="increased thirst",
                # Attribute normalizes the lay phrase to the clinical term.
                attributes={"symptom": "polydipsia"},
            ),
        ],
    )
]
# Radiology report extraction: findings, impressions, and recommendations.
# (Single deduplicated copy of the fused original.)
prompt = """Extract findings, impressions, and recommendations from radiology reports.
Include anatomical location, abnormality type, and severity."""

examples = [
    lx.data.ExampleData(
        text="FINDINGS: 3.2cm mass in right upper lobe. IMPRESSION: Suspicious for malignancy. RECOMMENDATION: Biopsy recommended.",
        extractions=[
            lx.data.Extraction(
                extraction_class="finding",
                extraction_text="3.2cm mass in right upper lobe",
                attributes={
                    "location": "right upper lobe",
                    "type": "mass",
                    "size": "3.2cm",
                },
            ),
            lx.data.Extraction(
                extraction_class="impression",
                extraction_text="Suspicious for malignancy",
                attributes={"diagnosis": "possible malignancy", "certainty": "suspicious"},
            ),
            lx.data.Extraction(
                extraction_class="recommendation",
                extraction_text="Biopsy recommended",
                attributes={"action": "biopsy"},
            ),
        ],
    )
]
def supports_schema_constraints(self):
    """Provider capability flag: this backend does not support schema constraints."""
    return False
## API Reference

### `lx.extract()`

```python
result = lx.extract(
    text_or_documents,                 # str or list of str
    prompt_description,                # str: extraction instructions
    examples,                          # list of ExampleData
    model_id="gemini-2.0-flash-exp",   # str: model identifier
    extraction_passes=1,               # int: number of passes
    max_workers=None,                  # int: parallel workers
    max_char_buffer=1000,              # int: chunk overlap
    language_model_params=None,        # dict: model config
    fence_output=False,                # bool: required for OpenAI
    use_schema_constraints=True,       # bool: use schema enforcement
    model_url=None,                    # str: custom model endpoint
    api_key=None,                      # str: API key (prefer env var)
)
```

Returns an `AnnotatedDocument`, or `list[AnnotatedDocument]` for multiple inputs.

### `lx.visualize()`

```python
html_content = lx.visualize(
    jsonl_file_path,                # str: path to JSONL file
    title="Extraction Results",     # str: HTML page title
    show_attributes=True,           # bool: display attributes
)
```

Returns the HTML as a `str`.

### `lx.io.save_annotated_documents()`

```python
lx.io.save_annotated_documents(
    annotated_documents,   # list of AnnotatedDocument
    output_name,           # str: filename (e.g., "results.jsonl")
    output_dir=".",        # str: output directory
)
```

### `lx.data.ExampleData`

```python
example = lx.data.ExampleData(
    text="Example text here",
    extractions=[
        lx.data.Extraction(...),
    ],
)
```

### `lx.data.Extraction`

```python
extraction = lx.data.Extraction(
    extraction_class="medication",   # str: entity type
    extraction_text="Aspirin 81mg",  # str: exact text
    attributes={                     # dict: metadata
        "name": "Aspirin",
        "dosage": "81mg",
    },
    start_char=0,                    # int: start position (auto-set)
    end_char=13,                     # int: end position (auto-set)
)
```

### `AnnotatedDocument`

```python
result.text         # str: original text
result.extractions  # list of Extraction
result.metadata     # dict: additional info
```
## Best Practices

**Write specific prompts:**

```python
# Good
prompt = "Extract medications with dosage, frequency, and route of administration. Use exact medical terminology."
# Avoid
prompt = "Extract medications."
```

**Cover edge cases in your examples:**

```python
examples = [
    normal_case_example,
    edge_case_example,
    complex_case_example,
]
```

**Use exact source text, never paraphrase:**

```python
# Good
extraction_text="Lisinopril 10mg daily"
# Avoid paraphrasing
extraction_text="10mg lisinopril taken once per day"
```

**Use descriptive attribute keys:**

```python
attributes={
    "name": "Lisinopril",   # Drug name
    "dosage": "10mg",       # Amount
    "frequency": "daily",   # How often
    "route": "oral",        # How taken
}
```

**Tune performance parameters:**

- `extraction_passes=3` — 2–3 passes recommended for thorough extraction
- `max_workers=20` — adjust based on API rate limits
- `max_char_buffer=1000` — larger keeps more context, smaller is faster
- Model choice: `gemini-2.0-flash-exp` for simple extraction, `gemini-2.0-flash-thinking-exp` for complex reasoning

**Keep API keys out of code:**

```python
# Good: use environment variables
import os
api_key = os.getenv("LANGEXTRACT_API_KEY")
# Avoid hardcoding secrets such as api_key = "AIza..." — never do this.
```

**Handle failures explicitly:**

```python
try:
    result = lx.extract(...)
except Exception as e:
    logger.error(f"Extraction failed: {e}")
    # Implement retry logic or fallback
```

**Control cost:** prefer cheaper models for bulk work (e.g. `gemini-2.0-flash-exp` vs `gemini-1.5-pro`) and enable batch mode: `language_model_params={"batch": {"enabled": True}}`.

**Validate extraction offsets:**

```python
for extraction in result.extractions:
    # The extraction must lie within document bounds...
    assert 0 <= extraction.start_char < len(result.text)
    assert extraction.end_char <= len(result.text)
    # ...and its text must match the source span exactly.
    extracted = result.text[extraction.start_char:extraction.end_char]
    assert extracted == extraction.extraction_text
```

## Troubleshooting

- `AuthenticationError: Invalid API key` / `Permission denied` — check the `LANGEXTRACT_API_KEY` / `OPENAI_API_KEY` environment variables.
- `Schema constraints not supported` — pass `use_schema_constraints=False` for non-Gemini providers.
- `Token limit exceeded` — reduce `max_char_buffer` so each chunk fits the model's context window.
- `Connection refused` — for local models, confirm the server (e.g. Ollama) is running at `model_url`.
## Debugging

**Enable verbose logging:**

```python
import logging
logging.basicConfig(level=logging.DEBUG)
```

**Save each extraction pass separately:**

```python
for i, result in enumerate(results):
    lx.io.save_annotated_documents(
        [result],
        output_name=f"pass_{i}.jsonl",
        output_dir="./debug",
    )
```

**Verify examples are well-formed:**

```python
for example in examples:
    for extraction in example.extractions:
        # Every example extraction must quote text from its own example.
        assert extraction.extraction_text in example.text
        print(f"✓ {extraction.extraction_class}: {extraction.extraction_text}")
```

**Start with a minimal test:**

```python
test_result = lx.extract(
    text_or_documents="Patient on Aspirin 81mg daily.",
    prompt_description="Extract medications.",
    examples=[simple_example],
    model_id="gemini-2.0-flash-exp",
)
print(f"Extractions: {len(test_result.extractions)}")
```
# Document-level extraction: a single extraction spanning the whole note,
# with structured attributes including a nested event timeline.
# (Single deduplicated copy of the fused original.)
examples = [
    lx.data.ExampleData(
        text="Patient presents with chest pain. ECG shows ST elevation. Diagnosed with STEMI.",
        extractions=[
            lx.data.Extraction(
                extraction_class="clinical_event",
                extraction_text="Patient presents with chest pain. ECG shows ST elevation. Diagnosed with STEMI.",
                attributes={
                    "symptom": "chest pain",
                    "diagnostic_test": "ECG",
                    "finding": "ST elevation",
                    "diagnosis": "STEMI",
                    "severity": "severe",
                    "timeline": [
                        {"event": "symptom_onset", "description": "chest pain"},
                        {"event": "diagnostic", "description": "ECG shows ST elevation"},
                        {"event": "diagnosis", "description": "STEMI"},
                    ],
                },
            )
        ],
    )
]
# Batch processing with incremental checkpointing.
# Bug fixed from the original: documents left over after the last full batch
# of 100 were never written; they are now flushed after the loop.
from tqdm import tqdm

import langextract as lx

documents = load_documents()  # assumes a caller-provided loader returning a list
results = []
for i, doc in enumerate(tqdm(documents)):
    try:
        result = lx.extract(
            text_or_documents=doc,
            prompt_description=prompt,
            examples=examples,
            model_id="gemini-2.0-flash-exp",
        )
        results.append(result)
        # Checkpoint every 100 documents so a crash loses at most one batch.
        if (i + 1) % 100 == 0:
            lx.io.save_annotated_documents(
                results,
                output_name=f"batch_{i+1}.jsonl",
                output_dir="./batches",
            )
            results = []  # Clear for next batch
    except Exception as e:  # best-effort: skip bad documents, keep going
        print(f"Failed on document {i}: {e}")
        continue

# Flush the final partial batch (fewer than 100 documents).
if results:
    lx.io.save_annotated_documents(
        results,
        output_name=f"batch_{len(documents)}.jsonl",
        output_dir="./batches",
    )
# Flatten extractions into rows for a pandas DataFrame, carrying the source
# note's identifiers alongside each extraction's attributes.
# (Fragment: `result`, `row`, and `extractions_data` come from surrounding code.)
import langextract as lx
import pandas as pd

for extraction in result.extractions:
    extractions_data.append({
        'patient_id': row['patient_id'],
        'note_date': row['note_date'],
        'extraction_class': extraction.extraction_class,
        'extraction_text': extraction.extraction_text,
        # NOTE(review): attribute keys that collide with the fixed keys above
        # would overwrite them, since the unpacking comes last — confirm intended.
        **extraction.attributes
    })
# Micro-benchmark: extraction throughput for a given model and pass count.
# (Single deduplicated copy; prints consolidated into English.)
import time

import langextract as lx


def benchmark_extraction(documents, model_id, passes=1):
    """Run extraction over `documents` and print timing statistics.

    Relies on module-level `prompt` and `examples`.
    """
    start = time.time()
    results = lx.extract(
        text_or_documents=documents,
        prompt_description=prompt,
        examples=examples,
        model_id=model_id,
        extraction_passes=passes,
        max_workers=20,
    )
    elapsed = time.time() - start
    total_extractions = sum(len(r.extractions) for r in results)
    print(f"Model: {model_id}")
    print(f"Passes: {passes}")
    print(f"Documents: {len(documents)}")
    print(f"Total extractions: {total_extractions}")
    print(f"Time: {elapsed:.2f}s")
    print(f"Throughput: {len(documents)/elapsed:.2f} docs/sec")
    print()
import langextract as lx


def test_basic_extraction():
    """Smoke test: the model should find at least one `name` extraction."""
    prompt = "Extract names."
    examples = [
        lx.data.ExampleData(
            text="John Smith visited the clinic.",
            extractions=[
                lx.data.Extraction(
                    extraction_class="name",
                    extraction_text="John Smith",
                )
            ],
        )
    ]
    result = lx.extract(
        text_or_documents="Mary Johnson was the doctor.",
        prompt_description=prompt,
        examples=examples,
        model_id="gemini-2.0-flash-exp",
    )
    assert len(result.extractions) >= 1
    assert result.extractions[0].extraction_class == "name"
## Development Setup

```bash
git clone https://github.com/google/langextract.git
cd langextract
pip install -e ".[dev]"
pre-commit install
```