Loading...
Loading...
Compare original and translation side by side
gemini-3-image-generationgemini-3-image-generationgemini-3-pro-apigemini-3-pro-apiimport google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel("gemini-3-pro-preview")import google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel("gemini-3-pro-preview")undefinedundefinedimport { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager } from "@google/generative-ai/server";
import fs from "fs";
const genAI = new GoogleGenerativeAI("YOUR_API_KEY");
const fileManager = new GoogleAIFileManager("YOUR_API_KEY");
// Upload and analyze image
const uploadResult = await fileManager.uploadFile("photo.jpg", {
mimeType: "image/jpeg"
});
const model = genAI.getGenerativeModel({ model: "gemini-3-pro-preview" });
const result = await model.generateContent([
"What's in this image?",
{ fileData: { fileUri: uploadResult.file.uri, mimeType: uploadResult.file.mimeType } }
]);
console.log(result.response.text());import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager } from "@google/generative-ai/server";
import fs from "fs";
const genAI = new GoogleGenerativeAI("YOUR_API_KEY");
const fileManager = new GoogleAIFileManager("YOUR_API_KEY");
// 上传并分析图像
const uploadResult = await fileManager.uploadFile("photo.jpg", {
mimeType: "image/jpeg"
});
const model = genAI.getGenerativeModel({ model: "gemini-3-pro-preview" });
const result = await model.generateContent([
"What's in this image?",
{ fileData: { fileUri: uploadResult.file.uri, mimeType: uploadResult.file.mimeType } }
]);
console.log(result.response.text());import google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")import google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
**Node.js Example:**
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
// Upload image
const uploadResult = await fileManager.uploadFile("screenshot.png", {
mimeType: "image/png"
});
// Configure model with high resolution
const model = genAI.getGenerativeModel({
model: "gemini-3-pro-preview",
generationConfig: {
thinking_level: "high",
media_resolution: "high" // Best quality for OCR
}
});
const result = await model.generateContent([
`Analyze this image and provide:
1. Main objects and their locations
2. Any visible text (OCR)
3. Overall context and purpose`,
{ fileData: { fileUri: uploadResult.file.uri, mimeType: uploadResult.file.mimeType } }
]);
console.log(result.response.text());

| Resolution | Tokens per Image | Best For |
|---|---|---|
| `low` | 280 tokens | Quick analysis, low detail |
| `medium` | 560 tokens | Balanced quality/cost |
| `high` | 1,120 tokens | OCR, fine details, small text |
references/image-understanding.md
**Node.js示例**:
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);import google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
**分辨率选项**:
| 分辨率 | 单图像Token数 | 最佳适用场景 |
|-----------|------------------|----------|
| `low` | 280个 | 快速分析、低细节需求 |
| `medium` | 560个 | 平衡质量与成本 |
| `high` | 1120个 | OCR、精细细节、小文本 |
**支持格式**:JPEG、PNG、WEBP、HEIC、HEIF
**参考**:`references/image-understanding.md` 获取进阶模式
---import google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
**Node.js Example:**
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
// Upload video
const uploadResult = await fileManager.uploadFile("tutorial.mp4", {
mimeType: "video/mp4"
});
// Wait for processing
let file = await fileManager.getFile(uploadResult.file.name);
while (file.state === FileState.PROCESSING) {
await new Promise(resolve => setTimeout(resolve, 5000));
file = await fileManager.getFile(uploadResult.file.name);
}
if (file.state === FileState.FAILED) {
throw new Error("Video processing failed");
}
// Analyze video
const model = genAI.getGenerativeModel({
model: "gemini-3-pro-preview",
generationConfig: {
media_resolution: "medium"
}
});
const result = await model.generateContent([
`Analyze this video and provide:
1. Overall summary
2. Key scenes and timestamps
3. Main topics covered`,
{ fileData: { fileUri: file.uri, mimeType: file.mimeType } }
]);
console.log(result.response.text());references/video-processing.mdimport google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel("gemini-3-pro-preview")
**Node.js示例**:
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
**Node.js Example:**
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
// Upload audio
const uploadResult = await fileManager.uploadFile("podcast.mp3", {
mimeType: "audio/mp3"
});
// Wait for processing
let file = await fileManager.getFile(uploadResult.file.name);
while (file.state === FileState.PROCESSING) {
await new Promise(resolve => setTimeout(resolve, 5000));
file = await fileManager.getFile(uploadResult.file.name);
}
const model = genAI.getGenerativeModel({ model: "gemini-3-pro-preview" });
const result = await model.generateContent([
`Process this audio and provide:
1. Full transcription
2. Summary of main points
3. Key timestamps`,
{ fileData: { fileUri: file.uri, mimeType: file.mimeType } }
]);
console.log(result.response.text());references/audio-processing.mdimport google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
**视频规格**:
- **最长时长**:1小时
- **支持格式**:MP4、MOV、AVI等
- **分辨率选项**:低(每帧70个token)、中(每帧70个token)、高(每帧280个token)
- **OCR**:高分辨率下支持
**参考**:`references/video-processing.md` 获取进阶模式
---import google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel("gemini-3-pro-preview")
**Node.js Example:**
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);
// Upload PDF
const uploadResult = await fileManager.uploadFile("research_paper.pdf", {
mimeType: "application/pdf"
});
// Wait for processing
let file = await fileManager.getFile(uploadResult.file.name);
while (file.state === FileState.PROCESSING) {
await new Promise(resolve => setTimeout(resolve, 5000));
file = await fileManager.getFile(uploadResult.file.name);
}
// Analyze with medium resolution (recommended)
const model = genAI.getGenerativeModel({
model: "gemini-3-pro-preview",
generationConfig: {
media_resolution: "medium"
}
});
const result = await model.generateContent([
`Analyze this PDF and extract:
1. Main sections
2. Key findings
3. Important data`,
{ fileData: { fileUri: file.uri, mimeType: file.mimeType } }
]);
console.log(result.response.text());mediumreferences/document-processing.md
**Node.js示例**:
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);

| Media Type | Resolution | Tokens | Use Case |
|---|---|---|---|
| Images | `low` | 280 | Quick scan, thumbnails |
| Images | `medium` | 560 | General analysis |
| Images | `high` | 1,120 | OCR, fine details, code |
| PDFs | `medium` | 560/page | Recommended (saturation point) |
| PDFs | `high` | 1,120/page | Diminishing returns |
| Video | `low`/`medium` | 70/frame | Most use cases |
| Video | `high` | 280/frame | OCR from video |
import google.generativeai as genai
genai.configure(api_key="YOUR_API_KEY")model = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={
"media_resolution": resolution
}
)
image_file = genai.upload_file(image_path)
response = model.generate_content([
"Describe this image" if not need_ocr else "Extract all text from this image",
image_file
])
# Log token usage for cost tracking
tokens = response.usage_metadata.total_token_count
cost = (tokens / 1_000_000) * 2.00 # Input pricing
print(f"Resolution: {resolution}, Tokens: {tokens}, Cost: ${cost:.6f}")
return response.text
**音频规格**:
- **最长时长**:9.5小时
- **支持格式**:WAV、MP3、FLAC、AAC等
- **支持语言**:多语言支持
**参考**:`references/audio-processing.md` 获取进阶模式
---
**Per-Item Resolution Control:**
```pythonimport google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
**Cost Monitoring:**
```python
def log_media_costs(response):
"""Log media processing costs"""
usage = response.usage_metadata
# Pricing for ≤200k context
input_cost = (usage.prompt_token_count / 1_000_000) * 2.00
output_cost = (usage.candidates_token_count / 1_000_000) * 12.00
print(f"Input tokens: {usage.prompt_token_count} (${input_cost:.6f})")
print(f"Output tokens: {usage.candidates_token_count} (${output_cost:.6f})")
print(f"Total cost: ${input_cost + output_cost:.6f}")

`references/token-optimization.md`

| Setting | Images | PDFs | Video (per frame) | Recommendation |
|---|---|---|---|---|
| `low` | 280 tokens | 280 tokens | 70 tokens | Quick analysis, low detail |
| `medium` | 560 tokens | 560 tokens | 70 tokens | Balanced quality/cost |
| `high` | 1,120 tokens | 1,120 tokens | 280 tokens | OCR, fine text, details |
model = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={
"media_resolution": "high" # Applies to all media
}
)response = model.generate_content([
"Analyze these files",
{"file": high_detail_image, "media_resolution": "high"},
{"file": low_detail_image, "media_resolution": "low"}
])highmediummediumlowmediumlowreferences/media-resolution.md
**Node.js示例**:
```typescript
import { GoogleGenerativeAI } from "@google/generative-ai";
import { GoogleAIFileManager, FileState } from "@google/generative-ai/server";
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
const fileManager = new GoogleAIFileManager(process.env.GEMINI_API_KEY!);import google.generativeai as genaiundefined
**PDF处理技巧**:
- **推荐分辨率**:`medium`(每页560个token)——质量饱和点
- **多页支持**:自动处理所有页面
- **原生支持**:无需转换为图像
- **文本提取**:内置高质量文本提取功能
**参考**:`references/document-processing.md` 获取进阶模式
---

| 媒体类型 | 分辨率 | Token数 | 适用场景 |
|---|---|---|---|
| 图像 | `low` | 280 | 快速扫描、缩略图 |
| 图像 | `medium` | 560 | 常规分析 |
| 图像 | `high` | 1120 | OCR、精细细节、代码 |
| PDF | `medium` | 560/页 | 推荐(饱和点) |
| PDF | `high` | 1120/页 | 收益递减 |
| 视频 | `low`/`medium` | 70/帧 | 大多数场景 |
| 视频 | `high` | 280/帧 | 视频OCR |
import google.generativeai as genai
genai.configure(api_key="YOUR_API_KEY")undefinedmodel = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={
"media_resolution": resolution
}
)
image_file = genai.upload_file(image_path)
response = model.generate_content([
"Describe this image" if not need_ocr else "Extract all text from this image",
image_file
])
# 记录token使用量以追踪成本
tokens = response.usage_metadata.total_token_count
cost = (tokens / 1_000_000) * 2.00 # 输入定价
print(f"Resolution: {resolution}, Tokens: {tokens}, Cost: ${cost:.6f}")
return response.textundefined
**单文件分辨率控制**:
```python
**成本监控**:
```python
def log_media_costs(response):
"""记录媒体处理成本"""
usage = response.usage_metadata
# ≤200k上下文的定价
input_cost = (usage.prompt_token_count / 1_000_000) * 2.00
output_cost = (usage.candidates_token_count / 1_000_000) * 12.00
print(f"Input tokens: {usage.prompt_token_count} (${input_cost:.6f})")
print(f"Output tokens: {usage.candidates_token_count} (${output_cost:.6f})")
print(f"Total cost: ${input_cost + output_cost:.6f}")

`references/token-optimization.md`

| 设置 | 图像 | PDF | 视频(每帧) | 推荐场景 |
|---|---|---|---|---|
| `low` | 280个token | 280个token | 70个token | 快速分析、低细节需求 |
| `medium` | 560个token | 560个token | 70个token | 平衡质量与成本 |
| `high` | 1120个token | 1120个token | 280个token | OCR、小文本、精细细节 |
model = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={
"media_resolution": "high" # 应用于所有媒体
}
)response = model.generate_content([
"Analyze these files",
{"file": high_detail_image, "media_resolution": "high"},
{"file": low_detail_image, "media_resolution": "low"}
])undefinedhighmediummediumlowmediumlowreferences/media-resolution.mdimport google.generativeai as genaiundefinedundefinedundefined
---undefinedundefinedundefinedmodel = genai.GenerativeModel("gemini-3-pro-preview")
chat = model.start_chat()undefinedundefinedimport google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={"media_resolution": "medium"}
)undefined# Wait for processing
while pdf_file.state.name == "PROCESSING":
time.sleep(5)
pdf_file = genai.get_file(pdf_file.name)
# Extract key information
response = model.generate_content([
"Extract: 1) Document type, 2) Key dates, 3) Important numbers, 4) Summary",
pdf_file
])
results[pdf_path.name] = response.text
# Clean up
genai.delete_file(pdf_file.name)undefinedundefinedvideo = genai.upload_file("user_upload.mp4")undefinedProvide structured response.
""",
video
---
---media_resolution: "high"lowmediummodel = genai.GenerativeModel("gemini-3-pro-preview")
chat = model.start_chat()media_resolution: "high"undefinedimport google.generativeai as genai
from pathlib import Path
genai.configure(api_key="YOUR_API_KEY")
model = genai.GenerativeModel(
"gemini-3-pro-preview",
generation_config={"media_resolution": "medium"}
)# 等待处理完成
while pdf_file.state.name == "PROCESSING":
time.sleep(5)
pdf_file = genai.get_file(pdf_file.name)
# 提取关键信息
response = model.generate_content([
"Extract: 1) Document type, 2) Key dates, 3) Important numbers, 4) Summary",
pdf_file
])
results[pdf_path.name] = response.text
# 清理文件
genai.delete_file(pdf_file.name)undefinedvideo = genai.upload_file("user_upload.mp4")请提供结构化回复。
""",
video
---media_resolution: "high"lowmediummedia_resolution: "high"