Loading...
Loading...
Guide for implementing Google Gemini API image generation - create high-quality images from text prompts using gemini-2.5-flash-image model. Use when generating images, creating visual content, or implementing text-to-image features. Supports text-to-image, image editing, multi-image composition, and iterative refinement.
npx skill4agent add aia-11-hn-mib/mib-mockinterviewaibot gemini-image-gen
GEMINI_API_KEY
export GEMINI_API_KEY="your-key"
.env
.claude/.env
.claude/skills/.env
.claude/skills/gemini-image-gen/.env
.env
GEMINI_API_KEY=your_api_key_here
# Enable Vertex AI
export GEMINI_USE_VERTEX=true
export VERTEX_PROJECT_ID=your-gcp-project-id
export VERTEX_LOCATION=us-central1 # Optional, defaults to us-central1
.env
GEMINI_USE_VERTEX=true
VERTEX_PROJECT_ID=your-gcp-project-id
VERTEX_LOCATION=us-central1
pip install google-genai
from google import genai
from google.genai import types
import os
# API key detection handled automatically by helper script
client = genai.Client(api_key=os.getenv('GEMINI_API_KEY'))
response = client.models.generate_content(
model='gemini-2.5-flash-image',
contents='A serene mountain landscape at sunset with snow-capped peaks',
config=types.GenerateContentConfig(
response_modalities=['image'],
aspect_ratio='16:9'
)
)
# Save to ./docs/assets/
for i, part in enumerate(response.candidates[0].content.parts):
    if part.inline_data:
        with open(f'./docs/assets/generated-{i}.png', 'wb') as f:
            f.write(part.inline_data.data)
# Generate single image
python .claude/skills/gemini-image-gen/scripts/generate.py \
"A futuristic city with flying cars" \
--aspect-ratio 16:9 \
--output ./docs/assets/city.png
# Generate with specific modalities
python .claude/skills/gemini-image-gen/scripts/generate.py \
"Modern architecture design" \
--response-modalities image text \
--aspect-ratio 1:1

| Ratio | Resolution | Use Case | Token Cost |
|---|---|---|---|
| 1:1 | 1024×1024 | Social media, avatars | 1290 |
| 16:9 | 1344×768 | Landscapes, banners | 1290 |
| 9:16 | 768×1344 | Mobile, portraits | 1290 |
| 4:3 | 1152×896 | Traditional media | 1290 |
| 3:4 | 896×1152 | Vertical posters | 1290 |
['image']
['text']
['image', 'text']
import PIL.Image
img = PIL.Image.open('original.png')
response = client.models.generate_content(
model='gemini-2.5-flash-image',
contents=[
'Add a red balloon floating in the sky',
img
]
)
img1 = PIL.Image.open('background.png')
img2 = PIL.Image.open('foreground.png')
response = client.models.generate_content(
model='gemini-2.5-flash-image',
contents=[
'Combine these images into a cohesive scene',
img1,
img2
]
)
references/prompting-guide.md
config = types.GenerateContentConfig(
response_modalities=['image'],
safety_settings=[
types.SafetySetting(
category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
threshold=types.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
)
]
)
references/safety-settings.md
./docs/assets/
# Create directory if needed
mkdir -p ./docs/assets
gemini-2.5-flash-image
# Check environment variables
echo $GEMINI_API_KEY
# Verify .env file exists
cat .claude/skills/gemini-image-gen/.env
# or
cat .env
response.prompt_feedback.block_reason
references/api-reference.md
references/prompting-guide.md
references/safety-settings.md
references/code-examples.md