Loading...
Loading...
Compare original and translation side by side
| Environment | Purpose | Workspace | Data | Permissions |
|---|---|---|---|---|
| Development | Local dev, experimentation | Shared dev workspace | Sample/synthetic | Broad access |
| Staging | Integration testing, UAT | Dedicated staging | Prod clone (masked) | Team access |
| Production | Live workloads | Dedicated prod | Real data | Restricted |
| 环境 | 用途 | 工作区 | 数据 | 权限 |
|---|---|---|---|---|
| 开发环境 | 本地开发、实验 | 共享开发工作区 | 样本/合成数据 | 广泛访问 |
| 预发布环境 | 集成测试、用户验收测试(UAT) | 专属预发布工作区 | 生产环境克隆(已脱敏) | 团队访问 |
| 生产环境 | 实时工作负载 | 专属生产工作区 | 真实数据 | 受限访问 |
# Environment-specific schedule
schedule:
quartz_cron_expression: >-
${bundle.target == "prod" ? "0 0 6 * * ?" :
bundle.target == "staging" ? "0 0 8 * * ?" : null}
timezone_id: "America/New_York"
pause_status: ${bundle.target == "dev" ? "PAUSED" : "UNPAUSED"}
# Environment-specific notifications
email_notifications:
on_failure: ${bundle.target == "prod" ?
["oncall@company.com", "pagerduty@company.pagerduty.com"] :
["team@company.com"]}
# Environment-specific cluster sizing
job_clusters:
- job_cluster_key: etl_cluster
new_cluster:
spark_version: "14.3.x-scala2.12"
node_type_id: >-
${bundle.target == "prod" ? "Standard_DS4_v2" : "Standard_DS3_v2"}
num_workers: ${var.cluster_size}
autoscale:
min_workers: ${bundle.target == "prod" ? 2 : 1}
max_workers: ${bundle.target == "prod" ? 10 : 4}
# Spot instances for non-prod
azure_attributes:
availability: >-
${bundle.target == "prod" ? "ON_DEMAND_AZURE" : "SPOT_AZURE"}
first_on_demand: 1
# Environment-specific schedule
schedule:
quartz_cron_expression: >-
${bundle.target == "prod" ? "0 0 6 * * ?" :
bundle.target == "staging" ? "0 0 8 * * ?" : null}
timezone_id: "America/New_York"
pause_status: ${bundle.target == "dev" ? "PAUSED" : "UNPAUSED"}
# Environment-specific notifications
email_notifications:
on_failure: ${bundle.target == "prod" ?
["oncall@company.com", "pagerduty@company.pagerduty.com"] :
["team@company.com"]}
# Environment-specific cluster sizing
job_clusters:
- job_cluster_key: etl_cluster
new_cluster:
spark_version: "14.3.x-scala2.12"
node_type_id: >-
${bundle.target == "prod" ? "Standard_DS4_v2" : "Standard_DS3_v2"}
num_workers: ${var.cluster_size}
autoscale:
min_workers: ${bundle.target == "prod" ? 2 : 1}
max_workers: ${bundle.target == "prod" ? 10 : 4}
# Spot instances for non-prod
azure_attributes:
availability: >-
${bundle.target == "prod" ? "ON_DEMAND_AZURE" : "SPOT_AZURE"}
first_on_demand: 1
-- Create environment-specific catalogs
CREATE CATALOG IF NOT EXISTS dev_catalog;
CREATE CATALOG IF NOT EXISTS staging_catalog;
CREATE CATALOG IF NOT EXISTS prod_catalog;
-- Grant cross-environment read access for data lineage
GRANT USAGE ON CATALOG prod_catalog TO `staging-service-principal`;
GRANT SELECT ON CATALOG prod_catalog TO `staging-service-principal`;
-- Set up data sharing between environments
CREATE SHARE IF NOT EXISTS prod_to_staging;
ALTER SHARE prod_to_staging ADD SCHEMA prod_catalog.reference;
-- Create recipient for staging workspace
CREATE RECIPIENT IF NOT EXISTS staging_workspace
USING IDENTITY ('staging-workspace-identity');
GRANT SELECT ON SHARE prod_to_staging TO RECIPIENT staging_workspace;
-- Create environment-specific catalogs
CREATE CATALOG IF NOT EXISTS dev_catalog;
CREATE CATALOG IF NOT EXISTS staging_catalog;
CREATE CATALOG IF NOT EXISTS prod_catalog;
-- Grant cross-environment read access for data lineage
GRANT USAGE ON CATALOG prod_catalog TO `staging-service-principal`;
GRANT SELECT ON CATALOG prod_catalog TO `staging-service-principal`;
-- Set up data sharing between environments
CREATE SHARE IF NOT EXISTS prod_to_staging;
ALTER SHARE prod_to_staging ADD SCHEMA prod_catalog.reference;
-- Create recipient for staging workspace
CREATE RECIPIENT IF NOT EXISTS staging_workspace
USING IDENTITY ('staging-workspace-identity');
GRANT SELECT ON SHARE prod_to_staging TO RECIPIENT staging_workspace;
def __init__(self, environment: str = None):
self.environment = environment or os.getenv("ENVIRONMENT", "dev")
self.w = WorkspaceClient()
self._secret_scope = f"{self.environment}-secrets"
def get_secret(self, key: str) -> str:
    """Return the plaintext value of a secret from this environment's scope.

    Args:
        key: Name of the secret inside ``self._secret_scope``.

    Returns:
        The secret value decoded to text.
    """
    import base64

    # In notebooks, dbutils can be used instead:
    #   dbutils.secrets.get(scope=self._secret_scope, key=key)
    # Via the workspace API (for testing): the Secrets API hands the value
    # back base64-encoded, so it must be decoded before callers can use it.
    response = self.w.secrets.get_secret(
        scope=self._secret_scope,
        key=key,
    )
    return base64.b64decode(response.value).decode("utf-8")
def get_database_url(self) -> str:
    """Return the value of the ``database_url`` secret via ``get_secret``."""
    secret_name = "database_url"
    return self.get_secret(secret_name)
def get_api_key(self, service: str) -> str:
    """Return the API key stored for ``service``.

    Args:
        service: Service name; the secret looked up is ``"<service>_api_key"``.

    Returns:
        Whatever ``get_secret`` returns for that key.
    """
    return self.get_secret(f"{service}_api_key")

def __init__(self, environment: str = None):
    """Bind the instance to an environment and derive its secret scope.

    Args:
        environment: Environment name. When falsy, the ``ENVIRONMENT``
            environment variable is used, defaulting to ``"dev"``.
    """
    self.environment = environment or os.getenv("ENVIRONMENT", "dev")
    self.w = WorkspaceClient()
    # Secret scopes are named "<environment>-secrets".
    self._secret_scope = f"{self.environment}-secrets"
def get_secret(self, key: str) -> str:
    """Return the plaintext value of a secret from this environment's scope.

    Args:
        key: Name of the secret inside ``self._secret_scope``.

    Returns:
        The secret value decoded to text.
    """
    import base64

    # In notebooks, dbutils can be used instead:
    #   dbutils.secrets.get(scope=self._secret_scope, key=key)
    # Via the workspace API (for testing): the Secrets API hands the value
    # back base64-encoded, so it must be decoded before callers can use it.
    response = self.w.secrets.get_secret(
        scope=self._secret_scope,
        key=key,
    )
    return base64.b64decode(response.value).decode("utf-8")
def get_database_url(self) -> str:
    """Return the value of the ``database_url`` secret via ``get_secret``."""
    secret_name = "database_url"
    return self.get_secret(secret_name)
def get_api_key(self, service: str) -> str:
    """Return the API key stored for ``service``.

    Args:
        service: Service name; the secret looked up is ``"<service>_api_key"``.

    Returns:
        Whatever ``get_secret`` returns for that key.
    """
    return self.get_secret(f"{service}_api_key")

# Or from Databricks tags
# spark.conf.get("spark.databricks.tags.Environment")
configs = {
"dev": EnvironmentConfig(
name="dev",
catalog="dev_catalog",
schema_prefix="dev",
is_production=False,
debug_enabled=True,
max_cluster_size=4,
),
"staging": EnvironmentConfig(
name="staging",
catalog="staging_catalog",
schema_prefix="staging",
is_production=False,
debug_enabled=True,
max_cluster_size=8,
),
"prod": EnvironmentConfig(
name="prod",
catalog="prod_catalog",
schema_prefix="prod",
is_production=True,
debug_enabled=False,
max_cluster_size=20,
),
}
return configs.get(env, configs["dev"])
# Or from Databricks tags
# spark.conf.get("spark.databricks.tags.Environment")
configs = {
"dev": EnvironmentConfig(
name="dev",
catalog="dev_catalog",
schema_prefix="dev",
is_production=False,
debug_enabled=True,
max_cluster_size=4,
),
"staging": EnvironmentConfig(
name="staging",
catalog="staging_catalog",
schema_prefix="staging",
is_production=False,
debug_enabled=True,
max_cluster_size=8,
),
"prod": EnvironmentConfig(
name="prod",
catalog="prod_catalog",
schema_prefix="prod",
is_production=True,
debug_enabled=False,
max_cluster_size=20,
),
}
return configs.get(env, configs["dev"])
Steps:
1. Verify staging tests passed
2. Tag release in git
3. Deploy to production
4. Run smoke tests
5. Enable schedules
"""
results = {"steps": []}
# 1. Verify staging tests
print("Verifying staging tests...")
staging_result = subprocess.run(
["databricks", "bundle", "run", "-t", "staging", "integration-tests"],
capture_output=True
)
if staging_result.returncode != 0:
raise Exception("Staging tests failed")
results["steps"].append({"stage": "verify_staging", "status": "passed"})
# 2. Tag release
print(f"Tagging release {version_tag}...")
if not dry_run:
subprocess.run(["git", "tag", version_tag])
subprocess.run(["git", "push", "origin", version_tag])
results["steps"].append({"stage": "tag_release", "status": "done" if not dry_run else "skipped"})
# 3. Deploy to production
print("Deploying to production...")
if not dry_run:
subprocess.run(["databricks", "bundle", "deploy", "-t", "prod"])
results["steps"].append({"stage": "deploy_prod", "status": "done" if not dry_run else "skipped"})
# 4. Run smoke tests
print("Running smoke tests...")
if not dry_run:
subprocess.run(["databricks", "bundle", "run", "-t", "prod", "smoke-tests"])
results["steps"].append({"stage": "smoke_tests", "status": "done" if not dry_run else "skipped"})
return results
result = promote_to_production(args.version, args.dry_run)
print(result)
Steps:
1. Verify staging tests passed
2. Tag release in git
3. Deploy to production
4. Run smoke tests
5. Enable schedules
"""
results = {"steps": []}
# 1. Verify staging tests
print("Verifying staging tests...")
staging_result = subprocess.run(
["databricks", "bundle", "run", "-t", "staging", "integration-tests"],
capture_output=True
)
if staging_result.returncode != 0:
raise Exception("Staging tests failed")
results["steps"].append({"stage": "verify_staging", "status": "passed"})
# 2. Tag release
print(f"Tagging release {version_tag}...")
if not dry_run:
subprocess.run(["git", "tag", version_tag])
subprocess.run(["git", "push", "origin", version_tag])
results["steps"].append({"stage": "tag_release", "status": "done" if not dry_run else "skipped"})
# 3. Deploy to production
print("Deploying to production...")
if not dry_run:
subprocess.run(["databricks", "bundle", "deploy", "-t", "prod"])
results["steps"].append({"stage": "deploy_prod", "status": "done" if not dry_run else "skipped"})
# 4. Run smoke tests
print("Running smoke tests...")
if not dry_run:
subprocess.run(["databricks", "bundle", "run", "-t", "prod", "smoke-tests"])
results["steps"].append({"stage": "smoke_tests", "status": "done" if not dry_run else "skipped"})
return results
result = promote_to_production(args.version, args.dry_run)
print(result)
| Issue | Cause | Solution |
|---|---|---|
| Wrong environment | Missing env var | Check ENVIRONMENT variable |
| Secret not found | Wrong scope | Verify scope name matches environment |
| Permission denied | Missing grants | Add Unity Catalog grants |
| Config mismatch | Target override issue | Check bundle target syntax |
| 问题 | 原因 | 解决方案 |
|---|---|---|
| 环境识别错误 | 缺少环境变量 | 检查ENVIRONMENT变量配置 |
| 密钥未找到 | 密钥范围错误 | 验证范围名称与环境是否匹配 |
| 权限拒绝 | 缺少授权 | 添加Unity Catalog权限 |
| 配置不匹配 | 目标覆盖问题 | 检查bundle目标语法 |
env = detect_environment()
print(f"Running in {env.name} environment")
print(f"Using catalog: {env.catalog}")
print(f"Production mode: {env.is_production}")
env = detect_environment()
print(f"Running in {env.name} environment")
print(f"Using catalog: {env.catalog}")
print(f"Production mode: {env.is_production}")
databricks-observability
databricks-observability