cwicr-material-substitution

Compare original and translation side by side

🇺🇸

Original

English
🇨🇳

Translation

Chinese

CWICR Material Substitution

CWICR材料替代方案

Business Case

业务场景

Problem Statement

问题描述

Material substitution challenges:
  • Supply chain issues
  • Cost optimization
  • Specification compliance
  • Equivalent performance
材料替代面临的挑战:
  • 供应链问题
  • 成本优化
  • 规格合规
  • 等效性能

Solution

解决方案

Systematic material substitution using CWICR data to find functionally equivalent alternatives with cost and performance analysis.
利用CWICR数据进行系统化材料替代,寻找具备成本和性能分析的功能等效替代方案。

Business Value

业务价值

  • Supply flexibility - Alternative sources
  • Cost savings - Lower-cost equivalents
  • Compliance - Specification matching
  • Quick decisions - Rapid alternative search
  • 供应灵活性 - 替代来源
  • 成本节约 - 低成本等效材料
  • 合规性 - 规格匹配
  • 快速决策 - 快速搜索替代方案

Technical Implementation

技术实现

python
import pandas as pd
import numpy as np
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
from difflib import SequenceMatcher


class SubstitutionType(Enum):
    """Types of substitution."""
    DIRECT = "direct"        # Drop-in replacement
    EQUIVALENT = "equivalent"  # Same function, different material
    UPGRADE = "upgrade"      # Better performance
    DOWNGRADE = "downgrade"  # Lower performance (cost saving)


class CompatibilityLevel(Enum):
    """Compatibility levels."""
    EXACT = "exact"          # Identical specs
    HIGH = "high"            # Minor differences
    MEDIUM = "medium"        # Requires review
    LOW = "low"              # Significant differences


@dataclass
class MaterialSubstitute:
    """Material substitution option."""
    original_code: str
    original_description: str
    substitute_code: str
    substitute_description: str
    substitution_type: SubstitutionType
    compatibility: CompatibilityLevel
    original_cost: float
    substitute_cost: float
    cost_difference: float
    cost_difference_pct: float
    notes: str
python
import pandas as pd
import numpy as np
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass
from enum import Enum
from difflib import SequenceMatcher


class SubstitutionType(Enum):
    """Types of substitution."""
    DIRECT = "direct"        # Drop-in replacement
    EQUIVALENT = "equivalent"  # Same function, different material
    UPGRADE = "upgrade"      # Better performance
    DOWNGRADE = "downgrade"  # Lower performance (cost saving)


class CompatibilityLevel(Enum):
    """Compatibility levels."""
    EXACT = "exact"          # Identical specs
    HIGH = "high"            # Minor differences
    MEDIUM = "medium"        # Requires review
    LOW = "low"              # Significant differences


@dataclass
class MaterialSubstitute:
    """Material substitution option."""
    original_code: str
    original_description: str
    substitute_code: str
    substitute_description: str
    substitution_type: SubstitutionType
    compatibility: CompatibilityLevel
    original_cost: float
    substitute_cost: float
    cost_difference: float
    cost_difference_pct: float
    notes: str

Material compatibility groups

Material compatibility groups

MATERIAL_GROUPS = { 'concrete': ['cement', 'beton', 'concrete', 'C20', 'C25', 'C30', 'C35', 'C40'], 'steel': ['steel', 'rebar', 'reinforcement', 'S235', 'S275', 'S355'], 'lumber': ['wood', 'timber', 'lumber', 'plywood', 'OSB'], 'masonry': ['brick', 'block', 'CMU', 'masonry'], 'insulation': ['insulation', 'rockwool', 'glasswool', 'EPS', 'XPS', 'PIR'], 'pipe': ['pipe', 'PVC', 'HDPE', 'copper', 'steel pipe'], 'electrical': ['wire', 'cable', 'conduit'], 'finishing': ['paint', 'plaster', 'drywall', 'gypsum'], 'flooring': ['tile', 'vinyl', 'laminate', 'carpet', 'hardwood'], 'roofing': ['shingle', 'membrane', 'metal roof', 'tile roof'] }
class CWICRMaterialSubstitution: """Find material substitutions using CWICR data."""
def __init__(self, cwicr_data: pd.DataFrame):
    self.materials = cwicr_data
    self._index_data()

def _index_data(self):
    """Index material data."""
    if 'work_item_code' in self.materials.columns:
        self._code_index = self.materials.set_index('work_item_code')
    elif 'material_code' in self.materials.columns:
        self._code_index = self.materials.set_index('material_code')
    else:
        self._code_index = None

def _similarity(self, a: str, b: str) -> float:
    """Calculate string similarity."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def _get_material_group(self, description: str) -> Optional[str]:
    """Identify material group from description."""
    desc_lower = description.lower()

    for group, keywords in MATERIAL_GROUPS.items():
        if any(kw.lower() in desc_lower for kw in keywords):
            return group

    return None

def _get_cost(self, code: str) -> Tuple[float, str]:
    """Get material cost."""
    if self._code_index is None or code not in self._code_index.index:
        return (0, 'unit')

    item = self._code_index.loc[code]
    cost = float(item.get('material_cost', item.get('total_cost', 0)) or 0)
    unit = str(item.get('unit', 'unit'))

    return (cost, unit)

def find_substitutes(self,
                      material_code: str,
                      max_results: int = 10,
                      max_cost_increase: float = 0.20,
                      include_upgrades: bool = True) -> List[MaterialSubstitute]:
    """Find substitute materials."""

    if self._code_index is None or material_code not in self._code_index.index:
        return []

    original = self._code_index.loc[material_code]
    original_desc = str(original.get('description', material_code))
    original_cost, original_unit = self._get_cost(material_code)

    group = self._get_material_group(original_desc)

    substitutes = []

    for code, row in self._code_index.iterrows():
        if code == material_code:
            continue

        sub_desc = str(row.get('description', code))
        sub_group = self._get_material_group(sub_desc)

        # Check if same group or similar description
        if group and sub_group == group:
            similarity = 0.7
        else:
            similarity = self._similarity(original_desc, sub_desc)

        if similarity < 0.3:
            continue

        sub_cost, sub_unit = self._get_cost(code)

        if sub_unit != original_unit:
            continue

        cost_diff = sub_cost - original_cost
        cost_diff_pct = (cost_diff / original_cost * 100) if original_cost > 0 else 0

        # Filter by cost increase limit
        if not include_upgrades and cost_diff_pct > max_cost_increase * 100:
            continue

        # Determine substitution type
        if cost_diff_pct < -10:
            sub_type = SubstitutionType.DOWNGRADE
        elif cost_diff_pct > 10:
            sub_type = SubstitutionType.UPGRADE
        elif similarity > 0.8:
            sub_type = SubstitutionType.DIRECT
        else:
            sub_type = SubstitutionType.EQUIVALENT

        # Determine compatibility
        if similarity > 0.9:
            compat = CompatibilityLevel.EXACT
        elif similarity > 0.7:
            compat = CompatibilityLevel.HIGH
        elif similarity > 0.5:
            compat = CompatibilityLevel.MEDIUM
        else:
            compat = CompatibilityLevel.LOW

        substitutes.append(MaterialSubstitute(
            original_code=material_code,
            original_description=original_desc,
            substitute_code=code,
            substitute_description=sub_desc,
            substitution_type=sub_type,
            compatibility=compat,
            original_cost=round(original_cost, 2),
            substitute_cost=round(sub_cost, 2),
            cost_difference=round(cost_diff, 2),
            cost_difference_pct=round(cost_diff_pct, 1),
            notes=f"Similarity: {similarity:.0%}"
        ))

    # Sort by compatibility then cost
    substitutes.sort(key=lambda x: (
        list(CompatibilityLevel).index(x.compatibility),
        x.cost_difference
    ))

    return substitutes[:max_results]

def find_cost_saving_alternatives(self,
                                   material_code: str,
                                   min_savings_pct: float = 5.0) -> List[MaterialSubstitute]:
    """Find lower-cost alternatives."""

    subs = self.find_substitutes(material_code, max_results=20)

    cost_saving = [
        s for s in subs
        if s.cost_difference_pct <= -min_savings_pct
    ]

    return sorted(cost_saving, key=lambda x: x.cost_difference)

def find_by_group(self,
                   group_name: str,
                   max_results: int = 20) -> List[Dict[str, Any]]:
    """Find all materials in a group."""

    if self._code_index is None:
        return []

    results = []

    for code, row in self._code_index.iterrows():
        desc = str(row.get('description', code))
        item_group = self._get_material_group(desc)

        if item_group == group_name.lower():
            cost, unit = self._get_cost(code)
            results.append({
                'code': code,
                'description': desc,
                'cost': cost,
                'unit': unit,
                'group': item_group
            })

    return sorted(results, key=lambda x: x['cost'])[:max_results]

def substitution_impact(self,
                        original_code: str,
                        substitute_code: str,
                        quantity: float) -> Dict[str, Any]:
    """Calculate impact of substitution."""

    original_cost, _ = self._get_cost(original_code)
    substitute_cost, _ = self._get_cost(substitute_code)

    original_total = original_cost * quantity
    substitute_total = substitute_cost * quantity
    impact = substitute_total - original_total

    return {
        'original_code': original_code,
        'substitute_code': substitute_code,
        'quantity': quantity,
        'original_unit_cost': original_cost,
        'substitute_unit_cost': substitute_cost,
        'original_total': round(original_total, 2),
        'substitute_total': round(substitute_total, 2),
        'cost_impact': round(impact, 2),
        'impact_percent': round(impact / original_total * 100, 1) if original_total > 0 else 0
    }

def batch_substitution(self,
                        materials: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Find substitutions for multiple materials."""

    results = []
    total_original = 0
    total_potential_savings = 0

    for mat in materials:
        code = mat.get('material_code', mat.get('code'))
        qty = mat.get('quantity', 1)

        subs = self.find_cost_saving_alternatives(code)

        original_cost, _ = self._get_cost(code)
        original_total = original_cost * qty
        total_original += original_total

        best_sub = subs[0] if subs else None
        potential_savings = 0

        if best_sub:
            impact = self.substitution_impact(code, best_sub.substitute_code, qty)
            potential_savings = abs(impact['cost_impact']) if impact['cost_impact'] < 0 else 0
            total_potential_savings += potential_savings

        results.append({
            'code': code,
            'quantity': qty,
            'original_total': round(original_total, 2),
            'best_substitute': best_sub.substitute_code if best_sub else None,
            'potential_savings': round(potential_savings, 2),
            'alternatives_count': len(subs)
        })

    return {
        'materials': results,
        'total_original_cost': round(total_original, 2),
        'total_potential_savings': round(total_potential_savings, 2),
        'savings_percent': round(total_potential_savings / total_original * 100, 1) if total_original > 0 else 0
    }

def export_substitution_report(self,
                                substitutes: List[MaterialSubstitute],
                                output_path: str) -> str:
    """Export substitution report to Excel."""

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        df = pd.DataFrame([
            {
                'Original Code': s.original_code,
                'Original Description': s.original_description,
                'Substitute Code': s.substitute_code,
                'Substitute Description': s.substitute_description,
                'Type': s.substitution_type.value,
                'Compatibility': s.compatibility.value,
                'Original Cost': s.original_cost,
                'Substitute Cost': s.substitute_cost,
                'Cost Difference': s.cost_difference,
                'Difference %': s.cost_difference_pct,
                'Notes': s.notes
            }
            for s in substitutes
        ])
        df.to_excel(writer, sheet_name='Substitutes', index=False)

    return output_path
undefined
MATERIAL_GROUPS = { 'concrete': ['cement', 'beton', 'concrete', 'C20', 'C25', 'C30', 'C35', 'C40'], 'steel': ['steel', 'rebar', 'reinforcement', 'S235', 'S275', 'S355'], 'lumber': ['wood', 'timber', 'lumber', 'plywood', 'OSB'], 'masonry': ['brick', 'block', 'CMU', 'masonry'], 'insulation': ['insulation', 'rockwool', 'glasswool', 'EPS', 'XPS', 'PIR'], 'pipe': ['pipe', 'PVC', 'HDPE', 'copper', 'steel pipe'], 'electrical': ['wire', 'cable', 'conduit'], 'finishing': ['paint', 'plaster', 'drywall', 'gypsum'], 'flooring': ['tile', 'vinyl', 'laminate', 'carpet', 'hardwood'], 'roofing': ['shingle', 'membrane', 'metal roof', 'tile roof'] }
class CWICRMaterialSubstitution: """Find material substitutions using CWICR data."""
def __init__(self, cwicr_data: pd.DataFrame):
    self.materials = cwicr_data
    self._index_data()

def _index_data(self):
    """Index material data."""
    if 'work_item_code' in self.materials.columns:
        self._code_index = self.materials.set_index('work_item_code')
    elif 'material_code' in self.materials.columns:
        self._code_index = self.materials.set_index('material_code')
    else:
        self._code_index = None

def _similarity(self, a: str, b: str) -> float:
    """Calculate string similarity."""
    return SequenceMatcher(None, a.lower(), b.lower()).ratio()

def _get_material_group(self, description: str) -> Optional[str]:
    """Identify material group from description."""
    desc_lower = description.lower()

    for group, keywords in MATERIAL_GROUPS.items():
        if any(kw.lower() in desc_lower for kw in keywords):
            return group

    return None

def _get_cost(self, code: str) -> Tuple[float, str]:
    """Get material cost."""
    if self._code_index is None or code not in self._code_index.index:
        return (0, 'unit')

    item = self._code_index.loc[code]
    cost = float(item.get('material_cost', item.get('total_cost', 0)) or 0)
    unit = str(item.get('unit', 'unit'))

    return (cost, unit)

def find_substitutes(self,
                      material_code: str,
                      max_results: int = 10,
                      max_cost_increase: float = 0.20,
                      include_upgrades: bool = True) -> List[MaterialSubstitute]:
    """Find substitute materials."""

    if self._code_index is None or material_code not in self._code_index.index:
        return []

    original = self._code_index.loc[material_code]
    original_desc = str(original.get('description', material_code))
    original_cost, original_unit = self._get_cost(material_code)

    group = self._get_material_group(original_desc)

    substitutes = []

    for code, row in self._code_index.iterrows():
        if code == material_code:
            continue

        sub_desc = str(row.get('description', code))
        sub_group = self._get_material_group(sub_desc)

        # Check if same group or similar description
        if group and sub_group == group:
            similarity = 0.7
        else:
            similarity = self._similarity(original_desc, sub_desc)

        if similarity < 0.3:
            continue

        sub_cost, sub_unit = self._get_cost(code)

        if sub_unit != original_unit:
            continue

        cost_diff = sub_cost - original_cost
        cost_diff_pct = (cost_diff / original_cost * 100) if original_cost > 0 else 0

        # Filter by cost increase limit
        if not include_upgrades and cost_diff_pct > max_cost_increase * 100:
            continue

        # Determine substitution type
        if cost_diff_pct < -10:
            sub_type = SubstitutionType.DOWNGRADE
        elif cost_diff_pct > 10:
            sub_type = SubstitutionType.UPGRADE
        elif similarity > 0.8:
            sub_type = SubstitutionType.DIRECT
        else:
            sub_type = SubstitutionType.EQUIVALENT

        # Determine compatibility
        if similarity > 0.9:
            compat = CompatibilityLevel.EXACT
        elif similarity > 0.7:
            compat = CompatibilityLevel.HIGH
        elif similarity > 0.5:
            compat = CompatibilityLevel.MEDIUM
        else:
            compat = CompatibilityLevel.LOW

        substitutes.append(MaterialSubstitute(
            original_code=material_code,
            original_description=original_desc,
            substitute_code=code,
            substitute_description=sub_desc,
            substitution_type=sub_type,
            compatibility=compat,
            original_cost=round(original_cost, 2),
            substitute_cost=round(sub_cost, 2),
            cost_difference=round(cost_diff, 2),
            cost_difference_pct=round(cost_diff_pct, 1),
            notes=f"Similarity: {similarity:.0%}"
        ))

    # Sort by compatibility then cost
    substitutes.sort(key=lambda x: (
        list(CompatibilityLevel).index(x.compatibility),
        x.cost_difference
    ))

    return substitutes[:max_results]

def find_cost_saving_alternatives(self,
                                   material_code: str,
                                   min_savings_pct: float = 5.0) -> List[MaterialSubstitute]:
    """Find lower-cost alternatives."""

    subs = self.find_substitutes(material_code, max_results=20)

    cost_saving = [
        s for s in subs
        if s.cost_difference_pct <= -min_savings_pct
    ]

    return sorted(cost_saving, key=lambda x: x.cost_difference)

def find_by_group(self,
                   group_name: str,
                   max_results: int = 20) -> List[Dict[str, Any]]:
    """Find all materials in a group."""

    if self._code_index is None:
        return []

    results = []

    for code, row in self._code_index.iterrows():
        desc = str(row.get('description', code))
        item_group = self._get_material_group(desc)

        if item_group == group_name.lower():
            cost, unit = self._get_cost(code)
            results.append({
                'code': code,
                'description': desc,
                'cost': cost,
                'unit': unit,
                'group': item_group
            })

    return sorted(results, key=lambda x: x['cost'])[:max_results]

def substitution_impact(self,
                        original_code: str,
                        substitute_code: str,
                        quantity: float) -> Dict[str, Any]:
    """Calculate impact of substitution."""

    original_cost, _ = self._get_cost(original_code)
    substitute_cost, _ = self._get_cost(substitute_code)

    original_total = original_cost * quantity
    substitute_total = substitute_cost * quantity
    impact = substitute_total - original_total

    return {
        'original_code': original_code,
        'substitute_code': substitute_code,
        'quantity': quantity,
        'original_unit_cost': original_cost,
        'substitute_unit_cost': substitute_cost,
        'original_total': round(original_total, 2),
        'substitute_total': round(substitute_total, 2),
        'cost_impact': round(impact, 2),
        'impact_percent': round(impact / original_total * 100, 1) if original_total > 0 else 0
    }

def batch_substitution(self,
                        materials: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Find substitutions for multiple materials."""

    results = []
    total_original = 0
    total_potential_savings = 0

    for mat in materials:
        code = mat.get('material_code', mat.get('code'))
        qty = mat.get('quantity', 1)

        subs = self.find_cost_saving_alternatives(code)

        original_cost, _ = self._get_cost(code)
        original_total = original_cost * qty
        total_original += original_total

        best_sub = subs[0] if subs else None
        potential_savings = 0

        if best_sub:
            impact = self.substitution_impact(code, best_sub.substitute_code, qty)
            potential_savings = abs(impact['cost_impact']) if impact['cost_impact'] < 0 else 0
            total_potential_savings += potential_savings

        results.append({
            'code': code,
            'quantity': qty,
            'original_total': round(original_total, 2),
            'best_substitute': best_sub.substitute_code if best_sub else None,
            'potential_savings': round(potential_savings, 2),
            'alternatives_count': len(subs)
        })

    return {
        'materials': results,
        'total_original_cost': round(total_original, 2),
        'total_potential_savings': round(total_potential_savings, 2),
        'savings_percent': round(total_potential_savings / total_original * 100, 1) if total_original > 0 else 0
    }

def export_substitution_report(self,
                                substitutes: List[MaterialSubstitute],
                                output_path: str) -> str:
    """Export substitution report to Excel."""

    with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
        df = pd.DataFrame([
            {
                'Original Code': s.original_code,
                'Original Description': s.original_description,
                'Substitute Code': s.substitute_code,
                'Substitute Description': s.substitute_description,
                'Type': s.substitution_type.value,
                'Compatibility': s.compatibility.value,
                'Original Cost': s.original_cost,
                'Substitute Cost': s.substitute_cost,
                'Cost Difference': s.cost_difference,
                'Difference %': s.cost_difference_pct,
                'Notes': s.notes
            }
            for s in substitutes
        ])
        df.to_excel(writer, sheet_name='Substitutes', index=False)

    return output_path
undefined

Quick Start

快速开始

python
undefined
python
undefined

Load CWICR data

Load CWICR data

cwicr = pd.read_parquet("ddc_cwicr_en.parquet")
cwicr = pd.read_parquet("ddc_cwicr_en.parquet")

Initialize substitution finder

Initialize substitution finder

sub_finder = CWICRMaterialSubstitution(cwicr)
sub_finder = CWICRMaterialSubstitution(cwicr)

Find substitutes

Find substitutes

substitutes = sub_finder.find_substitutes("CONC-C30-001")
for sub in substitutes[:5]: print(f"{sub.substitute_code}: ${sub.cost_difference:+.2f} ({sub.cost_difference_pct:+.1f}%)")
undefined
substitutes = sub_finder.find_substitutes("CONC-C30-001")
for sub in substitutes[:5]: print(f"{sub.substitute_code}: ${sub.cost_difference:+.2f} ({sub.cost_difference_pct:+.1f}%)")
undefined

Common Use Cases

常见用例

1. Cost Saving Alternatives

1. 成本节约替代方案

python
savings = sub_finder.find_cost_saving_alternatives("STEEL-S355", min_savings_pct=10)
for s in savings:
    print(f"{s.substitute_code}: Save ${abs(s.cost_difference):.2f}/unit")
python
savings = sub_finder.find_cost_saving_alternatives("STEEL-S355", min_savings_pct=10)
for s in savings:
    print(f"{s.substitute_code}: Save ${abs(s.cost_difference):.2f}/unit")

2. Batch Analysis

2. 批量分析

python
materials = [
    {'code': 'CONC-001', 'quantity': 200},
    {'code': 'STEEL-002', 'quantity': 5000},
    {'code': 'BRICK-003', 'quantity': 10000}
]

batch = sub_finder.batch_substitution(materials)
print(f"Potential Savings: ${batch['total_potential_savings']:,.2f}")
python
materials = [
    {'code': 'CONC-001', 'quantity': 200},
    {'code': 'STEEL-002', 'quantity': 5000},
    {'code': 'BRICK-003', 'quantity': 10000}
]

batch = sub_finder.batch_substitution(materials)
print(f"Potential Savings: ${batch['total_potential_savings']:,.2f}")

3. Material Group Search

3. 材料组搜索

python
concrete_options = sub_finder.find_by_group('concrete')
for opt in concrete_options[:5]:
    print(f"{opt['code']}: ${opt['cost']:.2f}/{opt['unit']}")
python
concrete_options = sub_finder.find_by_group('concrete')
for opt in concrete_options[:5]:
    print(f"{opt['code']}: ${opt['cost']:.2f}/{opt['unit']}")

Resources

资源