# Rust Performance Optimization Guide
Optimization Workflow:
1. BASELINE -> Establish current behavior with tests
2. TEST -> Add regression tests for the code you'll change
3. OPTIMIZE -> Make the change
4. VERIFY -> Run tests to prove correctness preserved
5. BENCHMARK -> Only now measure the improvement

**IMPORTANT**: Always document which profile was used in benchmark reports.
**重要提示**:在基准测试报告中务必注明所使用的构建配置文件。

use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
fn benchmark_variants(c: &mut Criterion) {
let mut group = c.benchmark_group("processing");
for size in [100, 1000, 10000].iter() {
let data = generate_data(*size);
group.bench_with_input(
BenchmarkId::new("original", size),
&data,
|b, data| b.iter(|| original_impl(black_box(data))),
);
group.bench_with_input(
BenchmarkId::new("optimized", size),
&data,
|b, data| b.iter(|| optimized_impl(black_box(data))),
);
}
group.finish();
}
criterion_group!(benches, benchmark_variants);
criterion_main!(benches);use criterion::{black_box, criterion_group, criterion_main, Criterion, BenchmarkId};
fn benchmark_variants(c: &mut Criterion) {
let mut group = c.benchmark_group("processing");
for size in [100, 1000, 10000].iter() {
let data = generate_data(*size);
group.bench_with_input(
BenchmarkId::new("original", size),
&data,
|b, data| b.iter(|| original_impl(black_box(data))),
);
group.bench_with_input(
BenchmarkId::new("optimized", size),
&data,
|b, data| b.iter(|| optimized_impl(black_box(data))),
);
}
group.finish();
}
criterion_group!(benches, benchmark_variants);
criterion_main!(benches);undefinedundefinedundefinedundefinedundefinedundefined| Metric | Before | After | Change |
|---|---|---|---|
| Time (mean) | 45.2s | 12.3s | -73% |
| Memory (peak) | 2.1 GB | 850 MB | -60% |
| Throughput | 22 MB/s | 81 MB/s | +3.7x |
undefined// Before: Allocates on every call
fn process(items: &[Item]) -> Vec<String> {
items.iter().map(|i| i.name.clone()).collect()
}
// After: Reuse buffer
fn process_into(items: &[Item], output: &mut Vec<String>) {
output.clear();
output.extend(items.iter().map(|i| i.name.clone()));
}
// Use SmallVec for small collections
use smallvec::SmallVec;
type Tags = SmallVec<[String; 4]>; // Stack-allocated for <= 4 items// Before: Allocates on every call
fn process(items: &[Item]) -> Vec<String> {
items.iter().map(|i| i.name.clone()).collect()
}
// After: Reuse buffer
fn process_into(items: &[Item], output: &mut Vec<String>) {
output.clear();
output.extend(items.iter().map(|i| i.name.clone()));
}
// Use SmallVec for small collections
use smallvec::SmallVec;
type Tags = SmallVec<[String; 4]>; // Stack-allocated for <= 4 items// Before: Array of Structs (AoS)
struct Entity {
    position: Vec3,
    velocity: Vec3,
    health: f32,
}
let entities: Vec<Entity>;

// After: Struct of Arrays (SoA) - better cache locality.
// Each field lives in its own contiguous Vec, so a pass over one field
// streams only the bytes it needs instead of whole Entity records.
struct Entities {
    positions: Vec<Vec3>,
    velocities: Vec<Vec3>,
    health: Vec<f32>,
}

// Process all positions together (cache-friendly); zip avoids indexed access
// and the per-iteration bounds checks that come with it.
fn update_positions(entities: &mut Entities, dt: f32) {
    for (pos, vel) in entities.positions.iter_mut().zip(&entities.velocities) {
        *pos += *vel * dt;
    }
}

use std::borrow::Cow;
// Parse without copying when possible
struct ParsedData<'a> {
name: Cow<'a, str>,
values: &'a [u8],
}
fn parse(input: &[u8]) -> Result<ParsedData<'_>> {
// Borrow from input when no transformation needed
// Only allocate when escaping/decoding required
}use std::borrow::Cow;
// Parse without copying when possible
struct ParsedData<'a> {
name: Cow<'a, str>,
values: &'a [u8],
}
fn parse(input: &[u8]) -> Result<ParsedData<'_>> {
// Borrow from input when no transformation needed
// Only allocate when escaping/decoding required
}// Use portable-simd or explicit intrinsics
use std::simd::{f32x8, SimdFloat};
fn sum_simd(data: &[f32]) -> f32 {
let chunks = data.chunks_exact(8);
let remainder = chunks.remainder();
let sum = chunks
.map(|chunk| f32x8::from_slice(chunk))
.fold(f32x8::splat(0.0), |acc, x| acc + x)
.reduce_sum();
sum + remainder.iter().sum::<f32>()
}// Use portable-simd or explicit intrinsics
use std::simd::{f32x8, SimdFloat};
fn sum_simd(data: &[f32]) -> f32 {
let chunks = data.chunks_exact(8);
let remainder = chunks.remainder();
let sum = chunks
.map(|chunk| f32x8::from_slice(chunk))
.fold(f32x8::splat(0.0), |acc, x| acc + x)
.reduce_sum();
sum + remainder.iter().sum::<f32>()
}// Use string interning for repeated strings
use string_interner::{StringInterner, DefaultSymbol};

struct Interned {
    interner: StringInterner,
}

impl Interned {
    // Returns a small symbol; equal strings share a single stored copy,
    // so repeated strings cost one allocation total instead of one each.
    fn intern(&mut self, s: &str) -> DefaultSymbol {
        self.interner.get_or_intern(s)
    }
}

// Use CompactString for small strings: short strings are stored inline
// in the struct itself, avoiding the heap entirely.
use compact_str::CompactString;
let small: CompactString = "hello".into(); // No heap allocation

// Likely/unlikely branch hints
#[cold]
fn handle_error() { ... }
// Force inlining
#[inline(always)]
fn hot_function() { ... }
// Prevent inlining
#[inline(never)]
fn cold_function() { ... }
// Enable specific optimizations
#[target_feature(enable = "avx2")]
unsafe fn simd_process() { ... }// Likely/unlikely branch hints
#[cold]
fn handle_error() { ... }
// Force inlining
#[inline(always)]
fn hot_function() { ... }
// Prevent inlining
#[inline(never)]
fn cold_function() { ... }
// Enable specific optimizations
#[target_feature(enable = "avx2")]
unsafe fn simd_process() { ... }// Check struct size and alignment
println!("Size: {}", std::mem::size_of::<MyStruct>());
println!("Align: {}", std::mem::align_of::<MyStruct>());

// Optimize field ordering to reduce padding: with #[repr(C)] fields are laid
// out in declaration order, so sort them largest-first to avoid gaps.
#[repr(C)]
struct Optimized {
    large: u64,  // 8 bytes
    medium: u32, // 4 bytes
    small: u16,  // 2 bytes
    tiny: u8,    // 1 byte
    _pad: u8,    // explicit padding -> total 16 bytes, no hidden padding
}

[ ] Regression tests added/extended for changed code paths
[ ] Tests pass BEFORE benchmarking
[ ] Benchmark script included (Criterion or hyperfine)
[ ] Before/after numbers on same machine
[ ] Build profile explicitly noted (release, release-lto, etc.)
[ ] If >50% improvement: flamegraph/perf evidence included
[ ] If unsafe code: invariants documented + tests proving them

[ ] 为变更的代码路径添加/扩展了回归测试
[ ] 基准测试前所有测试已通过
[ ] 包含基准测试脚本(Criterion或hyperfine)
[ ] 同一机器上的优化前后性能数据已记录
[ ] 明确注明了构建配置文件(release、release-lto等)
[ ] 若性能提升超过50%:已附上火焰图/perf证据
[ ] 若使用了unsafe代码:已记录不变量并通过测试验证