Loading...
Loading...
Compare original and translation side by side
EP > 1EP > 1cfg.comm_overlap.overlap_moe_expert_parallel_comm = Truecfg.comm_overlap.overlap_moe_expert_parallel_comm = Trueundefinedundefinedexpert_model_parallel_size > 1num_moe_experts > 1moe_token_dispatcher_type"alltoall""flex"virtual_pipeline_model_parallel_sizeNoneexpert_model_parallel_size > 1num_moe_experts > 1moe_token_dispatcher_type"alltoall""flex"virtual_pipeline_model_parallel_sizeNonemoe_flex_dispatcher_backendmoe_token_dispatcher_type = "flex"moe_flex_dispatcher_backendmoe_token_dispatcher_type = "flex"delay_wgrad_computedelay_wgrad_computeEP=16alltoallmoe_permute_fusion=falsedelay_wgrad_computealltoallEP=16alltoallmoe_permute_fusion=falsedelay_wgrad_computealltoallsrc/megatron/bridge/training/comm_overlap.pysrc/megatron/bridge/training/flex_dispatcher_backend.pysrc/megatron/bridge/training/config.pytests/unit_tests/training/test_comm_overlap.pytests/unit_tests/training/test_deepep.pysrc/megatron/bridge/training/comm_overlap.pysrc/megatron/bridge/training/flex_dispatcher_backend.pysrc/megatron/bridge/training/config.pytests/unit_tests/training/test_comm_overlap.pytests/unit_tests/training/test_deepep.pymoe_shared_expert_overlapoverlap_moe_expert_parallel_commmoe_flex_dispatcher_backend="deepep"moe_token_dispatcher_type"alltoall"moe_shared_expert_overlapoverlap_moe_expert_parallel_commmoe_token_dispatcher_type"alltoall"moe_flex_dispatcher_backend="deepep"comm_overlap.py
uv run python scripts/performance/run_script.py \
-m qwen \
-mr qwen3_30b_a3b \
--task pretrain \
-g h100 \
-c bf16 \
-ng 16 \
-gn 8 \
--max_steps 8 \
--config_variant v1 \
--cuda_graph_impl none \
--moe_flex_dispatcher_backend None \
--moe_a2a_overlap false \
--tokenizer_type NullTokenizer \
comm_overlap.overlap_moe_expert_parallel_comm=true \
comm_overlap.delay_wgrad_compute=false \
model.moe_shared_expert_overlap=false
model.moe_permute_fusion=false
comm_overlap.py
uv run python scripts/performance/run_script.py \
-m qwen \
-mr qwen3_30b_a3b \
--task pretrain \
-g h100 \
-c bf16 \
-ng 16 \
-gn 8 \
--max_steps 8 \
--config_variant v1 \
--cuda_graph_impl none \
--moe_flex_dispatcher_backend None \
--moe_a2a_overlap false \
--tokenizer_type NullTokenizer \
comm_overlap.overlap_moe_expert_parallel_comm=true \
comm_overlap.delay_wgrad_compute=false \
model.moe_shared_expert_overlap=false
model.moe_permute_fusion=false