From e30763538a2b050f165cc84a4c2a042a319a311e Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 30 Jan 2026 17:17:42 -0800 Subject: [PATCH 1/3] Update the cost estimator for latest task-spec and local-execution. --- .../cost_estimator/local_cost_estimator.h | 34 ++-- .../cost_estimator/tracked_allocator.h | 4 - .../cost_estimator/local_cost_estimator.cc | 163 +++++++++++------- .../cost_estimator/tracked_allocator.cc | 6 +- .../local-execution/local_cost_estimator.cc | 72 +++++--- 5 files changed, 179 insertions(+), 100 deletions(-) diff --git a/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h b/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h index d07a8b731b..93e28d0986 100644 --- a/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h +++ b/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h @@ -1,19 +1,23 @@ -#if 0 // FIXME (Elliott): fix cost estimator - #ifndef _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_COST_ESTIMATOR_LOCAL_COST_ESTIMATOR_H #define _FLEXFLOW_LIB_LOCAL_EXECUTION_INCLUDE_LOCAL_EXECUTION_COST_ESTIMATOR_LOCAL_COST_ESTIMATOR_H #include "compiler/cost_estimator/cost_estimator.h" +#include "kernels/allocation.h" +#include "kernels/device_handle_t.dtg.h" +#include "kernels/profiling_settings.dtg.h" +#include "pcg/device_id_t.dtg.h" #include "pcg/machine_interconnect_specification.dtg.h" -#include "pcg/optimizer_attrs.dtg.h" -#include "task-spec/runtime_task_invocation/runtime_arg_config.dtg.h" +#include "task-spec/ff_iteration_config.dtg.h" namespace FlexFlow { struct LocalCostEstimator : public ICostEstimator { - explicit LocalCostEstimator(RuntimeArgConfig const &, - MachineInterconnectSpecification const &, - DeviceType); + explicit LocalCostEstimator(MachineInterconnectSpecification const &, + Allocator &allocator, + ProfilingSettings const &profiling_settings, + device_handle_t const &device_handle, + 
FFIterationConfig const &iteration_config, + device_id_t device_idx); LocalCostEstimator(LocalCostEstimator const &) = delete; LocalCostEstimator(LocalCostEstimator &&) = delete; @@ -24,16 +28,22 @@ struct LocalCostEstimator : public ICostEstimator { milliseconds_t estimate_cost(TensorSetMovement const &) const override; private: - RuntimeArgConfig runtime_arg_config; MachineInterconnectSpecification interconnect_specification; - DeviceType device_type; + Allocator allocator; + ProfilingSettings profiling_settings; + device_handle_t device_handle; + FFIterationConfig iteration_config; + device_id_t device_idx; }; CHECK_RC_COPY_VIRTUAL_COMPLIANT(LocalCostEstimator); -CostEstimator get_local_cost_estimator(RuntimeArgConfig const &); +CostEstimator get_local_cost_estimator(MachineInterconnectSpecification const &, + Allocator &, + ProfilingSettings const &, + device_handle_t const &, + FFIterationConfig const &, + device_id_t); } // namespace FlexFlow #endif - -#endif diff --git a/lib/local-execution/include/local-execution/cost_estimator/tracked_allocator.h b/lib/local-execution/include/local-execution/cost_estimator/tracked_allocator.h index 79a62b628a..0b531f9b3d 100644 --- a/lib/local-execution/include/local-execution/cost_estimator/tracked_allocator.h +++ b/lib/local-execution/include/local-execution/cost_estimator/tracked_allocator.h @@ -1,5 +1,3 @@ -#if 0 // FIXME (Elliott): fix cost estimator - #ifndef _FLEXFLOW_LOCAL_EXECUTION_TRACKED_ALLOCATOR_H #define _FLEXFLOW_LOCAL_EXECUTION_TRACKED_ALLOCATOR_H @@ -35,5 +33,3 @@ size_t get_tracked_memory_usage(Allocator &wrapped_allocator); } // namespace FlexFlow #endif - -#endif diff --git a/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc b/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc index 79e2dcafb2..b9cd0c238d 100644 --- a/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc +++ 
b/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc @@ -1,60 +1,75 @@ -#if 0 // FIXME (Elliott): fix cost estimator - #include "local-execution/cost_estimator/local_cost_estimator.h" #include "compiler/machine_mapping/machine_view.dtg.h" #include "kernels/create_local_allocator_for_device_type.h" #include "kernels/device.h" #include "kernels/local_cpu_allocator.h" #include "kernels/local_cuda_allocator.h" +#include "local-execution/computation_graph_instance/computation_graph_instance.h" #include "local-execution/cost_estimator/tracked_allocator.h" #include "op-attrs/computation_graph_op_attrs.h" #include "op-attrs/pcg_operator_attrs.h" +#include "op-attrs/tensor_slot_name.dtg.h" #include "pcg/computation_graph.h" #include "pcg/computation_graph/layer_added_result.dtg.h" +#include "pcg/device_id.h" #include "pcg/parallel_tensor_attrs.h" #include "utils/containers/concat_vectors.h" -#include "utils/containers/get_only.h" +#include "utils/containers/map_values.h" #include "utils/containers/maximum.h" +#include "utils/containers/require_only_key.h" #include "utils/containers/sum.h" #include "utils/containers/transform.h" #include "utils/containers/unordered_set_of.h" #include "utils/containers/values.h" +#include "utils/exception.h" +#include "utils/optional.h" +#include namespace FlexFlow { -LocalCostEstimator::LocalCostEstimator(RuntimeArgConfig const &config) - : runtime_arg_config(config) {} +LocalCostEstimator::LocalCostEstimator( + MachineInterconnectSpecification const &interconnect_specification, + Allocator &allocator, + ProfilingSettings const &profiling_settings, + device_handle_t const &device_handle, + FFIterationConfig const &iteration_config, + device_id_t device_idx) + : interconnect_specification(interconnect_specification), + allocator(allocator), profiling_settings(profiling_settings), + device_handle(device_handle), iteration_config(iteration_config), + device_idx(device_idx) {} static ComputationGraph 
computation_graph_for_local_cost_estimation( ComputationGraphOpAttrs const &op, - std::vector const &inputs, - std::vector const &weights, - std::vector const &outputs) { + std::unordered_map const &inputs, + std::unordered_map const &weights, + std::unordered_map const &outputs) { ComputationGraph computation_graph = make_empty_computation_graph(); - std::vector input_tensors; - for (ParallelTensorShape const &input : inputs) { - LayerAddedResult inputs_layer = add_layer( - computation_graph, - LayerAttrs{ComputationGraphOpAttrs{InputAttrs{get_piece_shape(input)}}, - std::nullopt}, - {}, - {}); - input_tensors.push_back(get_only(inputs_layer.outputs)); - } - - std::vector weight_tensors; - for (ParallelTensorShape const &weight : weights) { - LayerAddedResult weights_layer = - add_layer(computation_graph, - LayerAttrs{ComputationGraphOpAttrs{WeightAttrs{ - get_piece_shape(weight), - InitializerAttrs{ZeroInitializerAttrs{}}}}, - std::nullopt}, - {}, - {}); - weight_tensors.push_back(get_only(weights_layer.outputs)); - } + std::unordered_map input_tensors = + map_values(inputs, [&](ParallelTensorShape const &shape) { + LayerAddedResult inputs_layer = + add_layer(computation_graph, + LayerAttrs{ComputationGraphOpAttrs{ + InputAttrs{get_piece_shape(shape)}}, + std::nullopt}, + {}, + {}); + return require_only_key(inputs_layer.outputs, TensorSlotName::OUTPUT); + }); + + std::unordered_map weight_tensors = + map_values(weights, [&](ParallelTensorShape const &shape) { + LayerAddedResult weights_layer = + add_layer(computation_graph, + LayerAttrs{ComputationGraphOpAttrs{WeightAttrs{ + get_piece_shape(shape), + InitializerAttrs{ZeroInitializerAttrs{}}}}, + std::nullopt}, + {}, + {}); + return require_only_key(weights_layer.outputs, TensorSlotName::OUTPUT); + }); // create operator layer LayerAddedResult operator_layer = add_layer(computation_graph, @@ -72,10 +87,13 @@ OpCostMetrics LocalCostEstimator::estimate_cost( OpCostEstimateKey const &op_cost_estimate_key) const { 
PCGOperatorAttrs op = op_cost_estimate_key.op_attrs; - std::vector inputs = op_cost_estimate_key.input_shapes; - std::vector weights = op_cost_estimate_key.weight_shapes; - std::vector outputs = op_cost_estimate_key.output_shapes; - MachineView mv = op_cost_estimate_key.machine_view; + std::unordered_map inputs = + op_cost_estimate_key.input_shapes; + std::unordered_map weights = + op_cost_estimate_key.weight_shapes; + std::unordered_map outputs = + op_cost_estimate_key.output_shapes; + OptimizerAttrs optimizer_attrs = op_cost_estimate_key.optimizer_attrs; if (is_parallel_op(op) || op.has() || op.has() || op.has()) { @@ -89,30 +107,50 @@ OpCostMetrics LocalCostEstimator::estimate_cost( // allocate memory std::shared_ptr tracked_allocator_ptr = std::make_shared(create_local_allocator_for_device_type( - runtime_arg_config.kernel_device_type)); + get_device_type(this->device_idx))); layer_guid_t layer_guid = layer_guid_t{Node{0}}; Allocator allocator = Allocator(tracked_allocator_ptr); + ComputationGraph cg = computation_graph_for_local_cost_estimation( + /*op=*/assert_unwrap(compgraph_op_attrs_from_pcg_op_attrs(op)), + /*inputs=*/inputs, + /*weights=*/weights, + /*outputs=*/outputs); + + ComputationGraphInstance instance = create_computation_graph_instance( + /*compgraph=*/cg, + /*optimizer_attrs=*/optimizer_attrs, + /*loss_attrs=*/std::nullopt, + /*label_tensor=*/std::nullopt, + /*logit_tensor=*/std::nullopt, + /*input_tensors=*/{}, + /*allocator=*/allocator, + /*profiling_settings=*/this->profiling_settings, + /*device_handle=*/this->device_handle, + /*iteration_config=*/this->iteration_config, + /*device_idx=*/this->device_idx); + // execute layer - layer_guid_t operator_layer_guid = - get_layer_by_name(training_cg.computation_graph, "operator"); - - milliseconds_t fwd = execute_forward(local_backing.local_task_registry, - local_backing.local_tensor_backing, - local_backing.local_args_backing, - get_training_layer_plus_context( - training_cg, operator_layer_guid), 
- allocator) - .value(); - milliseconds_t bwd = execute_backward(local_backing.local_task_registry, - local_backing.local_tensor_backing, - local_backing.local_args_backing, - get_training_layer_plus_context( - training_cg, operator_layer_guid), - allocator) - .value(); + dynamic_layer_guid_t operator_layer_guid{get_layer_by_name(cg, "operator")}; + + std::unordered_map> + fwd_timing = perform_forward_pass_for_computation_graph_instance( + instance, + this->profiling_settings, + this->device_handle, + this->iteration_config, + this->device_idx); + milliseconds_t fwd = fwd_timing.at(operator_layer_guid).value(); + std::unordered_map> + bwd_timing = perform_backward_pass_for_computation_graph_instance( + instance, + this->profiling_settings, + this->device_handle, + this->iteration_config, + this->device_idx); + milliseconds_t bwd = bwd_timing.at(operator_layer_guid).value(); return OpCostMetrics{ /*forward_runtime=*/fwd, @@ -123,7 +161,6 @@ OpCostMetrics LocalCostEstimator::estimate_cost( milliseconds_t LocalCostEstimator::estimate_cost( TensorSetMovement const &tensor_set_movement) const { - auto estimate_single_comm_cost = [&](MachineSpaceCoordinate const &src, MachineSpaceCoordinate const &dst, @@ -147,11 +184,19 @@ milliseconds_t LocalCostEstimator::estimate_cost( })); } -CostEstimator - get_local_cost_estimator(RuntimeArgConfig const &runtime_arg_config) { - return CostEstimator::create(runtime_arg_config); +CostEstimator get_local_cost_estimator( + MachineInterconnectSpecification const &interconnect_specification, + Allocator &allocator, + ProfilingSettings const &profiling_settings, + device_handle_t const &device_handle, + FFIterationConfig const &iteration_config, + device_id_t device_idx) { + return CostEstimator::create(interconnect_specification, + allocator, + profiling_settings, + device_handle, + iteration_config, + device_idx); } } // namespace FlexFlow - -#endif diff --git 
a/lib/local-execution/src/local-execution/cost_estimator/tracked_allocator.cc b/lib/local-execution/src/local-execution/cost_estimator/tracked_allocator.cc index 2930ba0c86..8424f637c6 100644 --- a/lib/local-execution/src/local-execution/cost_estimator/tracked_allocator.cc +++ b/lib/local-execution/src/local-execution/cost_estimator/tracked_allocator.cc @@ -1,6 +1,4 @@ -#if 0 // FIXME (Elliott): fix cost estimator - -#include "local-execution/tracked_allocator.h" +#include "local-execution/cost_estimator/tracked_allocator.h" #include "kernels/device.h" namespace FlexFlow { @@ -35,5 +33,3 @@ Allocator get_tracked_memory_allocator(Allocator const &base_allocator) { } } // namespace FlexFlow - -#endif diff --git a/lib/local-execution/test/src/local-execution/local_cost_estimator.cc b/lib/local-execution/test/src/local-execution/local_cost_estimator.cc index 788817d3ed..f3dcab7f82 100644 --- a/lib/local-execution/test/src/local-execution/local_cost_estimator.cc +++ b/lib/local-execution/test/src/local-execution/local_cost_estimator.cc @@ -1,24 +1,26 @@ -#if 0 // FIXME (Elliott): fix cost estimator #include "local-execution/cost_estimator/local_cost_estimator.h" #include "compiler/machine_mapping/machine_view.h" -#include "internal/test_utils.h" #include "kernels/device_handle_t.h" +#include "kernels/local_cpu_allocator.h" +#include "kernels/local_cuda_allocator.h" +#include "kernels/managed_ff_stream.h" #include "kernels/managed_per_device_ff_handle.h" #include "op-attrs/ops/attention.h" #include "op-attrs/ops/cast.h" #include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/tensor_slot_name.dtg.h" #include "pcg/computation_graph_builder.h" -#include "task-spec/runtime_task_invocation/runtime_arg_config.h" +#include "pcg/device_id_t.h" #include using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("LocalCostEstimator") { - RuntimeArgConfig runtime_arg_config = - cpu_make_runtime_arg_config(EnableProfiling::YES, - ProfilingSettings{/*warmup_iters=*/0, 
- /*measure_iters=*/1}); + Allocator allocator = create_local_cpu_memory_allocator(); + device_handle_t ff_handle = cpu_make_device_handle_t(); + device_id_t device_idx = + make_device_id_t_from_idx(nonnegative_int{0}, DeviceType::CPU); OptimizerAttrs optimizer_attrs = OptimizerAttrs{ SGDOptimizerAttrs{ @@ -29,7 +31,20 @@ TEST_SUITE(FF_TEST_SUITE) { }, }; - CostEstimator cost_estimator = get_local_cost_estimator(runtime_arg_config); + MachineInterconnectSpecification interconnect_specification{ + /*inter_node_bandwidth=*/bytes_per_second_t{10000000}, + /*intra_node_bandwidth=*/bytes_per_second_t{10000000000}, + }; + + CostEstimator cost_estimator = get_local_cost_estimator( + /*interconnect_specification=*/interconnect_specification, + /*allocator=*/allocator, + /*profiling_settings=*/ + ProfilingSettings{/*warmup_iters=*/0, + /*measure_iters=*/1}, + /*device_handle=*/ff_handle, + /*iteration_config=*/FFIterationConfig{1_p}, + /*device_idx=*/device_idx); SUBCASE("estimate operator cost") { CastAttrs attrs = CastAttrs{ @@ -46,9 +61,9 @@ TEST_SUITE(FF_TEST_SUITE) { OpCostEstimateKey op_cost_estimate_key = OpCostEstimateKey{ /*op_attrs=*/PCGOperatorAttrs{attrs}, - /*input_shapes=*/{input_shape}, + /*input_shapes=*/{{TensorSlotName::INPUT, input_shape}}, /*weight_shapes=*/{}, - /*output_shapes=*/{output_shape}, + /*output_shapes=*/{{TensorSlotName::OUTPUT, output_shape}}, /*optimizer_attrs=*/optimizer_attrs, /*machine_view=*/ make_1d_machine_view( @@ -68,15 +83,17 @@ TEST_SUITE(FF_TEST_SUITE) { TEST_SUITE(FF_CUDA_TEST_SUITE) { TEST_CASE("LocalCostEstimator (CUDA)") { + ManagedFFStream managed_stream{}; ManagedPerDeviceFFHandle managed_handle = initialize_single_gpu_handle( /*workSpaceSize=*/1024 * 1024, /*allowTensorOpMathConversion=*/true); - RuntimeArgConfig runtime_arg_config = - gpu_make_runtime_arg_config(managed_handle.raw_handle(), - EnableProfiling::YES, - ProfilingSettings{/*warmup_iters=*/0, - /*measure_iters=*/1}); + Allocator allocator = 
create_local_cuda_memory_allocator(); + + device_id_t device_idx = + make_device_id_t_from_idx(nonnegative_int{0}, DeviceType::GPU); + device_handle_t ff_handle = + gpu_make_device_handle_t(managed_handle.raw_handle()); OptimizerAttrs optimizer_attrs = OptimizerAttrs{ SGDOptimizerAttrs{ @@ -87,7 +104,20 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { }, }; - CostEstimator cost_estimator = get_local_cost_estimator(runtime_arg_config); + MachineInterconnectSpecification interconnect_specification{ + /*inter_node_bandwidth=*/bytes_per_second_t{10000000}, + /*intra_node_bandwidth=*/bytes_per_second_t{10000000000}, + }; + + CostEstimator cost_estimator = get_local_cost_estimator( + /*interconnect_specification=*/interconnect_specification, + /*allocator=*/allocator, + /*profiling_settings=*/ + ProfilingSettings{/*warmup_iters=*/0, + /*measure_iters=*/1}, + /*device_handle=*/ff_handle, + /*iteration_config=*/FFIterationConfig{1_p}, + /*device_idx=*/device_idx); SUBCASE("estimate operator cost") { positive_int embed_dim = 32_p; @@ -122,9 +152,12 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { OpCostEstimateKey op_cost_estimate_key = OpCostEstimateKey{ /*op_attrs=*/PCGOperatorAttrs{attrs}, - /*input_shapes=*/{inputs_shape, inputs_shape, inputs_shape}, - /*weight_shapes=*/{weights_shape}, - /*output_shapes=*/{output_shape}, + /*input_shapes=*/ + {{TensorSlotName::QUERY, inputs_shape}, + {TensorSlotName::KEY, inputs_shape}, + {TensorSlotName::VALUE, inputs_shape}}, + /*weight_shapes=*/{{TensorSlotName::WEIGHT, weights_shape}}, + /*output_shapes=*/{{TensorSlotName::OUTPUT, output_shape}}, /*optimizer_attrs=*/optimizer_attrs, /*machine_view=*/ make_1d_machine_view( @@ -141,4 +174,3 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { } } } -#endif From 5038fb32f16b1d6707479d8fb5924490b8218992 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Fri, 6 Feb 2026 14:58:34 -0800 Subject: [PATCH 2/3] Implement get_tensor_shape interface. 
--- .../local_task_argument_accessor.h | 2 ++ .../local_task_argument_accessor.cc | 31 +++++++++++++++++++ .../itask_argument_accessor.h | 2 ++ .../task_argument_accessor.h | 2 +- 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/lib/local-execution/include/local-execution/local_task_argument_accessor.h b/lib/local-execution/include/local-execution/local_task_argument_accessor.h index 44844a67f1..638bea247e 100644 --- a/lib/local-execution/include/local-execution/local_task_argument_accessor.h +++ b/lib/local-execution/include/local-execution/local_task_argument_accessor.h @@ -27,6 +27,8 @@ struct LocalTaskArgumentAccessor : public ITaskArgumentAccessor { LocalTaskArgumentAccessor(LocalTaskArgumentAccessor const &) = delete; LocalTaskArgumentAccessor(LocalTaskArgumentAccessor &&) = delete; + TensorShape get_tensor_shape(TensorSlotName slot) const override; + GenericTensorAccessor get_tensor(TaskTensorParameter slot, Permissions priv) const override; diff --git a/lib/local-execution/src/local-execution/local_task_argument_accessor.cc b/lib/local-execution/src/local-execution/local_task_argument_accessor.cc index 8a4df61d17..796d122a23 100644 --- a/lib/local-execution/src/local-execution/local_task_argument_accessor.cc +++ b/lib/local-execution/src/local-execution/local_task_argument_accessor.cc @@ -4,6 +4,7 @@ #include "pcg/device_id_t.h" #include "utils/exception.h" #include "utils/optional.h" +#include "utils/overload.h" namespace FlexFlow { @@ -26,6 +27,36 @@ LocalTaskArgumentAccessor::LocalTaskArgumentAccessor( iteration_config(iteration_config), optimizer_attrs(optimizer_attrs), device_idx(device_idx) {} +TensorShape + LocalTaskArgumentAccessor::get_tensor_shape(TensorSlotName slot) const { + + for (auto const &[backing_slot, accessor] : this->tensor_slots_backing) { + bool match = backing_slot.visit(overload{ + [&](TaskForwardTensorParameter const &param) { + return param.name == slot; + }, + [&](TaskGradientTensorParameter const &param) { + return 
param.name == slot; + }, + [&](TaskOptimizerTensorParameter const &param) { + return param.name == slot; + }, + [&](TaskLossTensorParameter const &param) { return false; }, + }); + + if (match) { + if (accessor.has()) { + return accessor.get().shape; + } else { + return accessor.get().shape; + } + } + } + + PANIC("Unable to find TensorSlotName in tensor_slots_backing", + fmt::to_string(slot)); +} + GenericTensorAccessor LocalTaskArgumentAccessor::get_tensor(TaskTensorParameter slot, Permissions priv) const { diff --git a/lib/task-spec/include/task-spec/task_argument_accessor/itask_argument_accessor.h b/lib/task-spec/include/task-spec/task_argument_accessor/itask_argument_accessor.h index a7d1af4022..3d08101915 100644 --- a/lib/task-spec/include/task-spec/task_argument_accessor/itask_argument_accessor.h +++ b/lib/task-spec/include/task-spec/task_argument_accessor/itask_argument_accessor.h @@ -24,6 +24,8 @@ struct ITaskArgumentAccessor { virtual ~ITaskArgumentAccessor() = default; + virtual TensorShape get_tensor_shape(TensorSlotName) const = 0; + virtual GenericTensorAccessor get_tensor(TaskTensorParameter, Permissions priv) const = 0; diff --git a/lib/task-spec/include/task-spec/task_argument_accessor/task_argument_accessor.h b/lib/task-spec/include/task-spec/task_argument_accessor/task_argument_accessor.h index e350387684..29f3f625f6 100644 --- a/lib/task-spec/include/task-spec/task_argument_accessor/task_argument_accessor.h +++ b/lib/task-spec/include/task-spec/task_argument_accessor/task_argument_accessor.h @@ -27,7 +27,7 @@ struct TaskArgumentAccessor { OptimizerAttrs get_optimizer_attrs() const; TensorShape get_tensor_shape(TensorSlotName slot) const { - NOT_IMPLEMENTED(); + return this->ptr->get_tensor_shape(slot); } template From 59f6f2499592041dfb829f97ea0b057af1bb3d94 Mon Sep 17 00:00:00 2001 From: Elliott Slaughter Date: Thu, 12 Feb 2026 09:13:08 -0800 Subject: [PATCH 3/3] Add back arg names to interface. 
--- .../cost_estimator/local_cost_estimator.h | 26 ++++++++++--------- 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h b/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h index 93e28d0986..653067da8a 100644 --- a/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h +++ b/lib/local-execution/include/local-execution/cost_estimator/local_cost_estimator.h @@ -12,12 +12,13 @@ namespace FlexFlow { struct LocalCostEstimator : public ICostEstimator { - explicit LocalCostEstimator(MachineInterconnectSpecification const &, - Allocator &allocator, - ProfilingSettings const &profiling_settings, - device_handle_t const &device_handle, - FFIterationConfig const &iteration_config, - device_id_t device_idx); + explicit LocalCostEstimator( + MachineInterconnectSpecification const &interconnect_specification, + Allocator &allocator, + ProfilingSettings const &profiling_settings, + device_handle_t const &device_handle, + FFIterationConfig const &iteration_config, + device_id_t device_idx); LocalCostEstimator(LocalCostEstimator const &) = delete; LocalCostEstimator(LocalCostEstimator &&) = delete; @@ -37,12 +38,13 @@ struct LocalCostEstimator : public ICostEstimator { }; CHECK_RC_COPY_VIRTUAL_COMPLIANT(LocalCostEstimator); -CostEstimator get_local_cost_estimator(MachineInterconnectSpecification const &, - Allocator &, - ProfilingSettings const &, - device_handle_t const &, - FFIterationConfig const &, - device_id_t); +CostEstimator get_local_cost_estimator( + MachineInterconnectSpecification const &interconnect_specification, + Allocator &allocator, + ProfilingSettings const &profiling_settings, + device_handle_t const &device_handle, + FFIterationConfig const &iteration_config, + device_id_t device_idx); } // namespace FlexFlow