diff --git a/.flake/pkgs/legion.nix b/.flake/pkgs/legion.nix deleted file mode 100644 index 361a66c4ff..0000000000 --- a/.flake/pkgs/legion.nix +++ /dev/null @@ -1,48 +0,0 @@ -{ lib -, stdenv -, fetchFromGitLab -, cmake -, cudaPackages ? { } -, cudaCapabilities ? [ "60" "70" "80" "86" ] -, maxDim ? 5 -}: - -# from https://codeberg.org/Uli/nix-things/src/commit/776519e382c81b136c1d0b10d8c7b52b4acb9192/overlays/cq/python/libclang-python.nix - -let - cmakeFlag = x: if x then "1" else "0"; - - inherit (cudaPackages) cudatoolkit; -in - -stdenv.mkDerivation rec { - pname = "legion"; - version = "2025-01-06"; - - src = fetchFromGitLab { - owner = "StanfordLegion"; - repo = "legion"; - rev = "7be1abd0207eb1126c7629b16d1123fa6f58ce9d"; - sha256 = "sha256-gTjnGYYTQwTsrV1WcY0qqpTrlwbzAPcndurRy6XnG8A="; - }; - - nativeBuildInputs = [ - cmake - ]; - - cmakeFlags = [ - "-DLegion_USE_CUDA=1" - "-DLegion_CUDA_ARCH=${lib.concatStringsSep "," cudaCapabilities}" - "-DLegion_MAX_DIM=${toString maxDim}" - ]; - - buildInputs = [ - cudatoolkit - ]; - - meta = with lib; { - description = "Legion is a parallel programming model for distributed, heterogeneous machines"; - homepage = "https://legion.stanford.edu/"; - license = licenses.asl20; - }; -} diff --git a/.flake/pkgs/realm.nix b/.flake/pkgs/realm.nix new file mode 100644 index 0000000000..b809573690 --- /dev/null +++ b/.flake/pkgs/realm.nix @@ -0,0 +1,46 @@ +{ lib +, stdenv +, fetchFromGitHub +, cmake +, cudaPackages ? { } +, zlib +, maxDim ? 
5 +}: + +let + inherit (cudaPackages) cudatoolkit; +in + +stdenv.mkDerivation rec { + pname = "realm"; + version = "2026-02-06"; + + src = fetchFromGitHub { + owner = "StanfordLegion"; + repo = "realm"; + rev = "0405b67ca14b586f7dec0dcddee194cecee7efa6"; + sha256 = "sha256-iUPVV1rh3QuyDKgXuu8aDlaZGlNwcpPvPsSVLWp8tr4="; + }; + + nativeBuildInputs = [ + cmake + ]; + + cmakeFlags = [ + "-DBUILD_SHARED_LIBS=ON" + "-DREALM_ENABLE_CUDA=ON" + "-DREALM_ENABLE_PREALM=ON" + "-DREALM_MAX_DIM=${toString maxDim}" + ]; + + buildInputs = [ + cudatoolkit + zlib + ]; + + meta = with lib; { + description = "Realm is a distributed, event–based tasking runtime for building high-performance applications that span clusters of CPUs, GPUs, and other accelerators"; + homepage = "https://legion.stanford.edu/realm"; + license = licenses.asl20; + }; +} diff --git a/.proj.toml b/.proj.toml index 38690f710b..5dbbfbcdd7 100644 --- a/.proj.toml +++ b/.proj.toml @@ -85,6 +85,13 @@ has-cpu-only-benchmarks = false has-cuda-tests = true has-cuda-benchmarks = false +[targets.realm-execution] +type = "lib" +has-cpu-only-tests = true +has-cpu-only-benchmarks = false +has-cuda-tests = true +has-cuda-benchmarks = false + # [targets.local-pcg-execution] # type = "lib" # has-cpu-only-tests = true diff --git a/flake.nix b/flake.nix index 6ccd5616cd..dad0e2fc32 100644 --- a/flake.nix +++ b/flake.nix @@ -30,8 +30,8 @@ }; }; - outputs = { self, nixpkgs, flake-utils, proj-repo, nixGL, ... }: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system: - let + outputs = { self, nixpkgs, flake-utils, proj-repo, nixGL, ... 
}: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system: + let pkgs = import nixpkgs { inherit system; config.allowUnfree = true; @@ -41,21 +41,21 @@ mkShell = attrs: pkgs.mkShell.override { stdenv = pkgs.cudaPackages.backendStdenv; } (attrs // { - hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch - # signed overflows due to the signedoverflow hardening setting. - # for more details, see the following (long-running) nixpkgs github issues: + hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch + # signed overflows due to the signedoverflow hardening setting. + # for more details, see the following (long-running) nixpkgs github issues: # - https://github.com/NixOS/nixpkgs/issues/18995 # - https://github.com/NixOS/nixpkgs/issues/60919 }); proj = proj-repo.packages.${system}.proj; - in + in { packages = rec { libdwarf-lite = pkgs.callPackage ./.flake/pkgs/libdwarf-lite.nix { }; cpptrace = pkgs.callPackage ./.flake/pkgs/cpptrace.nix { inherit libdwarf-lite; }; libassert = pkgs.callPackage ./.flake/pkgs/libassert.nix { inherit cpptrace; }; - legion = pkgs.callPackage ./.flake/pkgs/legion.nix { }; + realm = pkgs.callPackage ./.flake/pkgs/realm.nix { }; bencher-cli = pkgs.callPackage ./.flake/pkgs/bencher-cli.nix { }; ffdb = pkgs.callPackage ./.flake/pkgs/ffdb { inherit proj; }; hpp2plantuml = pkgs.python3Packages.callPackage ./.flake/pkgs/hpp2plantuml.nix { }; @@ -83,8 +83,7 @@ shellHook = '' export PATH="$HOME/ff/.scripts/:$PATH" export RC_PARAMS="max_discard_ratio=100" - export CMAKE_FLAGS="-DFF_USE_EXTERNAL_LEGION=ON \ - -DFF_USE_EXTERNAL_NCCL=ON \ + export CMAKE_FLAGS="-DFF_USE_EXTERNAL_NCCL=ON \ -DFF_USE_EXTERNAL_JSON=ON \ -DFF_USE_EXTERNAL_FMT=ON \ -DFF_USE_EXTERNAL_SPDLOG=ON \ @@ -94,7 +93,7 @@ -DFF_USE_EXTERNAL_GBENCHMARK=ON \ -DFF_USE_EXTERNAL_LIBASSERT=ON" ''; - + buildInputs = builtins.concatLists [ (with pkgs; [ zlib @@ -125,7 +124,7 @@ ]) (with 
self.packages.${system}; [ libassert - legion + realm rapidcheckFull doctest ]) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index 2e71e577c0..cb3bd6d6ae 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -5,6 +5,7 @@ add_subdirectory(op-attrs) add_subdirectory(kernels) add_subdirectory(local-execution) add_subdirectory(local-pcg-execution) +add_subdirectory(realm-execution) add_subdirectory(task-spec) add_subdirectory(utils) add_subdirectory(ffi) diff --git a/lib/kernels/include/kernels/device_handle_t.h b/lib/kernels/include/kernels/device_handle_t.h index 9b7769355e..0836503717 100644 --- a/lib/kernels/include/kernels/device_handle_t.h +++ b/lib/kernels/include/kernels/device_handle_t.h @@ -9,6 +9,9 @@ namespace FlexFlow { device_handle_t device_handle_t_from_managed_handle( std::optional const &managed_handle); +device_handle_t device_handle_t_from_managed_handle_ptr( + std::optional const &managed_handle); + device_handle_t gpu_make_device_handle_t(PerDeviceFFHandle const &ff_handle); device_handle_t cpu_make_device_handle_t(); diff --git a/lib/kernels/src/kernels/device_handle_t.cc b/lib/kernels/src/kernels/device_handle_t.cc index 85f9e2a388..0225ee8e94 100644 --- a/lib/kernels/src/kernels/device_handle_t.cc +++ b/lib/kernels/src/kernels/device_handle_t.cc @@ -11,6 +11,15 @@ device_handle_t device_handle_t_from_managed_handle( } } +device_handle_t device_handle_t_from_managed_handle_ptr( + std::optional const &managed_handle) { + if (managed_handle.has_value()) { + return gpu_make_device_handle_t(managed_handle.value()->raw_handle()); + } else { + return cpu_make_device_handle_t(); + } +} + device_handle_t gpu_make_device_handle_t(PerDeviceFFHandle const &ff_handle) { return device_handle_t{ ff_handle, diff --git a/lib/pcg/include/pcg/layer_guid_t.dtg.toml b/lib/pcg/include/pcg/layer_guid_t.dtg.toml index d73cf547da..2f2f7694a0 100644 --- a/lib/pcg/include/pcg/layer_guid_t.dtg.toml +++ b/lib/pcg/include/pcg/layer_guid_t.dtg.toml @@ -6,6 +6,7 
@@ features = [ "ord", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h b/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h index 5b1cad5e99..ebfdefa478 100644 --- a/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h +++ b/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h @@ -5,6 +5,7 @@ #include "pcg/machine_space_coordinate.dtg.h" #include "pcg/mapped_parallel_computation_graph/operator_atomic_task_shard_binding.dtg.h" #include "utils/bidict/bidict.h" +#include namespace FlexFlow { @@ -45,4 +46,15 @@ struct hash<::FlexFlow::MappedOperatorTaskGroup> { }; } // namespace std + +namespace nlohmann { + +template <> +struct adl_serializer<::FlexFlow::MappedOperatorTaskGroup> { + static ::FlexFlow::MappedOperatorTaskGroup from_json(json const &j); + static void to_json(json &j, ::FlexFlow::MappedOperatorTaskGroup const &t); +}; + +} // namespace nlohmann + #endif diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h index 25dc0721cd..21f33f6d3d 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h @@ -32,6 +32,10 @@ ParallelLayerAddedResult add_parallel_layer( ParallelLayerAddedResult pcg_add_input_layer(ParallelComputationGraph &pcg, TensorShape const &tensor_shape); +ParallelLayerAddedResult + pcg_add_input_layer_with_grad(ParallelComputationGraph &pcg, + TensorShape const &tensor_shape); + OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &layer); @@ -54,6 +58,9 @@ std::unordered_map std::unordered_set get_initial_layers(ParallelComputationGraph const &); +std::unordered_map + 
get_outgoing_tensors(ParallelComputationGraph const &, + parallel_layer_guid_t const &); std::unordered_map get_incoming_tensors(ParallelComputationGraph const &, parallel_layer_guid_t const &); @@ -107,6 +114,9 @@ ParallelTensorShape get_parallel_tensor_shape(ParallelComputationGraph const &, std::vector topological_ordering(ParallelComputationGraph const &); +std::unordered_map + get_parallel_layer_attrs_mapping(ParallelComputationGraph const &pcg); + parallel_layer_guid_t get_parallel_layer_by_name(ParallelComputationGraph const &pcg, std::string const &name); diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml index 618bcb0dc4..292b361fc8 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml @@ -6,6 +6,7 @@ features = [ "ord", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml index 4494a31ac2..2710a15664 100644 --- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml +++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml @@ -6,6 +6,7 @@ features = [ "ord", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/pcg/include/pcg/tensor_guid_t.dtg.toml b/lib/pcg/include/pcg/tensor_guid_t.dtg.toml index 151f7b1f0f..e8caf0021f 100644 --- a/lib/pcg/include/pcg/tensor_guid_t.dtg.toml +++ b/lib/pcg/include/pcg/tensor_guid_t.dtg.toml @@ -6,6 +6,7 @@ features = [ "ord", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc b/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc index b96a447383..4436efd727 100644 
--- a/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc +++ b/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc @@ -90,3 +90,20 @@ size_t hash<::FlexFlow::MappedOperatorTaskGroup>::operator()( } } // namespace std + +namespace nlohmann { + +::FlexFlow::MappedOperatorTaskGroup + adl_serializer<::FlexFlow::MappedOperatorTaskGroup>::from_json( + json const &j) { + return ::FlexFlow::MappedOperatorTaskGroup{j.template get< + ::FlexFlow::bidict<::FlexFlow::MachineSpaceCoordinate, + ::FlexFlow::OperatorAtomicTaskShardBinding>>()}; +} + +void adl_serializer<::FlexFlow::MappedOperatorTaskGroup>::to_json( + json &j, ::FlexFlow::MappedOperatorTaskGroup const &t) { + j = t.get_shard_bindings(); +} + +} // namespace nlohmann diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc index f83628b8e1..959747dbc7 100644 --- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc +++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc @@ -142,6 +142,27 @@ ParallelLayerAddedResult pcg_add_input_layer(ParallelComputationGraph &pcg, }); } +ParallelLayerAddedResult + pcg_add_input_layer_with_grad(ParallelComputationGraph &pcg, + TensorShape const &tensor_shape) { + ParallelLayerAttrs layer_attrs = ParallelLayerAttrs{ + /*op_attrs=*/PCGOperatorAttrs{InputAttrs{tensor_shape}}, + /*name=*/std::nullopt, + }; + + return add_parallel_layer(/*pcg=*/pcg, + /*layer_attrs=*/layer_attrs, + /*inputs=*/{}, + /*weights=*/{}, + /*output_flags=*/ + std::unordered_map{ + { + TensorSlotName::OUTPUT, + CreateGrad::YES, + }, + }); +} + OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &layer) { PCGOperatorAttrs op_attrs = pcg_get_op_attrs(pcg, layer); @@ -212,6 +233,16 @@ std::unordered_set [](Node const &n) { return parallel_layer_guid_t{n}; }); 
} +std::unordered_map + get_outgoing_tensors(ParallelComputationGraph const &pcg, + parallel_layer_guid_t const &l) { + return map_values(get_outgoing_kwarg_dataflow_outputs_for_node( + pcg.raw_graph, l.raw_graph_node), + [](KwargDataflowOutput const &o) { + return parallel_tensor_guid_t{o}; + }); +} + std::unordered_map get_incoming_tensors(ParallelComputationGraph const &pcg, parallel_layer_guid_t const &l) { @@ -378,6 +409,17 @@ std::vector [](Node const &n) { return parallel_layer_guid_t{n}; }); } +std::unordered_map + get_parallel_layer_attrs_mapping(ParallelComputationGraph const &pcg) { + std::unordered_map + layer_attrs_mapping; + for (parallel_layer_guid_t const &layer_guid : get_parallel_layers(pcg)) { + layer_attrs_mapping.insert( + {layer_guid, get_parallel_layer_attrs(pcg, layer_guid)}); + } + return layer_attrs_mapping; +} + parallel_layer_guid_t get_parallel_layer_by_name(ParallelComputationGraph const &pcg, std::string const &name) { diff --git a/lib/pcg/test/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc b/lib/pcg/test/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc new file mode 100644 index 0000000000..1c3667afc7 --- /dev/null +++ b/lib/pcg/test/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc @@ -0,0 +1,42 @@ +#include "pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h" +#include "op-attrs/parallel_tensor_space_coordinate.dtg.h" +#include "op-attrs/tensor_slot_name.dtg.h" +#include "pcg/device_type.dtg.h" +#include "pcg/machine_space_coordinate.dtg.h" +#include "pcg/mapped_parallel_computation_graph/operator_atomic_task_shard_binding.dtg.h" +#include +#include + +using namespace ::FlexFlow; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("adl_serializer") { + bidict + shard_bindings{ + {MachineSpaceCoordinate{0_n, 0_n, DeviceType::CPU}, + OperatorAtomicTaskShardBinding{ + { + {TensorSlotName::INPUT, + ParallelTensorSpaceCoordinate{ + 0_n, 0_n, FFOrdered{1_n, 2_n, 
3_n}}}, + }, + }}, + }; + MappedOperatorTaskGroup deserialized{shard_bindings}; + nlohmann::json serialized = shard_bindings; + + SUBCASE("to_json") { + nlohmann::json result = deserialized; + nlohmann::json correct = serialized; + + CHECK(result == correct); + } + + SUBCASE("from_json") { + MappedOperatorTaskGroup result = serialized; + MappedOperatorTaskGroup correct = deserialized; + + CHECK(result == correct); + } + } +} diff --git a/lib/realm-execution/CMakeLists.txt b/lib/realm-execution/CMakeLists.txt new file mode 100644 index 0000000000..08676525e1 --- /dev/null +++ b/lib/realm-execution/CMakeLists.txt @@ -0,0 +1,22 @@ +ff_add_library( + NAME + realm-execution + SRC_PATTERNS + src/*.cc + PUBLIC_INCLUDE + include/ + PRIVATE_INCLUDE + src/ + DEPS + compiler + kernels + local-execution + op-attrs + pcg + spdlog + task-spec + utils + Realm::Realm +) + +add_subdirectory(test) diff --git a/lib/realm-execution/include/realm-execution/atomic_dependency_set.h b/lib/realm-execution/include/realm-execution/atomic_dependency_set.h new file mode 100644 index 0000000000..da6ba86638 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/atomic_dependency_set.h @@ -0,0 +1,27 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_ATOMIC_DEPENDENCY_SET_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_ATOMIC_DEPENDENCY_SET_H + +#include "realm-execution/realm.h" +#include + +namespace FlexFlow { + +struct AtomicDependencySet { +public: + AtomicDependencySet() = delete; + explicit AtomicDependencySet(Realm::Event precondition); + + void add_writer(Realm::Event writer); + void add_reader(Realm::Event reader); + + Realm::Event get_dependency_for_writer() const; + Realm::Event get_dependency_for_reader() const; + +private: + Realm::Event writer; + std::vector readers; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/dependency_set.h b/lib/realm-execution/include/realm-execution/dependency_set.h 
new file mode 100644 index 0000000000..629a40e2e7 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/dependency_set.h @@ -0,0 +1,34 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEPENDENCY_SET_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEPENDENCY_SET_H + +#include "realm-execution/atomic_dependency_set.h" +#include "realm-execution/realm.h" +#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h" +#include + +namespace FlexFlow { + +struct DependencySet { +public: + DependencySet() = delete; + explicit DependencySet(Realm::Event precondition); + + void add_writer(DynamicValueAttrs const &value, Realm::Event writer); + void add_reader(DynamicValueAttrs const &value, Realm::Event reader); + + Realm::Event get_dependency_for_writer(DynamicValueAttrs const &value) const; + Realm::Event get_dependency_for_reader(DynamicValueAttrs const &value) const; + +private: + AtomicDependencySet & + get_atomic_dependency_set(DynamicValueAttrs const &value); + +private: + Realm::Event precondition; + std::unordered_map + atomic_dependencies; +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/device_specific_managed_per_device_ff_handle.h b/lib/realm-execution/include/realm-execution/device_specific_managed_per_device_ff_handle.h new file mode 100644 index 0000000000..d48a80f438 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/device_specific_managed_per_device_ff_handle.h @@ -0,0 +1,38 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEVICE_SPECIFIC_MANAGED_PER_DEVICE_FF_HANDLE_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEVICE_SPECIFIC_MANAGED_PER_DEVICE_FF_HANDLE_H + +#include "kernels/device_handle_t.dtg.h" +#include "kernels/managed_per_device_ff_handle.h" +#include "pcg/device_id_t.dtg.h" +#include "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h" +#include +#include + +namespace FlexFlow { + 
+struct DeviceSpecificManagedPerDeviceFFHandle { +public: + DeviceSpecificManagedPerDeviceFFHandle() = delete; + explicit DeviceSpecificManagedPerDeviceFFHandle( + device_id_t owner, std::optional handle); + + std::optional get(device_id_t device_idx) const; + + SerializableDeviceSpecificPtr serialize() const; + static DeviceSpecificManagedPerDeviceFFHandle + deserialize(SerializableDeviceSpecificPtr const &j); + +private: + device_id_t owner; + std::optional handle; +}; + +DeviceSpecificManagedPerDeviceFFHandle make_device_specific_managed_handle( + device_id_t const &, std::optional const &); + +device_handle_t device_handle_t_from_device_specific_managed_handle( + DeviceSpecificManagedPerDeviceFFHandle const &, device_id_t); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/distributed_device_handle.h b/lib/realm-execution/include/realm-execution/distributed_device_handle.h new file mode 100644 index 0000000000..268be3583d --- /dev/null +++ b/lib/realm-execution/include/realm-execution/distributed_device_handle.h @@ -0,0 +1,36 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_HANDLE_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_HANDLE_H + +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/hash/processor.h" +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" +#include + +namespace FlexFlow { + +struct DistributedDeviceHandle { +public: + DistributedDeviceHandle() = delete; + explicit DistributedDeviceHandle( + std::unordered_map const + &handles); + + DeviceSpecificManagedPerDeviceFFHandle const & + at(Realm::Processor processor) const; + +private: + std::unordered_map + handles; +}; + +DistributedDeviceHandle create_distributed_device_handle( + RealmContext &ctx, + size_t workSpaceSize, + bool allowTensorOpMathConversion, + Realm::Event precondition = 
Realm::Event::NO_EVENT); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/distributed_device_state_initialization.h b/lib/realm-execution/include/realm-execution/distributed_device_state_initialization.h new file mode 100644 index 0000000000..ca24ecdd4c --- /dev/null +++ b/lib/realm-execution/include/realm-execution/distributed_device_state_initialization.h @@ -0,0 +1,24 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_STATE_INITIALIZATION_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_STATE_INITIALIZATION_H + +#include "kernels/profiling_settings.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "realm-execution/distributed_device_handle.h" +#include "realm-execution/realm_context.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h" +#include "task-spec/ff_iteration_config.dtg.h" + +namespace FlexFlow { + +DynamicOpenDataflowGraph perform_distributed_device_state_initialization( + DynamicOpenDataflowGraph const &dg, + RealmContext &ctx, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig const &iteration_config, + OptimizerAttrs const &optimizer_attrs, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/fmt/instance.h b/lib/realm-execution/include/realm-execution/fmt/instance.h new file mode 100644 index 0000000000..c7c2df6735 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/fmt/instance.h @@ -0,0 +1,35 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_FMT_INSTANCE_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_FMT_INSTANCE_H + +#include "realm-execution/realm.h" +#include "utils/check_fmtable.h" +#include +#include + +namespace fmt { + +template +struct formatter<::FlexFlow::Realm::RegionInstance, + Char, + std::enable_if_t::value>> + : 
formatter<::std::string> { + template + auto format(::FlexFlow::Realm::RegionInstance const &m, FormatContext &ctx) + -> decltype(ctx.out()) { + std::string result = fmt::format("", m.id); + + return formatter::format(result, ctx); + } +}; + +} // namespace fmt + +namespace FlexFlow { + +std::ostream &operator<<(std::ostream &s, + ::FlexFlow::Realm::RegionInstance const &m); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/hash/processor.h b/lib/realm-execution/include/realm-execution/hash/processor.h new file mode 100644 index 0000000000..e5eb8eb503 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/hash/processor.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_HASH_PROCESSOR_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_HASH_PROCESSOR_H + +#include "realm-execution/realm.h" +#include + +namespace std { + +template <> +struct hash<::FlexFlow::Realm::Processor> { + size_t operator()(::FlexFlow::Realm::Processor const &p) const; +}; + +} // namespace std + +#endif diff --git a/lib/realm-execution/include/realm-execution/instance_allocation.h b/lib/realm-execution/include/realm-execution/instance_allocation.h new file mode 100644 index 0000000000..09709201ce --- /dev/null +++ b/lib/realm-execution/include/realm-execution/instance_allocation.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_INSTANCE_ALLOCATION_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_INSTANCE_ALLOCATION_H + +#include "realm-execution/realm_context.h" +#include "realm-execution/tensor_instance_backing.dtg.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h" + +namespace FlexFlow { + +std::pair + perform_instance_allocation_for_value(DynamicNodeAttrs const &node, + DynamicValueAttrs const &value, + RealmContext &ctx); + +TensorInstanceBacking perform_instance_allocation( + DynamicOpenDataflowGraph const &g, + 
std::unordered_map const + &preallocated, + RealmContext &ctx); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/pcg_instance/pcg_instance.h b/lib/realm-execution/include/realm-execution/pcg_instance/pcg_instance.h new file mode 100644 index 0000000000..b0037f51b2 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/pcg_instance/pcg_instance.h @@ -0,0 +1,90 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_PCG_INSTANCE_PCG_INSTANCE_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_PCG_INSTANCE_PCG_INSTANCE_H + +#include "kernels/accessor.h" +#include "kernels/allocation.h" +#include "kernels/device_handle_t.dtg.h" +#include "kernels/profiling_settings.dtg.h" +#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" +#include "pcg/device_id_t.dtg.h" +#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include "realm-execution/distributed_device_handle.h" +#include "realm-execution/realm_context.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h" +#include "task-spec/dynamic_graph/dynamic_tensor_accessor.dtg.h" +#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h" +#include "task-spec/ff_iteration_config.dtg.h" +#include "utils/units/milliseconds_t.h" +#include + +namespace FlexFlow { + +struct PCGInstance { +public: + PCGInstance() = delete; + PCGInstance(PCGInstance const &) = delete; + PCGInstance(PCGInstance &&) = delete; + explicit PCGInstance( + RealmContext &ctx, + std::vector const &execution_order, + OptimizerAttrs const &optimizer_attrs, + std::optional logit_grad_tensor); + RealmContext &get_realm_context(); + std::vector const &get_execution_order() const; + OptimizerAttrs const &get_optimizer_attrs() const; + void update_optimizer_attrs_for_next_iter(); + std::optional get_loss_tensor_instance() 
const; + +private: + RealmContext &ctx; + std::vector execution_order; + OptimizerAttrs optimizer_attrs; + std::optional logit_grad_tensor; +}; + +PCGInstance create_pcg_instance( + RealmContext &ctx, + MappedParallelComputationGraph const &mpcg, + OptimizerAttrs const &optimizer_attrs, + std::optional const &loss_attrs, + std::optional label_tensor, + std::optional logit_tensor, + std::unordered_map const + &input_tensors, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig const &iteration_config); + +std::unordered_map + perform_all_passes_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config); + +std::unordered_map + perform_forward_pass_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config); + +std::unordered_map + perform_backward_pass_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config); + +std::unordered_map + perform_update_pass_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/realm.h b/lib/realm-execution/include/realm-execution/realm.h new file mode 100644 index 0000000000..b6913e66f5 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/realm.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_REALM_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_REALM_H + +#define FLEXFLOW_USE_PREALM + +#ifdef FLEXFLOW_USE_PREALM +#include +#else +#include +#endif + +namespace FlexFlow { + +#ifdef 
FLEXFLOW_USE_PREALM +namespace Realm = ::PRealm; +#else +namespace Realm = ::Realm; +#endif + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/realm_allocator.h b/lib/realm-execution/include/realm-execution/realm_allocator.h new file mode 100644 index 0000000000..d72f2d7f91 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/realm_allocator.h @@ -0,0 +1,33 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_ALLOCATOR_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_ALLOCATOR_H + +#include "kernels/allocation.h" +#include "realm-execution/realm.h" + +namespace FlexFlow { + +struct RealmAllocator : public IAllocator { + RealmAllocator(Realm::Processor processor, Realm::Memory memory); + + RealmAllocator() = delete; + RealmAllocator(RealmAllocator const &) = delete; + RealmAllocator(RealmAllocator &&) = delete; + ~RealmAllocator() = default; + + void *allocate(size_t) override; + void deallocate(void *) override; + + DeviceType get_allocation_device_type() const override; + +private: + Realm::Processor processor; + Realm::Memory memory; + std::unordered_map ptr_instances; +}; +CHECK_RC_COPY_VIRTUAL_COMPLIANT(RealmAllocator); + +Allocator get_realm_allocator(Realm::Processor processor, Realm::Memory memory); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/realm_context.h b/lib/realm-execution/include/realm-execution/realm_context.h new file mode 100644 index 0000000000..b8baad41b9 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/realm_context.h @@ -0,0 +1,84 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_CONTEXT_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_CONTEXT_H + +#include "kernels/allocation.h" +#include "kernels/device_handle_t.dtg.h" +#include "kernels/managed_per_device_ff_handle.h" +#include "pcg/device_id_t.dtg.h" +#include 
"pcg/machine_space_coordinate.dtg.h" +#include "realm-execution/realm.h" +#include "realm-execution/tasks/task_id_t.dtg.h" +#include +#include + +namespace FlexFlow { + +struct RealmContext { +public: + RealmContext(Realm::Processor processor); + virtual ~RealmContext(); + + RealmContext() = delete; + RealmContext(RealmContext const &) = delete; + RealmContext(RealmContext &&) = delete; + + // Device mapping + Realm::Processor + map_device_coord_to_processor(MachineSpaceCoordinate const &); + static Realm::Memory get_nearest_memory(Realm::Processor); + + // Current device context + Realm::Processor get_current_processor() const; + Allocator &get_current_device_allocator(); + device_id_t get_current_device_idx() const; + + // Task creation + Realm::Event spawn_task(Realm::Processor proc, + task_id_t task_id, + void const *args, + size_t arglen, + Realm::ProfilingRequestSet const &requests, + Realm::Event wait_on = Realm::Event::NO_EVENT, + int priority = 0); + + Realm::Event + collective_spawn_task(Realm::Processor target_proc, + task_id_t task_id, + void const *args, + size_t arglen, + Realm::Event wait_on = Realm::Event::NO_EVENT, + int priority = 0); + + // Instance management + std::pair + create_instance(Realm::Memory memory, + TensorShape const &shape, + Realm::ProfilingRequestSet const &prs, + Realm::Event wait_on = Realm::Event::NO_EVENT); + + // Get the current set of outstanding events + Realm::Event get_outstanding_events(); + +protected: + // Compact AND CLEAR the outstanding event queue + // Important: USER MUST BLOCK on event or else use it, or it WILL BE LOST + [[nodiscard]] Realm::Event merge_outstanding_events(); + + void discover_machine_topology(); + + static std::optional + make_device_handle_for_processor(Realm::Processor processor); + +protected: + Realm::Runtime runtime; + Realm::Processor processor; + Allocator allocator; + std::vector outstanding_events; + std::unordered_map, + std::vector> + processors; +}; + +} // namespace FlexFlow + 
+#endif diff --git a/lib/realm-execution/include/realm-execution/realm_manager.h b/lib/realm-execution/include/realm-execution/realm_manager.h new file mode 100644 index 0000000000..8a79476bcf --- /dev/null +++ b/lib/realm-execution/include/realm-execution/realm_manager.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_MANAGER_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_MANAGER_H + +#include "kernels/allocation.h" +#include "kernels/device_handle_t.dtg.h" +#include "pcg/device_id_t.dtg.h" +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" + +namespace FlexFlow { + +struct RealmManager : private RealmContext { +public: + RealmManager(int *argc, char ***argv); + virtual ~RealmManager(); + + RealmManager() = delete; + RealmManager(RealmManager const &) = delete; + RealmManager(RealmManager &&) = delete; + + [[nodiscard]] Realm::Event + start_controller(std::function, + Realm::Event wait_on = Realm::Event::NO_EVENT); +}; + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/controller_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/controller_task.h new file mode 100644 index 0000000000..7134973ead --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/controller_task.h @@ -0,0 +1,20 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_CONTROLLER_TASK_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_CONTROLLER_TASK_H + +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" + +namespace FlexFlow { + +void controller_task_body( + void const *, size_t, void const *, size_t, Realm::Processor); + +Realm::Event + collective_spawn_controller_task(RealmContext &ctx, + Realm::Processor &target_proc, + std::function thunk, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git 
a/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_return_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_return_task.h new file mode 100644 index 0000000000..a87652b5ce --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_return_task.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_RETURN_TASK_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_RETURN_TASK_H + +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" + +namespace FlexFlow { + +void device_handle_init_return_task_body( + void const *, size_t, void const *, size_t, Realm::Processor); + +Realm::Event spawn_device_handle_init_return_task( + RealmContext &ctx, + Realm::Processor origin_proc, + DeviceSpecificManagedPerDeviceFFHandle const &result, + DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task.h new file mode 100644 index 0000000000..312ed26add --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task.h @@ -0,0 +1,23 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_TASK_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_TASK_H + +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" + +namespace FlexFlow { + +void device_handle_init_task_body( + void const *, size_t, void const *, size_t, Realm::Processor); + +Realm::Event 
spawn_device_handle_init_task( + RealmContext &ctx, + Realm::Processor target_proc, + size_t workSpaceSize, + bool allowTensorOpMathConversion, + DeviceSpecificManagedPerDeviceFFHandle *result_ptr, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task_args.dtg.toml new file mode 100644 index 0000000000..c0ba37bb5d --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task_args.dtg.toml @@ -0,0 +1,26 @@ +namespace = "FlexFlow" +name = "DeviceHandleInitTaskArgs" +type = "struct" +features = [] + +includes = [ + "realm-execution/device_specific_managed_per_device_ff_handle.h", + "realm-execution/realm.h", + "realm-execution/tasks/serializer/serializable_realm_processor.h", +] + +[[fields]] +name = "workSpaceSize" +type = "size_t" + +[[fields]] +name = "allowTensorOpMathConversion" +type = "bool" + +[[fields]] +name = "origin_proc" +type = "::FlexFlow::Realm::Processor" + +[[fields]] +name = "origin_result_ptr" +type = "::FlexFlow::DeviceSpecificManagedPerDeviceFFHandle *" diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_return_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_return_task.h new file mode 100644 index 0000000000..8f44680815 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_return_task.h @@ -0,0 +1,22 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_RETURN_TASK_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_RETURN_TASK_H + +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" + +namespace FlexFlow { + +void 
device_state_init_return_task_body( + void const *, size_t, void const *, size_t, Realm::Processor); + +Realm::Event spawn_device_state_init_return_task( + RealmContext &ctx, + Realm::Processor origin_proc, + DeviceSpecificPerDeviceOpState const &result, + DeviceSpecificPerDeviceOpState *origin_result_ptr, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task.h new file mode 100644 index 0000000000..4ed8c1726d --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task.h @@ -0,0 +1,31 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_TASK_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_TASK_H + +#include "kernels/profiling_settings.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" +#include "task-spec/device_specific_per_device_op_state.dtg.h" +#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h" +#include "task-spec/ff_iteration_config.dtg.h" + +namespace FlexFlow { + +void device_state_init_task_body( + void const *, size_t, void const *, size_t, Realm::Processor); + +std::optional spawn_device_state_init_task( + RealmContext &ctx, + Realm::Processor target_proc, + DynamicNodeInvocation const &invocation, + ProfilingSettings const &profiling_settings, + DeviceSpecificManagedPerDeviceFFHandle const &device_handle, + FFIterationConfig const &iteration_config, + OptimizerAttrs const &optimizer_attrs, + DeviceSpecificPerDeviceOpState *result_ptr, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git 
a/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task_args.dtg.toml new file mode 100644 index 0000000000..a9aa77dde9 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task_args.dtg.toml @@ -0,0 +1,42 @@ +namespace = "FlexFlow" +name = "DeviceStateInitTaskArgs" +type = "struct" +features = [] + +includes = [ + "kernels/profiling_settings.dtg.h", + "pcg/optimizer_attrs.dtg.h", + "realm-execution/device_specific_managed_per_device_ff_handle.h", + "realm-execution/realm.h", + "task-spec/device_specific_per_device_op_state.dtg.h", + "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h", + "task-spec/ff_iteration_config.dtg.h", +] + +[[fields]] +name = "invocation" +type = "::FlexFlow::DynamicNodeInvocation" + +[[fields]] +name = "profiling_settings" +type = "::FlexFlow::ProfilingSettings" + +[[fields]] +name = "device_handle" +type = "::FlexFlow::DeviceSpecificManagedPerDeviceFFHandle" + +[[fields]] +name = "iteration_config" +type = "::FlexFlow::FFIterationConfig" + +[[fields]] +name = "optimizer_attrs" +type = "::FlexFlow::OptimizerAttrs" + +[[fields]] +name = "origin_proc" +type = "::FlexFlow::Realm::Processor" + +[[fields]] +name = "origin_result_ptr" +type = "::FlexFlow::DeviceSpecificPerDeviceOpState *" diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/op_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/op_task.h new file mode 100644 index 0000000000..9d4c2fd451 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/op_task.h @@ -0,0 +1,29 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_OP_TASK_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_OP_TASK_H + +#include "kernels/profiling_settings.dtg.h" +#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" 
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/realm.h" +#include "realm-execution/realm_context.h" +#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h" +#include "task-spec/ff_iteration_config.dtg.h" + +namespace FlexFlow { + +void op_task_body(void const *, size_t, void const *, size_t, Realm::Processor); + +Realm::Event + spawn_op_task(RealmContext &ctx, + Realm::Processor target_proc, + DynamicNodeInvocation const &invocation, + ProfilingSettings const &profiling_settings, + DeviceSpecificManagedPerDeviceFFHandle const &device_handle, + FFIterationConfig const &iteration_config, + std::optional const &optimizer_attrs, + Realm::Event precondition); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/op_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/op_task_args.dtg.toml new file mode 100644 index 0000000000..814f9f802b --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/op_task_args.dtg.toml @@ -0,0 +1,32 @@ +namespace = "FlexFlow" +name = "OpTaskArgs" +type = "struct" +features = [] + +includes = [ + "kernels/profiling_settings.dtg.h", + "pcg/optimizer_attrs.dtg.h", + "realm-execution/device_specific_managed_per_device_ff_handle.h", + "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h", + "task-spec/ff_iteration_config.dtg.h", +] + +[[fields]] +name = "invocation" +type = "::FlexFlow::DynamicNodeInvocation" + +[[fields]] +name = "profiling_settings" +type = "::FlexFlow::ProfilingSettings" + +[[fields]] +name = "device_handle" +type = "::FlexFlow::DeviceSpecificManagedPerDeviceFFHandle" + +[[fields]] +name = "iteration_config" +type = "::FlexFlow::FFIterationConfig" + +[[fields]] +name = "optimizer_attrs" +type = "std::optional<::FlexFlow::OptimizerAttrs>" diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.toml 
b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.toml new file mode 100644 index 0000000000..34f52880f8 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.toml @@ -0,0 +1,29 @@ +namespace = "FlexFlow" +name = "SerializableDeviceHandleInitTaskArgs" +type = "struct" +features = [ + "eq", + "fmt", + "hash", + "json", +] + +includes = [ + "realm-execution/tasks/serializer/serializable_realm_processor.dtg.h", +] + +[[fields]] +name = "workSpaceSize" +type = "size_t" + +[[fields]] +name = "allowTensorOpMathConversion" +type = "bool" + +[[fields]] +name = "origin_proc" +type = "::FlexFlow::SerializableRealmProcessor" + +[[fields]] +name = "origin_result_ptr" +type = "uintptr_t" diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.h b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.h new file mode 100644 index 0000000000..63d70fe10a --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.h @@ -0,0 +1,17 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_HANDLE_INIT_TASK_ARGS_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_HANDLE_INIT_TASK_ARGS_H + +#include "realm-execution/tasks/impl/device_handle_init_task_args.dtg.h" +#include "realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.h" + +namespace FlexFlow { + +SerializableDeviceHandleInitTaskArgs + device_handle_init_task_args_to_serializable( + DeviceHandleInitTaskArgs const &); +DeviceHandleInitTaskArgs device_handle_init_task_args_from_serializable( + SerializableDeviceHandleInitTaskArgs const &); + +} // namespace FlexFlow + +#endif diff --git 
a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.toml new file mode 100644 index 0000000000..c99d2758c0 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.toml @@ -0,0 +1,47 @@ +namespace = "FlexFlow" +name = "SerializableDeviceStateInitTaskArgs" +type = "struct" +features = [ + "eq", + "fmt", + "hash", + "json", +] + +includes = [ + "kernels/profiling_settings.dtg.h", + "pcg/optimizer_attrs.dtg.h", + "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h", + "realm-execution/tasks/serializer/serializable_realm_processor.dtg.h", + "task-spec/device_specific_per_device_op_state.dtg.h", + "task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.h", + "task-spec/ff_iteration_config.dtg.h", +] + +[[fields]] +name = "invocation" +type = "::FlexFlow::SerializableDynamicNodeInvocation" + +[[fields]] +name = "profiling_settings" +type = "::FlexFlow::ProfilingSettings" + +[[fields]] +name = "device_handle" +type = "::FlexFlow::SerializableDeviceSpecificPtr" + +[[fields]] +name = "iteration_config" +type = "::FlexFlow::FFIterationConfig" + +[[fields]] +name = "optimizer_attrs" +type = "::FlexFlow::OptimizerAttrs" + +[[fields]] +name = "origin_proc" +type = "::FlexFlow::SerializableRealmProcessor" + +[[fields]] +name = "origin_result_ptr" +type = "uintptr_t" diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.h b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.h new file mode 100644 index 0000000000..f028820974 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.h @@ -0,0 +1,16 @@ +#ifndef 
_FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_STATE_INIT_TASK_ARGS_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_STATE_INIT_TASK_ARGS_H + +#include "realm-execution/tasks/impl/device_state_init_task_args.dtg.h" +#include "realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.h" + +namespace FlexFlow { + +SerializableDeviceStateInitTaskArgs device_state_init_task_args_to_serializable( + DeviceStateInitTaskArgs const &); +DeviceStateInitTaskArgs device_state_init_task_args_from_serializable( + SerializableDeviceStateInitTaskArgs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.dtg.toml new file mode 100644 index 0000000000..a0f89e3ae2 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.dtg.toml @@ -0,0 +1,42 @@ +namespace = "FlexFlow" +name = "SerializableOpTaskArgs" +type = "struct" +features = [ + "eq", + "fmt", + "hash", + "json", +] + +includes = [ + "kernels/profiling_settings.dtg.h", + "pcg/optimizer_attrs.dtg.h", + "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h", + "task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.h", + "task-spec/ff_iteration_config.dtg.h", +] + +src_includes = [ + "utils/fmt/optional.h", + "utils/json/optional.h", +] + +[[fields]] +name = "invocation" +type = "::FlexFlow::SerializableDynamicNodeInvocation" + +[[fields]] +name = "profiling_settings" +type = "::FlexFlow::ProfilingSettings" + +[[fields]] +name = "device_handle" +type = "::FlexFlow::SerializableDeviceSpecificPtr" + +[[fields]] +name = "iteration_config" +type = "::FlexFlow::FFIterationConfig" + +[[fields]] +name = "optimizer_attrs" +type = "std::optional<::FlexFlow::OptimizerAttrs>" diff --git 
a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.h b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.h new file mode 100644 index 0000000000..3b2d05d0b6 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_OP_TASK_ARGS_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_OP_TASK_ARGS_H + +#include "realm-execution/tasks/impl/op_task_args.dtg.h" +#include "realm-execution/tasks/impl/serializable_op_task_args.dtg.h" + +namespace FlexFlow { + +SerializableOpTaskArgs op_task_args_to_serializable(OpTaskArgs const &); +OpTaskArgs op_task_args_from_serializable(SerializableOpTaskArgs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/realm_task_id_t.h b/lib/realm-execution/include/realm-execution/tasks/realm_task_id_t.h new file mode 100644 index 0000000000..a3c6891fb0 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/realm_task_id_t.h @@ -0,0 +1,13 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_ID_T_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_ID_T_H + +#include "realm-execution/realm.h" +#include "realm-execution/tasks/task_id_t.dtg.h" + +namespace FlexFlow { + +Realm::Processor::TaskFuncID get_realm_task_id_for_task_id(task_id_t); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/realm_task_registry.h b/lib/realm-execution/include/realm-execution/tasks/realm_task_registry.h new file mode 100644 index 0000000000..8114f1a82c --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/realm_task_registry.h @@ -0,0 +1,21 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_REGISTRY_H 
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_REGISTRY_H + +#include "realm-execution/realm.h" +#include "realm-execution/tasks/task_id_t.dtg.h" + +namespace FlexFlow { + +[[nodiscard]] Realm::Event register_task(Realm::Processor::Kind target_kind, + task_id_t func_id, + void (*task_body)(void const *, + size_t, + void const *, + size_t, + Realm::Processor)); + +[[nodiscard]] Realm::Event register_all_tasks(); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.toml new file mode 100644 index 0000000000..07cf61f7e1 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.toml @@ -0,0 +1,28 @@ +namespace = "FlexFlow" +name = "SerializableDeviceSpecificPtr" +type = "struct" +features = [ + "eq", + "fmt", + "hash", + "json", +] + +includes = [ + "pcg/device_id_t.dtg.h", + "cstdint", + "optional", +] + +src_includes = [ + "utils/fmt/optional.h", + "utils/json/optional.h", +] + +[[fields]] +name = "device_idx" +type = "::FlexFlow::device_id_t" + +[[fields]] +name = "ptr" +type = "std::optional" diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.dtg.toml new file mode 100644 index 0000000000..3cb64d95c1 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.dtg.toml @@ -0,0 +1,17 @@ +namespace = "FlexFlow" +name = "SerializableRealmProcessor" +type = "struct" +features = [ + "eq", + "fmt", + "hash", + "json", +] + +includes = [ + "realm-execution/realm.h", +] + +[[fields]] +name = "id" +type = "::FlexFlow::Realm::Processor::id_t" diff --git 
a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.h b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.h new file mode 100644 index 0000000000..6b29b6e223 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_SERIALIZABLE_REALM_PROCESSOR_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_SERIALIZABLE_REALM_PROCESSOR_H + +#include "realm-execution/realm.h" +#include "realm-execution/tasks/serializer/serializable_realm_processor.dtg.h" + +namespace FlexFlow { + +SerializableRealmProcessor + realm_processor_to_serializable(Realm::Processor const &); +Realm::Processor + realm_processor_from_serializable(SerializableRealmProcessor const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/task_arg_serializer.h b/lib/realm-execution/include/realm-execution/tasks/serializer/task_arg_serializer.h new file mode 100644 index 0000000000..3208368d2d --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/serializer/task_arg_serializer.h @@ -0,0 +1,25 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_TASK_ARG_SERIALIZER_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_TASK_ARG_SERIALIZER_H + +#include +#include +#include + +namespace FlexFlow { + +template +std::string serialize_task_args(T const &args) { + nlohmann::json j = args; + return j.dump(); +} + +template +T deserialize_task_args(void const *args, size_t arglen) { + nlohmann::json j = nlohmann::json::parse( + std::string_view{reinterpret_cast(args), arglen}); + return j.get(); +} + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/task_id_t.dtg.toml 
b/lib/realm-execution/include/realm-execution/tasks/task_id_t.dtg.toml similarity index 97% rename from lib/task-spec/include/task-spec/task_id_t.dtg.toml rename to lib/realm-execution/include/realm-execution/tasks/task_id_t.dtg.toml index ce2de52d40..97b19b5f51 100644 --- a/lib/task-spec/include/task-spec/task_id_t.dtg.toml +++ b/lib/realm-execution/include/realm-execution/tasks/task_id_t.dtg.toml @@ -9,10 +9,16 @@ features = [ ] [[values]] -name = "TOP_LEVEL_TASK_ID" +name = "CONTROLLER_TASK_ID" [[values]] -name = "FF_INIT_TASK_ID" +name = "DEVICE_HANDLE_INIT_TASK_ID" + +[[values]] +name = "DEVICE_HANDLE_INIT_RETURN_TASK_ID" + +[[values]] +name = "DEVICE_STATE_INIT_RETURN_TASK_ID" [[values]] name = "IMAGE_INIT_TASK_ID" diff --git a/lib/realm-execution/include/realm-execution/tasks/task_id_t.h b/lib/realm-execution/include/realm-execution/tasks/task_id_t.h new file mode 100644 index 0000000000..53945d2e5b --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tasks/task_id_t.h @@ -0,0 +1,28 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_TASK_ID_T_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_TASK_ID_T_H + +#include "op-attrs/pcg_operator_attrs.dtg.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "realm-execution/tasks/task_id_t.dtg.h" +#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h" +#include + +namespace FlexFlow { + +std::optional + get_task_id_for_op(DynamicNodeAttrs const &, + std::optional const &); + +std::optional + get_init_task_id_for_op_attrs(PCGOperatorAttrs const &); + +std::optional get_fwd_task_id_for_op_attrs(PCGOperatorAttrs const &); + +std::optional get_bwd_task_id_for_op_attrs(PCGOperatorAttrs const &); + +std::optional + get_update_task_id_for_optimizer_attrs(OptimizerAttrs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/include/realm-execution/tensor_instance_backing.dtg.toml 
b/lib/realm-execution/include/realm-execution/tensor_instance_backing.dtg.toml new file mode 100644 index 0000000000..e6a8bd58d9 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tensor_instance_backing.dtg.toml @@ -0,0 +1,24 @@ +namespace = "FlexFlow" +name = "TensorInstanceBacking" +type = "struct" +features = [ + "eq", + #"fmt", + #"hash", +] + +includes = [ + "", + "realm-execution/realm.h", + "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h", +] + +src_includes = [ + "realm-execution/fmt/instance.h", + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", +] + +[[fields]] +name = "backing" +type = "std::unordered_map<::FlexFlow::DynamicValueAttrs, std::pair<::FlexFlow::Realm::RegionInstance, ::FlexFlow::Realm::Event>>" diff --git a/lib/realm-execution/include/realm-execution/tensor_instance_backing.h b/lib/realm-execution/include/realm-execution/tensor_instance_backing.h new file mode 100644 index 0000000000..1d143b7409 --- /dev/null +++ b/lib/realm-execution/include/realm-execution/tensor_instance_backing.h @@ -0,0 +1,12 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TENSOR_INSTANCE_BACKING_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TENSOR_INSTANCE_BACKING_H + +#include "realm-execution/tensor_instance_backing.dtg.h" + +namespace FlexFlow { + +TensorInstanceBacking make_empty_tensor_instance_backing(); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/src/realm-execution/atomic_dependency_set.cc b/lib/realm-execution/src/realm-execution/atomic_dependency_set.cc new file mode 100644 index 0000000000..ba4fcc5a9f --- /dev/null +++ b/lib/realm-execution/src/realm-execution/atomic_dependency_set.cc @@ -0,0 +1,27 @@ +#include "realm-execution/atomic_dependency_set.h" + +namespace FlexFlow { + +AtomicDependencySet::AtomicDependencySet(Realm::Event precondition) + : writer(precondition) {} + +void AtomicDependencySet::add_writer(Realm::Event writer) { + this->writer = + 
Realm::Event::merge_events(writer, this->get_dependency_for_writer()); + this->readers.clear(); +} + +void AtomicDependencySet::add_reader(Realm::Event reader) { + this->readers.push_back(reader); +} + +Realm::Event AtomicDependencySet::get_dependency_for_writer() const { + Realm::Event readers = Realm::Event::merge_events(this->readers); + return Realm::Event::merge_events(this->writer, readers); +} + +Realm::Event AtomicDependencySet::get_dependency_for_reader() const { + return this->writer; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/dependency_set.cc b/lib/realm-execution/src/realm-execution/dependency_set.cc new file mode 100644 index 0000000000..84412a125d --- /dev/null +++ b/lib/realm-execution/src/realm-execution/dependency_set.cc @@ -0,0 +1,49 @@ +#include "realm-execution/dependency_set.h" +#include "realm-execution/atomic_dependency_set.h" +#include "utils/containers/contains_key.h" + +namespace FlexFlow { + +DependencySet::DependencySet(Realm::Event precondition) + : precondition(precondition) {} + +void DependencySet::add_writer(DynamicValueAttrs const &value, + Realm::Event writer) { + AtomicDependencySet &atomic_dependence_set = + this->get_atomic_dependency_set(value); + atomic_dependence_set.add_writer(writer); +} + +void DependencySet::add_reader(DynamicValueAttrs const &value, + Realm::Event reader) { + AtomicDependencySet &atomic_dependence_set = + this->get_atomic_dependency_set(value); + atomic_dependence_set.add_reader(reader); +} + +Realm::Event DependencySet::get_dependency_for_writer( + DynamicValueAttrs const &value) const { + if (contains_key(this->atomic_dependencies, value)) { + return this->atomic_dependencies.at(value).get_dependency_for_writer(); + } + return this->precondition; +} + +Realm::Event DependencySet::get_dependency_for_reader( + DynamicValueAttrs const &value) const { + if (contains_key(this->atomic_dependencies, value)) { + return 
this->atomic_dependencies.at(value).get_dependency_for_reader(); + } + return this->precondition; +} + +AtomicDependencySet & + DependencySet::get_atomic_dependency_set(DynamicValueAttrs const &value) { + if (!contains_key(this->atomic_dependencies, value)) { + this->atomic_dependencies.insert( + {value, AtomicDependencySet{this->precondition}}); + } + return this->atomic_dependencies.at(value); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/device_specific_managed_per_device_ff_handle.cc b/lib/realm-execution/src/realm-execution/device_specific_managed_per_device_ff_handle.cc new file mode 100644 index 0000000000..6e0cef0bb2 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/device_specific_managed_per_device_ff_handle.cc @@ -0,0 +1,57 @@ +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "kernels/device_handle_t.h" +#include "utils/containers/transform.h" +#include "utils/json/optional.h" +#include + +namespace FlexFlow { + +DeviceSpecificManagedPerDeviceFFHandle::DeviceSpecificManagedPerDeviceFFHandle( + device_id_t owner, std::optional handle) + : owner(owner), handle(handle) {} + +std::optional + DeviceSpecificManagedPerDeviceFFHandle::get(device_id_t device_idx) const { + ASSERT(this->owner == device_idx); + return this->handle; +} + +SerializableDeviceSpecificPtr + DeviceSpecificManagedPerDeviceFFHandle::serialize() const { + return SerializableDeviceSpecificPtr{ + /*device_idx=*/owner, + /*ptr=*/ + transform(handle, + [](ManagedPerDeviceFFHandle *ptr) { + return reinterpret_cast(ptr); + }), + }; +} + +DeviceSpecificManagedPerDeviceFFHandle + DeviceSpecificManagedPerDeviceFFHandle::deserialize( + SerializableDeviceSpecificPtr const &handle) { + return DeviceSpecificManagedPerDeviceFFHandle{ + /*owner=*/handle.device_idx, + /*handle=*/ + transform(handle.ptr, + [](uintptr_t ptrval) { + return reinterpret_cast(ptrval); + }), + }; +} + +DeviceSpecificManagedPerDeviceFFHandle 
make_device_specific_managed_handle( + device_id_t const &device_id, + std::optional const &managed_handle) { + return DeviceSpecificManagedPerDeviceFFHandle{device_id, managed_handle}; +} + +device_handle_t device_handle_t_from_device_specific_managed_handle( + DeviceSpecificManagedPerDeviceFFHandle const &device_specific, + device_id_t device_idx) { + return device_handle_t_from_managed_handle_ptr( + device_specific.get(device_idx).value()); // throws if handle is unset +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/distributed_device_handle.cc b/lib/realm-execution/src/realm-execution/distributed_device_handle.cc new file mode 100644 index 0000000000..87376be9b1 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/distributed_device_handle.cc @@ -0,0 +1,51 @@ +#include "realm-execution/distributed_device_handle.h" +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/tasks/impl/device_handle_init_task.h" +#include "task-spec/device_specific.h" + +namespace FlexFlow { + +DistributedDeviceHandle::DistributedDeviceHandle( + std::unordered_map const &handles) + : handles(handles) {} + +DeviceSpecificManagedPerDeviceFFHandle const & + DistributedDeviceHandle::at(Realm::Processor processor) const { + return this->handles.at(processor); +} + +DistributedDeviceHandle + create_distributed_device_handle(RealmContext &ctx, + size_t workSpaceSize, + bool allowTensorOpMathConversion, + Realm::Event precondition) { + std::unordered_map + handles; + + // Allocate space for the result before launching any tasks + Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine()); + for (Realm::Processor proc : pq) { + if (proc.kind() == Realm::Processor::LOC_PROC || + proc.kind() == Realm::Processor::TOC_PROC) { + handles.insert({proc, + make_device_specific_managed_handle( + ctx.get_current_device_idx(), std::nullopt)}); + } + } + + for (auto &[proc, handle] : handles) { + spawn_device_handle_init_task(ctx, + proc, +
workSpaceSize, + allowTensorOpMathConversion, + &handle, + precondition); + } + + ctx.get_outstanding_events().wait(); + + return DistributedDeviceHandle{handles}; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/distributed_device_state_initialization.cc b/lib/realm-execution/src/realm-execution/distributed_device_state_initialization.cc new file mode 100644 index 0000000000..cab2b49e15 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/distributed_device_state_initialization.cc @@ -0,0 +1,72 @@ +#include "realm-execution/distributed_device_state_initialization.h" +#include "local-execution/device_state_initialization.h" +#include "realm-execution/tasks/impl/device_state_init_task.h" +#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h" +#include "utils/optional.h" +#include +#include + +namespace FlexFlow { + +DynamicOpenDataflowGraph perform_distributed_device_state_initialization( + DynamicOpenDataflowGraph const &dg, + RealmContext &ctx, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig const &iteration_config, + OptimizerAttrs const &optimizer_attrs, + Realm::Event precondition) { + + // Initialize all operators and save the per-device op state + ASSERT(no_nodes_are_initialized(dg)); + + std::unordered_map + result_map; + for (DynamicNodeInvocation const &invocation : dg.invocations) { + Realm::Processor target_proc = ctx.map_device_coord_to_processor( + assert_unwrap(invocation.node_attrs.device_coord)); + + // FIXME: in the absense of a real serializer we're just tossing around raw + // bytes, which means we need to bypass the constructor for this type (yes, + // ugh) + DeviceSpecificPerDeviceOpState *output = + static_cast( + malloc(sizeof(DeviceSpecificPerDeviceOpState))); + std::optional result = + spawn_device_state_init_task(ctx, + target_proc, + invocation, + 
profiling_settings, + device_handle.at(target_proc), + iteration_config, + optimizer_attrs, + output, + precondition); + if (result) { + result_map[invocation] = output; + } else { + free(output); + } + } + + ctx.get_outstanding_events().wait(); + + DynamicOpenDataflowGraph result = transform_dynamic_invocation_set( + dg, [&](DynamicNodeInvocation const &invocation) { + DynamicNodeInvocation result = invocation; + auto device_state = result_map.find(invocation); + if (device_state != result_map.end()) { + result.node_attrs.per_device_op_state = *device_state->second; + } + return result; + }); + + for (auto &[invocation, output] : result_map) { + free(output); + } + + return result; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/fmt/instance.cc b/lib/realm-execution/src/realm-execution/fmt/instance.cc new file mode 100644 index 0000000000..f8eabe9bb0 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/fmt/instance.cc @@ -0,0 +1,10 @@ +#include "realm-execution/fmt/instance.h" + +namespace FlexFlow { + +std::ostream &operator<<(std::ostream &s, + ::FlexFlow::Realm::RegionInstance const &m) { + return s << fmt::to_string(m); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/hash/processor.cc b/lib/realm-execution/src/realm-execution/hash/processor.cc new file mode 100644 index 0000000000..dcc1bc5d06 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/hash/processor.cc @@ -0,0 +1,11 @@ +#include "realm-execution/hash/processor.h" +#include + +namespace std { + +size_t hash<::FlexFlow::Realm::Processor>::operator()( + ::FlexFlow::Realm::Processor const &p) const { + return hash<::FlexFlow::Realm::Processor::id_t>{}(p.id); +} + +} // namespace std diff --git a/lib/realm-execution/src/realm-execution/instance_allocation.cc b/lib/realm-execution/src/realm-execution/instance_allocation.cc new file mode 100644 index 0000000000..b740859e22 --- /dev/null +++ 
b/lib/realm-execution/src/realm-execution/instance_allocation.cc @@ -0,0 +1,82 @@ +#include "realm-execution/instance_allocation.h" +#include "local-execution/tensor_allocation.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/tensor_shape.dtg.h" +#include "realm-execution/realm_context.h" +#include "realm-execution/tensor_instance_backing.h" +#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h" +#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h" +#include "task-spec/dynamic_graph/dynamic_tensor_accessor.dtg.h" +#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h" +#include "utils/bidict/generate_bidict.h" +#include "utils/containers/all_are_true.h" +#include "utils/containers/contains_key.h" +#include "utils/containers/make.h" +#include "utils/containers/map_values.h" +#include "utils/containers/unordered_set_of.h" +#include "utils/containers/values.h" +#include "utils/exception.h" +#include "utils/optional.h" + +namespace FlexFlow { + +// Creates a Realm instance for a single not-yet-allocated value, sized by its +// piece shape and placed in the memory nearest to the node's mapped processor. +std::pair + perform_instance_allocation_for_value(DynamicNodeAttrs const &node, + DynamicValueAttrs const &value, + RealmContext &ctx) { + ASSERT(value.accessor == std::nullopt); + + TensorShape shape = get_piece_shape(value.parallel_tensor_shape.value()); + + MachineSpaceCoordinate device_coord = assert_unwrap(node.device_coord); + Realm::Processor proc = ctx.map_device_coord_to_processor(device_coord); + Realm::Memory memory = ctx.get_nearest_memory(proc); + return ctx.create_instance(memory, shape, Realm::ProfilingRequestSet()); +} + +// Allocates one instance per distinct value used by the invocations of g and +// returns the resulting backing. Preallocated values are not supported yet. +TensorInstanceBacking perform_instance_allocation( + DynamicOpenDataflowGraph const &g, + std::unordered_map const + &preallocated, + RealmContext &ctx) { + ASSERT(no_tensors_are_allocated(g)); + ASSERT(tensors_are_ready_for_allocation(g)); + for (DynamicValueAttrs const &v : keys(preallocated)) { + ASSERT(v.accessor == std::nullopt); + } + + TensorInstanceBacking result = 
make_empty_tensor_instance_backing(); + // Allocates an instance for v unless one is already recorded in result. Every + // path returns nothing, so the deduced return type is void. + auto allocate = [&](DynamicNodeAttrs const &n, DynamicValueAttrs const &v) { + if (contains_key(preallocated, v)) { + // FIXME: Attach external instance to existing allocation and use that + NOT_IMPLEMENTED(); + } else { + if (contains_key(result.backing, v)) { + return; + } else { + result.backing.insert( + std::pair{v, perform_instance_allocation_for_value(n, v, ctx)}); + } + } + }; + + for (DynamicNodeInvocation const &invocation : g.invocations) { + for (DynamicValueAttrs const &input : values(invocation.inputs)) { + allocate(invocation.node_attrs, input); + } + for (DynamicValueAttrs const &output : values(invocation.outputs)) { + allocate(invocation.node_attrs, output); + } + } + + return result; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/pcg_instance/pcg_instance.cc b/lib/realm-execution/src/realm-execution/pcg_instance/pcg_instance.cc new file mode 100644 index 0000000000..8e6ab022aa --- /dev/null +++ b/lib/realm-execution/src/realm-execution/pcg_instance/pcg_instance.cc @@ -0,0 +1,259 @@ +#include "realm-execution/pcg_instance/pcg_instance.h" +#include "pcg/optimizer_attrs.h" +#include "realm-execution/dependency_set.h" +#include "realm-execution/distributed_device_state_initialization.h" +#include "realm-execution/instance_allocation.h" +#include "realm-execution/realm_context.h" +#include "realm-execution/tasks/impl/op_task.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h" +#include "task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.h" +#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h" +#include "task-spec/dynamic_graph/loss_insertion.h" +#include "task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h" +#include "task-spec/dynamic_graph/pass_expansion.h" +#include "task-spec/dynamic_graph/shard_expansion.h" +#include "task-spec/dynamic_graph/update_insertion.h" +#include "utils/containers/transform.h" +#include 
"utils/containers/values.h" +#include "utils/graph/digraph/algorithms/get_topological_ordering.h" +#include "utils/optional.h" + +namespace FlexFlow { + +PCGInstance::PCGInstance( + RealmContext &ctx, + std::vector const &execution_order, + OptimizerAttrs const &optimizer_attrs, + std::optional logit_grad_tensor) + : ctx(ctx), execution_order(execution_order), + optimizer_attrs(optimizer_attrs), logit_grad_tensor(logit_grad_tensor) {} + +RealmContext &PCGInstance::get_realm_context() { + return this->ctx; +} +std::vector const & + PCGInstance::get_execution_order() const { + return this->execution_order; +} +OptimizerAttrs const &PCGInstance::get_optimizer_attrs() const { + return this->optimizer_attrs; +} +void PCGInstance::update_optimizer_attrs_for_next_iter() { + this->optimizer_attrs = + get_optimizer_attrs_for_next_iter(this->optimizer_attrs); +} +std::optional + PCGInstance::get_loss_tensor_instance() const { + return this->logit_grad_tensor; +} + +PCGInstance create_pcg_instance( + RealmContext &ctx, + MappedParallelComputationGraph const &mpcg, + OptimizerAttrs const &optimizer_attrs, + std::optional const &loss_attrs, + std::optional label_tensor, + std::optional logit_tensor, + std::unordered_map const + &input_tensors, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig const &iteration_config) { + + DynamicOpenDataflowGraph dg = + make_dynamic_open_dataflow_graph_from_mpcg(mpcg); + dg = perform_pass_expansion(dg); + + std::unordered_map inputs = + input_tensors; + std::optional logit_grad_value; + if (loss_attrs) { + auto [dg2, label_v, logit_grad_v] = perform_loss_insertion( + dg, + assert_unwrap(loss_attrs), + dynamic_tensor_guid_t{assert_unwrap(logit_tensor)}); + dg = dg2; + logit_grad_value = logit_grad_v; + inputs.insert(std::pair{label_v, assert_unwrap(label_tensor)}); + } + + dg = perform_update_insertion(dg, optimizer_attrs); + dg = perform_shard_expansion(dg); + 
TensorInstanceBacking backing = perform_instance_allocation(dg, inputs, ctx); + + std::optional logit_grad_tensor = + transform(logit_grad_value, [&](DynamicValueAttrs const &lgv) { + return backing.backing.at(lgv).first; + }); + + // FIXME: for now we're going to be lazy and block on everything rather than + // do fine-grained dependencies on instances + dg = perform_distributed_device_state_initialization( + dg, + ctx, + profiling_settings, + device_handle, + iteration_config, + optimizer_attrs, + ctx.get_outstanding_events()); + + // Compute the topological ordering of the graph + auto [kwarg_graph, node_map] = + labelled_open_kwarg_dataflow_graph_from_dynamic_open_dataflow_graph(dg); + std::vector node_topo_order = get_topological_ordering(kwarg_graph); + std::vector invocation_topo_order = transform( + node_topo_order, [&](Node node) { return node_map.at_l(node); }); + + return PCGInstance{ + ctx, invocation_topo_order, optimizer_attrs, logit_grad_tensor}; + + // TODO list: + // * external instances + // * task argument serializer + // * pass instances to task and convert to tensor accessor + // * copies + // * parallel operator implementation (partition, reduce, gather, etc.) + // * and fused parallel operators (reduce + broadcast = allreduce) + // * memory-optimizing compiler integration (tensor creation/destruction, + // tensor reuse) +} + +static std::unordered_map + execute_distributed_dynamic_node_invocation_set( + RealmContext &ctx, + std::vector const &invocations, + OptimizerAttrs const &optimizer_attrs, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config) { + // For simplicity we'll track a dependency on all outstanding operations up to + // this point. This will create an effective barrier between phases. 
+ DependencySet dependency_set{ctx.get_outstanding_events()}; + return unordered_map_from_pairs( + transform(invocations, [&](DynamicNodeInvocation const &invocation) { + std::vector input_dependencies = + transform(vector_of(values(invocation.inputs)), + [&](DynamicValueAttrs const &value) { + return dependency_set.get_dependency_for_reader(value); + }); + std::vector output_dependencies = + transform(vector_of(values(invocation.outputs)), + [&](DynamicValueAttrs const &value) { + return dependency_set.get_dependency_for_writer(value); + }); + Realm::Event dependencies = Realm::Event::merge_events( + Realm::Event::merge_events(input_dependencies), + Realm::Event::merge_events(output_dependencies)); + Realm::Processor target_proc = ctx.map_device_coord_to_processor( + assert_unwrap(invocation.node_attrs.device_coord)); + Realm::Event result = spawn_op_task(ctx, + target_proc, + invocation, + profiling_settings, + device_handle.at(target_proc), + iteration_config, + optimizer_attrs, + dependencies); + for (DynamicValueAttrs const &value : values(invocation.inputs)) { + dependency_set.add_reader(value, result); + } + for (DynamicValueAttrs const &value : values(invocation.outputs)) { + dependency_set.add_writer(value, result); + } + return std::pair{invocation.node_attrs.layer_guid, result}; + })); +} + +std::unordered_map + perform_all_passes_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config) { + std::vector execution_order = + instance.get_execution_order(); + std::unordered_map result = + execute_distributed_dynamic_node_invocation_set( + /*ctx=*/instance.get_realm_context(), + /*invocations=*/execution_order, + /*optimizer_attrs=*/instance.get_optimizer_attrs(), + /*profiling_settings=*/profiling_settings, + /*device_handle=*/device_handle, + /*iteration_config=*/iteration_config); + instance.update_optimizer_attrs_for_next_iter(); + return 
result; +} + +std::unordered_map + perform_forward_pass_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config) { + std::vector execution_order = + filter(instance.get_execution_order(), + [](DynamicNodeInvocation const &invocation) { + DynamicTaskType task_type = + assert_unwrap(invocation.node_attrs.task_type); + return task_type == DynamicTaskType::FWD; + }); + + return execute_distributed_dynamic_node_invocation_set( + /*ctx=*/instance.get_realm_context(), + /*invocations=*/execution_order, + /*optimizer_attrs=*/instance.get_optimizer_attrs(), + /*profiling_settings=*/profiling_settings, + /*device_handle=*/device_handle, + /*iteration_config=*/iteration_config); +} + +std::unordered_map + perform_backward_pass_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config) { + std::vector execution_order = + filter(instance.get_execution_order(), + [](DynamicNodeInvocation const &invocation) { + DynamicTaskType task_type = + assert_unwrap(invocation.node_attrs.task_type); + return task_type == DynamicTaskType::BWD; + }); + + return execute_distributed_dynamic_node_invocation_set( + /*ctx=*/instance.get_realm_context(), + /*invocations=*/execution_order, + /*optimizer_attrs=*/instance.get_optimizer_attrs(), + /*profiling_settings=*/profiling_settings, + /*device_handle=*/device_handle, + /*iteration_config=*/iteration_config); +} + +std::unordered_map + perform_update_pass_for_pcg_instance( + PCGInstance &instance, + ProfilingSettings const &profiling_settings, + DistributedDeviceHandle const &device_handle, + FFIterationConfig iteration_config) { + std::vector execution_order = + filter(instance.get_execution_order(), + [](DynamicNodeInvocation const &invocation) { + DynamicTaskType task_type = + 
assert_unwrap(invocation.node_attrs.task_type); + return task_type == DynamicTaskType::UPD; + }); + + std::unordered_map result = + execute_distributed_dynamic_node_invocation_set( + /*ctx=*/instance.get_realm_context(), + /*invocations=*/execution_order, + /*optimizer_attrs=*/instance.get_optimizer_attrs(), + /*profiling_settings=*/profiling_settings, + /*device_handle=*/device_handle, + /*iteration_config=*/iteration_config); + instance.update_optimizer_attrs_for_next_iter(); + return result; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/realm_allocator.cc b/lib/realm-execution/src/realm-execution/realm_allocator.cc new file mode 100644 index 0000000000..f24106b0bc --- /dev/null +++ b/lib/realm-execution/src/realm-execution/realm_allocator.cc @@ -0,0 +1,53 @@ +#include "realm-execution/realm_allocator.h" +#include "kernels/device.h" +#include "pcg/device_type.dtg.h" + +namespace FlexFlow { + +RealmAllocator::RealmAllocator(Realm::Processor processor, Realm::Memory memory) + : processor(processor), memory(memory) {} + +void *RealmAllocator::allocate(size_t requested_memory_size) { + Realm::Rect<1> bounds{Realm::Point<1>::ZEROES(), + Realm::Point<1>{requested_memory_size} - + Realm::Point<1>::ONES()}; + std::vector field_sizes{1}; + Realm::RegionInstance inst; + Realm::Event ready = + Realm::RegionInstance::create_instance(inst, + this->memory, + bounds, + field_sizes, + 0 /*SOA*/, + Realm::ProfilingRequestSet{}); + ready.wait(); + void *ptr = + inst.pointer_untyped(/*offset=*/0, /*datalen=*/requested_memory_size); + ASSERT(ptr); + this->ptr_instances.insert({ptr, inst}); + return ptr; +} + +void RealmAllocator::deallocate(void *ptr) { + this->ptr_instances.at(ptr).destroy(Realm::Event::NO_EVENT); + this->ptr_instances.erase(ptr); +} + +DeviceType RealmAllocator::get_allocation_device_type() const { + switch (this->processor.kind()) { + case Realm::Processor::Kind::LOC_PROC: + return DeviceType::CPU; + case 
Realm::Processor::Kind::TOC_PROC: + return DeviceType::GPU; + default: + PANIC("Unhandled FwbTensorType", this->processor.kind()); + } +} + +Allocator get_realm_allocator(Realm::Processor processor, + Realm::Memory memory) { + Allocator allocator = Allocator::create(processor, memory); + return allocator; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/realm_context.cc b/lib/realm-execution/src/realm-execution/realm_context.cc new file mode 100644 index 0000000000..3427e8cbee --- /dev/null +++ b/lib/realm-execution/src/realm-execution/realm_context.cc @@ -0,0 +1,252 @@ +#include "realm-execution/realm_context.h" +#include "kernels/device_handle_t.dtg.h" +#include "kernels/device_handle_t.h" +#include "op-attrs/datatype.h" +#include "op-attrs/tensor_dims.dtg.h" +#include "pcg/device_id_t.h" +#include "pcg/device_type.dtg.h" +#include "realm-execution/realm_allocator.h" +#include "realm-execution/tasks/realm_task_id_t.h" +#include "realm-execution/tasks/task_id_t.dtg.h" +#include "utils/containers/contains_key.h" +#include "utils/containers/transform.h" +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/one_to_many/one_to_many.h" +#include "utils/positive_int/positive_int.h" + +namespace FlexFlow { + +RealmContext::RealmContext(Realm::Processor processor) + : processor(processor), + allocator(get_realm_allocator( + processor, RealmContext::get_nearest_memory(processor))) {} + +RealmContext::~RealmContext() { + if (!this->outstanding_events.empty()) { + Realm::Event outstanding = this->merge_outstanding_events(); + outstanding.wait(); + } +} + +static std::tuple + convert_machine_space_coordinate( + MachineSpaceCoordinate const &device_coord) { + Realm::AddressSpace as = int{device_coord.node_idx}; + Realm::Processor::Kind kind; + switch (device_coord.device_type) { + case DeviceType::CPU: + kind = Realm::Processor::Kind::LOC_PROC; + break; + case DeviceType::GPU: + kind = Realm::Processor::Kind::TOC_PROC; + break; 
+ default: + PANIC("Unhandled DeviceType", fmt::to_string(device_coord.device_type)); + break; + } + nonnegative_int proc_in_node = device_coord.device_idx; + return std::tuple{as, kind, proc_in_node}; +} + +Realm::Processor RealmContext::map_device_coord_to_processor( + MachineSpaceCoordinate const &device_coord) { + this->discover_machine_topology(); + auto [as, kind, proc_in_node] = + convert_machine_space_coordinate(device_coord); + return this->processors.at(std::pair{as, kind}).at(int{proc_in_node}); +} + +Realm::Memory RealmContext::get_nearest_memory(Realm::Processor proc) { + if (!proc.exists()) { + return Realm::Memory::NO_MEMORY; + } + + // FIMXE: this isn't going to do what you expect until + // https://github.com/StanfordLegion/realm/pull/392 merges + Realm::Machine::MemoryQuery mq(Realm::Machine::get_machine()); + mq.best_affinity_to(proc); + ASSERT(mq.count() > 0); + return mq.first(); +} + +Realm::Processor RealmContext::get_current_processor() const { + return this->processor; +} + +Allocator &RealmContext::get_current_device_allocator() { + return this->allocator; +} + +device_id_t RealmContext::get_current_device_idx() const { + Realm::Processor proc = this->get_current_processor(); + + // FIXME: find a more efficient way to implement this than scanning the + // machine every time + Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine()); + pq.same_address_space_as(proc); + nonnegative_int idx{0}; + for (Realm::Processor p : pq) { + if (p == proc) { + break; + } + idx++; + } + + switch (proc.kind()) { + case Realm::Processor::LOC_PROC: + return make_device_id_t_from_idx(idx, DeviceType::CPU); + case Realm::Processor::TOC_PROC: + return make_device_id_t_from_idx(idx, DeviceType::GPU); + default: + PANIC("Unhandled Realm::ProcessorKind", fmt::to_string(int{proc.kind()})); + } +} + +Realm::Event + RealmContext::spawn_task(Realm::Processor proc, + task_id_t task_id, + void const *args, + size_t arglen, + Realm::ProfilingRequestSet const 
&requests, + Realm::Event wait_on, + int priority) { + Realm::Event result = proc.spawn(get_realm_task_id_for_task_id(task_id), + args, + arglen, + requests, + wait_on, + priority); + this->outstanding_events.push_back(result); + return result; +} + +Realm::Event RealmContext::collective_spawn_task(Realm::Processor target_proc, + task_id_t task_id, + void const *args, + size_t arglen, + Realm::Event wait_on, + int priority) { + Realm::Event result = + this->runtime.collective_spawn(target_proc, + get_realm_task_id_for_task_id(task_id), + args, + arglen, + wait_on, + priority); + this->outstanding_events.push_back(result); + return result; +} + +template +static Realm::Rect rect_from_dims(TensorDims const &dims) { + std::vector values{dims.ff_ordered.begin(), dims.ff_ordered.end()}; + return Realm::Rect{Realm::Point::ZEROES(), + Realm::Point{values.data()} - + Realm::Point::ONES()}; +} + +std::pair + RealmContext::create_instance(Realm::Memory memory, + TensorShape const &shape, + Realm::ProfilingRequestSet const &prs, + Realm::Event wait_on) { + std::vector field_sizes{ + static_cast(int{size_of_datatype(shape.data_type)})}; + Realm::RegionInstance inst; + Realm::Event ready; + switch (shape.dims.ff_ordered.num_dims()) { +#if REALM_MAX_DIM >= 1 + case 1: + ready = + Realm::RegionInstance::create_instance(inst, + memory, + rect_from_dims<1>(shape.dims), + field_sizes, + 0 /*SOA*/, + prs, + wait_on); + break; +#endif +#if REALM_MAX_DIM >= 2 + case 2: + ready = + Realm::RegionInstance::create_instance(inst, + memory, + rect_from_dims<2>(shape.dims), + field_sizes, + 0 /*SOA*/, + prs, + wait_on); + break; +#endif +#if REALM_MAX_DIM >= 3 + case 3: + ready = + Realm::RegionInstance::create_instance(inst, + memory, + rect_from_dims<3>(shape.dims), + field_sizes, + 0 /*SOA*/, + prs, + wait_on); + break; +#endif +#if REALM_MAX_DIM >= 4 + case 4: + ready = + Realm::RegionInstance::create_instance(inst, + memory, + rect_from_dims<4>(shape.dims), + field_sizes, + 0 /*SOA*/, + 
prs, + wait_on); + break; +#endif +#if REALM_MAX_DIM >= 5 + case 5: + ready = + Realm::RegionInstance::create_instance(inst, + memory, + rect_from_dims<5>(shape.dims), + field_sizes, + 0 /*SOA*/, + prs, + wait_on); + break; +#endif + default: + PANIC("TensorShape dims greater than REALM_MAX_DIM", + fmt::to_string(shape.dims.ff_ordered.num_dims())); + break; + } + this->outstanding_events.push_back(ready); + return std::pair{inst, ready}; +} + +Realm::Event RealmContext::get_outstanding_events() { + Realm::Event result = this->merge_outstanding_events(); + this->outstanding_events.push_back(result); + return result; +} + +Realm::Event RealmContext::merge_outstanding_events() { + Realm::Event result = Realm::Event::merge_events(this->outstanding_events); + this->outstanding_events.clear(); + return result; +} + +void RealmContext::discover_machine_topology() { + if (!this->processors.empty()) { + return; + } + + Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine()); + for (Realm::Processor proc : pq) { + Realm::AddressSpace as = proc.address_space(); + Realm::Processor::Kind kind = proc.kind(); + this->processors[std::pair{as, kind}].push_back(proc); + } +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/realm_manager.cc b/lib/realm-execution/src/realm-execution/realm_manager.cc new file mode 100644 index 0000000000..fc74fffe5d --- /dev/null +++ b/lib/realm-execution/src/realm-execution/realm_manager.cc @@ -0,0 +1,34 @@ +#include "realm-execution/realm_manager.h" +#include "realm-execution/realm_context.h" +#include "realm-execution/tasks/impl/controller_task.h" +#include "realm-execution/tasks/realm_task_registry.h" + +namespace FlexFlow { + +RealmManager::RealmManager(int *argc, char ***argv) + : RealmContext(Realm::Processor::NO_PROC) { + bool ok = this->runtime.init(argc, argv); + ASSERT(ok); + + // Register all tasks at initialization time so we don't need to later + register_all_tasks().wait(); +} + 
+RealmManager::~RealmManager() { + Realm::Event outstanding = this->merge_outstanding_events(); + this->runtime.shutdown(outstanding); + this->runtime.wait_for_shutdown(); +} + +Realm::Event + RealmManager::start_controller(std::function thunk, + Realm::Event wait_on) { + Realm::Processor target_proc = + Realm::Machine::ProcessorQuery(Realm::Machine::get_machine()) + .only_kind(Realm::Processor::LOC_PROC) + .first(); + + return collective_spawn_controller_task(*this, target_proc, thunk, wait_on); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/controller_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/controller_task.cc new file mode 100644 index 0000000000..285e8acaa7 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/controller_task.cc @@ -0,0 +1,47 @@ +#include "realm-execution/tasks/impl/controller_task.h" +#include "realm-execution/tasks/impl/op_task.h" +#include "realm-execution/tasks/task_id_t.h" + +namespace FlexFlow { + +// Raw task payload for the controller task; holds the thunk to run. +// NOTE(review): this struct is passed as raw bytes (see sizeof/reinterpret_cast +// below) even though it holds a std::function; confirm that is safe here. +struct ControllerTaskArgs { +public: + std::function thunk; +}; + +// Task entry point: rebuilds the args from the raw buffer and invokes the thunk +// with a RealmContext for the executing processor. +void controller_task_body(void const *args, + size_t arglen, + void const *userdata, + size_t userlen, + Realm::Processor proc) { + ASSERT(arglen == sizeof(ControllerTaskArgs)); + ControllerTaskArgs task_args = + *reinterpret_cast(args); + + RealmContext ctx{proc}; + task_args.thunk(ctx); +} + +// Wraps thunk in ControllerTaskArgs and collectively spawns the controller task +// on target_proc, gated on precondition. +Realm::Event + collective_spawn_controller_task(RealmContext &ctx, + Realm::Processor &target_proc, + std::function thunk, + Realm::Event precondition) { + ControllerTaskArgs task_args; + task_args.thunk = thunk; + + return ctx.collective_spawn_task(target_proc, + task_id_t::CONTROLLER_TASK_ID, + &task_args, + sizeof(task_args), + precondition); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_return_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_return_task.cc new file mode 100644 index 0000000000..bda6f7781c --- /dev/null +++ 
b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_return_task.cc @@ -0,0 +1,58 @@ +#include "realm-execution/tasks/impl/device_handle_init_return_task.h" +#include "realm-execution/tasks/impl/device_handle_init_task.h" +#include "realm-execution/tasks/task_id_t.dtg.h" + +namespace FlexFlow { + +// Payload handed to the return task as raw bytes: the handle to deliver, the +// origin processor, and the address on the origin side to store it at. +struct DeviceHandleInitReturnTaskArgs { +public: + DeviceHandleInitReturnTaskArgs() = delete; + DeviceHandleInitReturnTaskArgs( + DeviceSpecificManagedPerDeviceFFHandle result, + Realm::Processor origin_proc, + DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr) + : result(result), origin_proc(origin_proc), + origin_result_ptr(origin_result_ptr) {} + +public: + DeviceSpecificManagedPerDeviceFFHandle result; + Realm::Processor origin_proc; + DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr; +}; + +// Task entry point: checks it runs in the origin processor's address space and +// writes the delivered handle through origin_result_ptr. +void device_handle_init_return_task_body(void const *args, + size_t arglen, + void const *userdata, + size_t userlen, + Realm::Processor proc) { + ASSERT(arglen == sizeof(DeviceHandleInitReturnTaskArgs)); + DeviceHandleInitReturnTaskArgs task_args = + *reinterpret_cast(args); + + ASSERT(task_args.origin_proc.address_space() == proc.address_space()); + *task_args.origin_result_ptr = task_args.result; +} + +// Spawns the return task on origin_proc, gated on precondition. +Realm::Event spawn_device_handle_init_return_task( + RealmContext &ctx, + Realm::Processor origin_proc, + DeviceSpecificManagedPerDeviceFFHandle const &result, + DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr, + Realm::Event precondition) { + DeviceHandleInitReturnTaskArgs task_args{ + result, origin_proc, origin_result_ptr}; + + return ctx.spawn_task(origin_proc, + task_id_t::DEVICE_HANDLE_INIT_RETURN_TASK_ID, + &task_args, + sizeof(task_args), + Realm::ProfilingRequestSet{}, + precondition); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_task.cc new file mode 100644 index 0000000000..b806aa1277 --- /dev/null +++ 
b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_task.cc @@ -0,0 +1,81 @@ +#include "realm-execution/tasks/impl/device_handle_init_task.h" +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/tasks/impl/device_handle_init_return_task.h" +#include "realm-execution/tasks/impl/device_handle_init_task_args.dtg.h" +#include "realm-execution/tasks/impl/serializable_device_handle_init_task_args.h" +#include "realm-execution/tasks/serializer/task_arg_serializer.h" +#include "realm-execution/tasks/task_id_t.dtg.h" +#include + +namespace FlexFlow { + +static std::optional + make_device_handle_for_processor(Realm::Processor processor, + size_t workSpaceSize, + bool allowTensorOpMathConversion) { + switch (processor.kind()) { + case Realm::Processor::LOC_PROC: + return std::nullopt; + case Realm::Processor::TOC_PROC: + return new ManagedPerDeviceFFHandle{initialize_multi_gpu_handle( + /*num_ranks=*/Realm::Machine::get_machine().get_address_space_count(), + /*my_rank=*/processor.address_space(), + /*workSpaceSize=*/workSpaceSize, + /*allowTensorOpMathConversion=*/allowTensorOpMathConversion)}; + default: + PANIC("Unhandled Realm::ProcessorKind", + fmt::to_string(int{processor.kind()})); + } +} + +void device_handle_init_task_body(void const *args, + size_t arglen, + void const *userdata, + size_t userlen, + Realm::Processor proc) { + DeviceHandleInitTaskArgs task_args = + device_handle_init_task_args_from_serializable( + deserialize_task_args(args, + arglen)); + + RealmContext ctx{proc}; + DeviceSpecificManagedPerDeviceFFHandle managed_handle = + make_device_specific_managed_handle( + ctx.get_current_device_idx(), + make_device_handle_for_processor( + proc, + task_args.workSpaceSize, + task_args.allowTensorOpMathConversion)); + + spawn_device_handle_init_return_task(ctx, + task_args.origin_proc, + managed_handle, + task_args.origin_result_ptr, + Realm::Event::NO_EVENT); +} + +Realm::Event 
spawn_device_handle_init_task( + RealmContext &ctx, + Realm::Processor target_proc, + size_t workSpaceSize, + bool allowTensorOpMathConversion, + DeviceSpecificManagedPerDeviceFFHandle *result_ptr, + Realm::Event precondition) { + DeviceHandleInitTaskArgs task_args{ + workSpaceSize, + allowTensorOpMathConversion, + ctx.get_current_processor(), + result_ptr, + }; + + std::string args = serialize_task_args( + device_handle_init_task_args_to_serializable(task_args)); + return ctx.spawn_task(target_proc, + task_id_t::DEVICE_HANDLE_INIT_TASK_ID, + args.data(), + args.size(), + Realm::ProfilingRequestSet{}, + precondition); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_return_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_return_task.cc new file mode 100644 index 0000000000..306697e950 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_return_task.cc @@ -0,0 +1,52 @@ +#include "realm-execution/tasks/impl/device_state_init_return_task.h" +#include "realm-execution/tasks/task_id_t.dtg.h" + +namespace FlexFlow { + +struct DeviceStateInitReturnTaskArgs { +public: + DeviceStateInitReturnTaskArgs() = delete; + DeviceStateInitReturnTaskArgs( + DeviceSpecificPerDeviceOpState result, + Realm::Processor origin_proc, + DeviceSpecificPerDeviceOpState *origin_result_ptr) + : result(result), origin_proc(origin_proc), + origin_result_ptr(origin_result_ptr) {} + +public: + DeviceSpecificPerDeviceOpState result; + Realm::Processor origin_proc; + DeviceSpecificPerDeviceOpState *origin_result_ptr; +}; + +void device_state_init_return_task_body(void const *args, + size_t arglen, + void const *userdata, + size_t userlen, + Realm::Processor proc) { + ASSERT(arglen == sizeof(DeviceStateInitReturnTaskArgs)); + DeviceStateInitReturnTaskArgs task_args = + *reinterpret_cast(args); + + ASSERT(task_args.origin_proc.address_space() == proc.address_space()); + 
*task_args.origin_result_ptr = task_args.result; +} + +Realm::Event spawn_device_state_init_return_task( + RealmContext &ctx, + Realm::Processor origin_proc, + DeviceSpecificPerDeviceOpState const &result, + DeviceSpecificPerDeviceOpState *origin_result_ptr, + Realm::Event precondition) { + DeviceStateInitReturnTaskArgs task_args{ + result, origin_proc, origin_result_ptr}; + + return ctx.spawn_task(origin_proc, + task_id_t::DEVICE_STATE_INIT_RETURN_TASK_ID, + &task_args, + sizeof(task_args), + Realm::ProfilingRequestSet{}, + precondition); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_task.cc new file mode 100644 index 0000000000..99c72cf5e7 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_task.cc @@ -0,0 +1,89 @@ +#include "realm-execution/tasks/impl/device_state_init_task.h" +#include "local-execution/device_state_initialization.h" +#include "realm-execution/tasks/impl/device_state_init_return_task.h" +#include "realm-execution/tasks/impl/device_state_init_task_args.dtg.h" +#include "realm-execution/tasks/impl/serializable_device_state_init_task_args.h" +#include "realm-execution/tasks/serializer/task_arg_serializer.h" +#include "realm-execution/tasks/task_id_t.dtg.h" +#include "realm-execution/tasks/task_id_t.h" +#include "utils/optional.h" +#include +#include + +namespace FlexFlow { + +void device_state_init_task_body(void const *args, + size_t arglen, + void const *userdata, + size_t userlen, + Realm::Processor proc) { + DeviceStateInitTaskArgs task_args = + device_state_init_task_args_from_serializable( + deserialize_task_args(args, + arglen)); + + RealmContext ctx{proc}; + device_handle_t device_handle = + device_handle_t_from_device_specific_managed_handle( + task_args.device_handle, ctx.get_current_device_idx()); + DynamicNodeInvocation result_invocation = + 
initialize_node(task_args.invocation, + ctx.get_current_device_allocator(), + task_args.profiling_settings, + device_handle, + task_args.iteration_config, + task_args.optimizer_attrs, + ctx.get_current_device_idx()); + DeviceSpecificPerDeviceOpState result_state = + assert_unwrap(result_invocation.node_attrs.per_device_op_state); + // Important: to make sure this doesn't get deallocated, we intentionally leak + // the allocation here + DeviceSpecificPerDeviceOpState *result_state_ptr = + new DeviceSpecificPerDeviceOpState{result_state}; + spawn_device_state_init_return_task(ctx, + task_args.origin_proc, + *result_state_ptr, + task_args.origin_result_ptr, + Realm::Event::NO_EVENT); +} + +std::optional spawn_device_state_init_task( + RealmContext &ctx, + Realm::Processor target_proc, + DynamicNodeInvocation const &invocation, + ProfilingSettings const &profiling_settings, + DeviceSpecificManagedPerDeviceFFHandle const &device_handle, + FFIterationConfig const &iteration_config, + OptimizerAttrs const &optimizer_attrs, + DeviceSpecificPerDeviceOpState *result_ptr, + Realm::Event precondition) { + DeviceStateInitTaskArgs task_args{ + invocation, + profiling_settings, + device_handle, + iteration_config, + optimizer_attrs, + ctx.get_current_processor(), + result_ptr, + }; + + std::optional task_id = + and_then(and_then(invocation.node_attrs.op_attrs, + [](TrainingOperationAttrs const &op_attrs) { + return op_attrs.try_require_pcg_op(); + }), + get_init_task_id_for_op_attrs); + if (task_id.has_value()) { + std::string args = serialize_task_args( + device_state_init_task_args_to_serializable(task_args)); + return ctx.spawn_task(target_proc, + assert_unwrap(task_id), + args.data(), + args.size(), + Realm::ProfilingRequestSet{}, + precondition); + } + return std::nullopt; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/op_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/op_task.cc new file mode 100644 index 
0000000000..d8b8873442 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/op_task.cc @@ -0,0 +1,67 @@ +#include "realm-execution/tasks/impl/op_task.h" +#include "local-execution/task_execution.h" +#include "realm-execution/device_specific_managed_per_device_ff_handle.h" +#include "realm-execution/tasks/impl/op_task_args.dtg.h" +#include "realm-execution/tasks/impl/serializable_op_task_args.h" +#include "realm-execution/tasks/serializer/task_arg_serializer.h" +#include "realm-execution/tasks/task_id_t.h" +#include "task-spec/per_device_op_state.h" +#include "utils/optional.h" +#include + +namespace FlexFlow { + +void op_task_body(void const *args, + size_t arglen, + void const *userdata, + size_t userlen, + Realm::Processor proc) { + OpTaskArgs task_args = op_task_args_from_serializable( + deserialize_task_args(args, arglen)); + + RealmContext ctx{proc}; + device_handle_t device_handle = + device_handle_t_from_device_specific_managed_handle( + task_args.device_handle, ctx.get_current_device_idx()); + execute_dynamic_node_invocation( + /*invocation=*/task_args.invocation, + /*allocator=*/ctx.get_current_device_allocator(), + /*profiling_settings=*/task_args.profiling_settings, + /*ff_handle=*/device_handle, + /*per_device_op_state=*/ + transform(task_args.invocation.node_attrs.per_device_op_state, + [&](DeviceSpecificPerDeviceOpState const &op_state) { + return get_device_state_from_device_specific( + op_state, ctx.get_current_device_idx()); + }), + /*iteration_config=*/task_args.iteration_config, + /*optimizer_attrs=*/task_args.optimizer_attrs, + /*device_idx=*/ctx.get_current_device_idx()); +} + +Realm::Event + spawn_op_task(RealmContext &ctx, + Realm::Processor target_proc, + DynamicNodeInvocation const &invocation, + ProfilingSettings const &profiling_settings, + DeviceSpecificManagedPerDeviceFFHandle const &device_handle, + FFIterationConfig const &iteration_config, + std::optional const &optimizer_attrs, + Realm::Event precondition) { + 
OpTaskArgs task_args{invocation, + profiling_settings, + device_handle, + iteration_config, + optimizer_attrs}; + std::string args = + serialize_task_args(op_task_args_to_serializable(task_args)); + return ctx.spawn_task( + target_proc, + assert_unwrap(get_task_id_for_op(invocation.node_attrs, optimizer_attrs)), + args.data(), + args.size(), + Realm::ProfilingRequestSet{}, + precondition); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_handle_init_task_args.cc b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_handle_init_task_args.cc new file mode 100644 index 0000000000..a44a5a5db1 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_handle_init_task_args.cc @@ -0,0 +1,28 @@ +#include "realm-execution/tasks/impl/serializable_device_handle_init_task_args.h" + +namespace FlexFlow { + +SerializableDeviceHandleInitTaskArgs + device_handle_init_task_args_to_serializable( + DeviceHandleInitTaskArgs const &args) { + return SerializableDeviceHandleInitTaskArgs{ + /*workSpaceSize=*/args.workSpaceSize, + /*allowTensorOpMathConversion=*/args.allowTensorOpMathConversion, + /*origin_proc=*/realm_processor_to_serializable(args.origin_proc), + /*origin_result_ptr=*/reinterpret_cast(args.origin_result_ptr), + }; +} + +DeviceHandleInitTaskArgs device_handle_init_task_args_from_serializable( + SerializableDeviceHandleInitTaskArgs const &args) { + return DeviceHandleInitTaskArgs{ + /*workSpaceSize=*/args.workSpaceSize, + /*allowTensorOpMathConversion=*/args.allowTensorOpMathConversion, + /*origin_proc=*/realm_processor_from_serializable(args.origin_proc), + /*origin_result_ptr=*/ + reinterpret_cast( + args.origin_result_ptr), + }; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_state_init_task_args.cc 
b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_state_init_task_args.cc new file mode 100644 index 0000000000..528ff26867 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_state_init_task_args.cc @@ -0,0 +1,36 @@ +#include "realm-execution/tasks/impl/serializable_device_state_init_task_args.h" +#include "realm-execution/tasks/serializer/serializable_realm_processor.h" +#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.h" + +namespace FlexFlow { + +SerializableDeviceStateInitTaskArgs device_state_init_task_args_to_serializable( + DeviceStateInitTaskArgs const &args) { + return SerializableDeviceStateInitTaskArgs{ + /*invocation=*/dynamic_node_invocation_to_serializable(args.invocation), + /*profiling_settings=*/args.profiling_settings, + /*device_handle=*/args.device_handle.serialize(), + /*iteration_config=*/args.iteration_config, + /*optimizer_attrs=*/args.optimizer_attrs, + /*origin_proc=*/realm_processor_to_serializable(args.origin_proc), + /*origin_result_ptr=*/reinterpret_cast(args.origin_result_ptr), + }; +} + +DeviceStateInitTaskArgs device_state_init_task_args_from_serializable( + SerializableDeviceStateInitTaskArgs const &args) { + return DeviceStateInitTaskArgs{ + /*invocation=*/dynamic_node_invocation_from_serializable(args.invocation), + /*profiling_settings=*/args.profiling_settings, + /*device_handle=*/ + DeviceSpecificManagedPerDeviceFFHandle::deserialize(args.device_handle), + /*iteration_config=*/args.iteration_config, + /*optimizer_attrs=*/args.optimizer_attrs, + /*origin_proc=*/realm_processor_from_serializable(args.origin_proc), + /*origin_result_ptr=*/ + reinterpret_cast( + args.origin_result_ptr), + }; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/serializable_op_task_args.cc b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_op_task_args.cc new file mode 100644 index 0000000000..0513bc6df7 --- 
/dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_op_task_args.cc @@ -0,0 +1,27 @@ +#include "realm-execution/tasks/impl/serializable_op_task_args.h" +#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.h" + +namespace FlexFlow { + +SerializableOpTaskArgs op_task_args_to_serializable(OpTaskArgs const &args) { + return SerializableOpTaskArgs{ + /*invocation=*/dynamic_node_invocation_to_serializable(args.invocation), + /*profiling_settings=*/args.profiling_settings, + /*device_handle=*/args.device_handle.serialize(), + /*iteration_config=*/args.iteration_config, + /*optimizer_attrs=*/args.optimizer_attrs, + }; +} + +OpTaskArgs op_task_args_from_serializable(SerializableOpTaskArgs const &args) { + return OpTaskArgs{ + /*invocation=*/dynamic_node_invocation_from_serializable(args.invocation), + /*profiling_settings=*/args.profiling_settings, + /*device_handle=*/ + DeviceSpecificManagedPerDeviceFFHandle::deserialize(args.device_handle), + /*iteration_config=*/args.iteration_config, + /*optimizer_attrs=*/args.optimizer_attrs, + }; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/realm_task_id_t.cc b/lib/realm-execution/src/realm-execution/tasks/realm_task_id_t.cc new file mode 100644 index 0000000000..ec1aa143a6 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/realm_task_id_t.cc @@ -0,0 +1,10 @@ +#include "realm-execution/tasks/realm_task_id_t.h" + +namespace FlexFlow { + +Realm::Processor::TaskFuncID get_realm_task_id_for_task_id(task_id_t task_id) { + return Realm::Processor::TASK_ID_FIRST_AVAILABLE + + static_cast(task_id); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/realm_task_registry.cc b/lib/realm-execution/src/realm-execution/tasks/realm_task_registry.cc new file mode 100644 index 0000000000..cff12c2391 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/realm_task_registry.cc @@ -0,0 +1,149 @@ 
+#include "realm-execution/tasks/realm_task_registry.h" +#include "realm-execution/tasks/impl/controller_task.h" +#include "realm-execution/tasks/impl/device_handle_init_return_task.h" +#include "realm-execution/tasks/impl/device_handle_init_task.h" +#include "realm-execution/tasks/impl/device_state_init_return_task.h" +#include "realm-execution/tasks/impl/device_state_init_task.h" +#include "realm-execution/tasks/impl/op_task.h" +#include "realm-execution/tasks/realm_task_id_t.h" +#include "utils/exception.h" + +namespace FlexFlow { + +Realm::Event register_task(Realm::Processor::Kind target_kind, + task_id_t func_id, + void (*task_body)(void const *, + size_t, + void const *, + size_t, + Realm::Processor)) { + return Realm::Processor::register_task_by_kind( + target_kind, + /*global=*/false, + get_realm_task_id_for_task_id(func_id), + Realm::CodeDescriptor(task_body), + Realm::ProfilingRequestSet()); +} + +Realm::Event register_all_tasks() { + std::vector pending_registrations; + + std::vector init_task_ids = { + // Init tasks + task_id_t::BATCHNORM_INIT_TASK_ID, + task_id_t::COMBINE_INIT_TASK_ID, + task_id_t::CONV2D_INIT_TASK_ID, + task_id_t::DROPOUT_INIT_TASK_ID, + task_id_t::ELEMENTBINARY_INIT_TASK_ID, + task_id_t::ELEMENTUNARY_INIT_TASK_ID, + task_id_t::GATHER_INIT_TASK_ID, + task_id_t::LAYERNORM_INIT_TASK_ID, + task_id_t::LINEAR_INIT_TASK_ID, + task_id_t::ATTENTION_INIT_TASK_ID, + task_id_t::POOL2D_INIT_TASK_ID, + task_id_t::REDUCE_INIT_TASK_ID, + task_id_t::REDUCTION_INIT_TASK_ID, + task_id_t::REPARTITION_INIT_TASK_ID, + task_id_t::REPLICATE_INIT_TASK_ID, + task_id_t::SOFTMAX_INIT_TASK_ID, + }; + + for (task_id_t task_id : init_task_ids) { + pending_registrations.push_back(register_task( + Realm::Processor::TOC_PROC, task_id, device_state_init_task_body)); + } + + std::vector task_ids = { + // Forward tasks + task_id_t::BATCHMATMUL_FWD_TASK_ID, + task_id_t::BATCHNORM_FWD_TASK_ID, + task_id_t::BROADCAST_FWD_TASK_ID, + task_id_t::CAST_FWD_TASK_ID, + 
task_id_t::COMBINE_FWD_TASK_ID, + task_id_t::CONCAT_FWD_TASK_ID, + task_id_t::CONV2D_FWD_TASK_ID, + task_id_t::DROPOUT_FWD_TASK_ID, + task_id_t::ELEMENTBINARY_FWD_TASK_ID, + task_id_t::ELEMENTUNARY_FWD_TASK_ID, + task_id_t::EMBED_FWD_TASK_ID, + task_id_t::FLAT_FWD_TASK_ID, + task_id_t::GATHER_FWD_TASK_ID, + task_id_t::LAYERNORM_FWD_TASK_ID, + task_id_t::LINEAR_FWD_TASK_ID, + task_id_t::ATTENTION_FWD_TASK_ID, + task_id_t::POOL2D_FWD_TASK_ID, + task_id_t::REDUCE_FWD_TASK_ID, + task_id_t::REDUCTION_FWD_TASK_ID, + task_id_t::REPARTITION_FWD_TASK_ID, + task_id_t::REPLICATE_FWD_TASK_ID, + task_id_t::RESHAPE_FWD_TASK_ID, + task_id_t::REVERSE_FWD_TASK_ID, + task_id_t::SOFTMAX_FWD_TASK_ID, + task_id_t::SPLIT_FWD_TASK_ID, + task_id_t::TOPK_FWD_TASK_ID, + task_id_t::TRANSPOSE_FWD_TASK_ID, + + // Backward tasks + task_id_t::BATCHMATMUL_BWD_TASK_ID, + task_id_t::BATCHNORM_BWD_TASK_ID, + task_id_t::BROADCAST_BWD_TASK_ID, + task_id_t::CAST_BWD_TASK_ID, + task_id_t::COMBINE_BWD_TASK_ID, + task_id_t::CONCAT_BWD_TASK_ID, + task_id_t::CONV2D_BWD_TASK_ID, + task_id_t::DROPOUT_BWD_TASK_ID, + task_id_t::ELEMENTBINARY_BWD_TASK_ID, + task_id_t::ELEMENTUNARY_BWD_TASK_ID, + task_id_t::EMBED_BWD_TASK_ID, + task_id_t::FLAT_BWD_TASK_ID, + task_id_t::GATHER_BWD_TASK_ID, + task_id_t::LAYERNORM_BWD_TASK_ID, + task_id_t::LINEAR_BWD_TASK_ID, + task_id_t::ATTENTION_BWD_TASK_ID, + task_id_t::POOL2D_BWD_TASK_ID, + task_id_t::REDUCE_BWD_TASK_ID, + task_id_t::REDUCTION_BWD_TASK_ID, + task_id_t::REPARTITION_BWD_TASK_ID, + task_id_t::REPLICATE_BWD_TASK_ID, + task_id_t::RESHAPE_BWD_TASK_ID, + task_id_t::REVERSE_BWD_TASK_ID, + task_id_t::SOFTMAX_BWD_TASK_ID, + task_id_t::SPLIT_BWD_TASK_ID, + task_id_t::TOPK_BWD_TASK_ID, + task_id_t::TRANSPOSE_BWD_TASK_ID, + + // Update tasks + task_id_t::SGD_UPD_NCCL_TASK_ID, + task_id_t::ADAM_UPD_NCCL_TASK_ID, + }; + + for (task_id_t task_id : task_ids) { + pending_registrations.push_back( + register_task(Realm::Processor::LOC_PROC, task_id, op_task_body)); + 
pending_registrations.push_back( + register_task(Realm::Processor::TOC_PROC, task_id, op_task_body)); + } + + pending_registrations.push_back(register_task(Realm::Processor::LOC_PROC, + task_id_t::CONTROLLER_TASK_ID, + controller_task_body)); + pending_registrations.push_back( + register_task(Realm::Processor::LOC_PROC, + task_id_t::DEVICE_HANDLE_INIT_TASK_ID, + device_handle_init_task_body)); + pending_registrations.push_back( + register_task(Realm::Processor::TOC_PROC, + task_id_t::DEVICE_HANDLE_INIT_TASK_ID, + device_handle_init_task_body)); + pending_registrations.push_back( + register_task(Realm::Processor::LOC_PROC, + task_id_t::DEVICE_HANDLE_INIT_RETURN_TASK_ID, + device_handle_init_return_task_body)); + pending_registrations.push_back( + register_task(Realm::Processor::LOC_PROC, + task_id_t::DEVICE_STATE_INIT_RETURN_TASK_ID, + device_state_init_return_task_body)); + return Realm::Event::merge_events(pending_registrations); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_realm_processor.cc b/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_realm_processor.cc new file mode 100644 index 0000000000..b16e2891c4 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_realm_processor.cc @@ -0,0 +1,15 @@ +#include "realm-execution/tasks/serializer/serializable_realm_processor.h" + +namespace FlexFlow { + +SerializableRealmProcessor + realm_processor_to_serializable(Realm::Processor const &proc) { + return SerializableRealmProcessor{proc.id}; +} + +Realm::Processor + realm_processor_from_serializable(SerializableRealmProcessor const &proc) { + return Realm::Processor{proc.id}; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/task_id_t.cc b/lib/realm-execution/src/realm-execution/tasks/task_id_t.cc new file mode 100644 index 0000000000..94e1b887e7 --- /dev/null +++ 
b/lib/realm-execution/src/realm-execution/tasks/task_id_t.cc @@ -0,0 +1,193 @@ +#include "realm-execution/tasks/task_id_t.h" +#include "pcg/optimizer_attrs.dtg.h" +#include "pcg/optimizers/adam_optimizer_attrs.dtg.h" +#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h" +#include "utils/optional.h" +#include "utils/overload.h" + +namespace FlexFlow { + +std::optional + get_task_id_for_op(DynamicNodeAttrs const &node_attrs, + std::optional const &optimizer_attrs) { + DynamicTaskType task_type = assert_unwrap(node_attrs.task_type); + switch (task_type) { + case DynamicTaskType::FWD: + return get_fwd_task_id_for_op_attrs( + assert_unwrap(node_attrs.op_attrs).require_pcg_op()); + case DynamicTaskType::BWD: + return get_bwd_task_id_for_op_attrs( + assert_unwrap(node_attrs.op_attrs).require_pcg_op()); + case DynamicTaskType::UPD: + return get_update_task_id_for_optimizer_attrs( + assert_unwrap(optimizer_attrs)); + case DynamicTaskType::LOSS: + return task_id_t::LOSS_BWD_TASK_ID; + default: + PANIC("Unhandled DynamicTaskType", task_type); + } +} + +std::optional + get_init_task_id_for_op_attrs(PCGOperatorAttrs const &op_attrs) { + + return op_attrs.visit>(overload{ + [](BatchMatmulAttrs const &) { return std::nullopt; }, + [](BatchNormAttrs const &) { return task_id_t::BATCHNORM_INIT_TASK_ID; }, + [](BroadcastAttrs const &) { return std::nullopt; }, + [](CastAttrs const &) { return std::nullopt; }, + [](CombineAttrs const &attrs) { return task_id_t::COMBINE_INIT_TASK_ID; }, + [](ConcatAttrs const &) { return std::nullopt; }, + [](Conv2DAttrs const &) { return task_id_t::CONV2D_INIT_TASK_ID; }, + [](DropoutAttrs const &) { return task_id_t::DROPOUT_INIT_TASK_ID; }, + [](ElementBinaryAttrs const &) { + return task_id_t::ELEMENTBINARY_INIT_TASK_ID; + }, + [](ElementUnaryAttrs const &) { + return task_id_t::ELEMENTUNARY_INIT_TASK_ID; + }, + [](EmbeddingAttrs const &) { return std::nullopt; }, + [](FlatAttrs const &) { return std::nullopt; }, + [](GatherAttrs const &) { 
return task_id_t::GATHER_INIT_TASK_ID; }, + [](InputAttrs const &) { return std::nullopt; }, + [](LayerNormAttrs const &) { return task_id_t::LAYERNORM_INIT_TASK_ID; }, + [](LinearAttrs const &) { return task_id_t::LINEAR_INIT_TASK_ID; }, + [](MultiHeadAttentionAttrs const &) { + return task_id_t::ATTENTION_INIT_TASK_ID; + }, + [](NoopAttrs const &) { return std::nullopt; }, + [](Pool2DAttrs const &) { return task_id_t::POOL2D_INIT_TASK_ID; }, + [](ReduceAttrs const &) { return task_id_t::REDUCE_INIT_TASK_ID; }, + [](ReductionAttrs const &attrs) { + return task_id_t::REDUCTION_INIT_TASK_ID; + }, + [](RepartitionAttrs const &attrs) { + return task_id_t::REPARTITION_INIT_TASK_ID; + }, + [](ReplicateAttrs const &attrs) { + return task_id_t::REPLICATE_INIT_TASK_ID; + }, + [](ReshapeAttrs const &) { return std::nullopt; }, + [](ReverseAttrs const &) { return std::nullopt; }, + [](SoftmaxAttrs const &) { return task_id_t::SOFTMAX_INIT_TASK_ID; }, + [](SplitAttrs const &) { return std::nullopt; }, + [](TopKAttrs const &) { return std::nullopt; }, + [](TransposeAttrs const &) { return std::nullopt; }, + [](WeightAttrs const &) { return std::nullopt; }, + }); +} + +std::optional + get_fwd_task_id_for_op_attrs(PCGOperatorAttrs const &op_attrs) { + + return op_attrs.visit>(overload{ + [](BatchMatmulAttrs const &) { + return task_id_t::BATCHMATMUL_FWD_TASK_ID; + }, + [](BatchNormAttrs const &) { return task_id_t::BATCHNORM_FWD_TASK_ID; }, + [](BroadcastAttrs const &) { return task_id_t::BROADCAST_FWD_TASK_ID; }, + [](CastAttrs const &) { return task_id_t::CAST_FWD_TASK_ID; }, + [](CombineAttrs const &attrs) { return task_id_t::COMBINE_FWD_TASK_ID; }, + [](ConcatAttrs const &) { return task_id_t::CONCAT_FWD_TASK_ID; }, + [](Conv2DAttrs const &) { return task_id_t::CONV2D_FWD_TASK_ID; }, + [](DropoutAttrs const &) { return task_id_t::DROPOUT_FWD_TASK_ID; }, + [](ElementBinaryAttrs const &) { + return task_id_t::ELEMENTBINARY_FWD_TASK_ID; + }, + [](ElementUnaryAttrs const &) { + 
return task_id_t::ELEMENTUNARY_FWD_TASK_ID; + }, + [](EmbeddingAttrs const &) { return task_id_t::EMBED_FWD_TASK_ID; }, + [](FlatAttrs const &) { return task_id_t::FLAT_FWD_TASK_ID; }, + [](GatherAttrs const &) { return task_id_t::GATHER_FWD_TASK_ID; }, + [](InputAttrs const &) { return std::nullopt; }, + [](LayerNormAttrs const &) { return task_id_t::LAYERNORM_FWD_TASK_ID; }, + [](LinearAttrs const &) { return task_id_t::LINEAR_FWD_TASK_ID; }, + [](MultiHeadAttentionAttrs const &) { + return task_id_t::ATTENTION_FWD_TASK_ID; + }, + [](NoopAttrs const &) { return std::nullopt; }, + [](Pool2DAttrs const &) { return task_id_t::POOL2D_FWD_TASK_ID; }, + [](ReduceAttrs const &) { return task_id_t::REDUCE_FWD_TASK_ID; }, + [](ReductionAttrs const &attrs) { + return task_id_t::REDUCTION_FWD_TASK_ID; + }, + [](RepartitionAttrs const &attrs) { + return task_id_t::REPARTITION_FWD_TASK_ID; + }, + [](ReplicateAttrs const &attrs) { + return task_id_t::REPLICATE_FWD_TASK_ID; + }, + [](ReshapeAttrs const &) { return task_id_t::RESHAPE_FWD_TASK_ID; }, + [](ReverseAttrs const &) { return task_id_t::REVERSE_FWD_TASK_ID; }, + [](SoftmaxAttrs const &) { return task_id_t::SOFTMAX_FWD_TASK_ID; }, + [](SplitAttrs const &) { return task_id_t::SPLIT_FWD_TASK_ID; }, + [](TopKAttrs const &) { return task_id_t::TOPK_FWD_TASK_ID; }, + [](TransposeAttrs const &) { return task_id_t::TRANSPOSE_FWD_TASK_ID; }, + [](WeightAttrs const &) { return std::nullopt; }, + }); +} + +std::optional + get_bwd_task_id_for_op_attrs(PCGOperatorAttrs const &op_attrs) { + + return op_attrs.visit>(overload{ + [](BatchMatmulAttrs const &) { + return task_id_t::BATCHMATMUL_BWD_TASK_ID; + }, + [](BatchNormAttrs const &) { return task_id_t::BATCHNORM_BWD_TASK_ID; }, + [](BroadcastAttrs const &) { return task_id_t::BROADCAST_BWD_TASK_ID; }, + [](CastAttrs const &) { return task_id_t::CAST_BWD_TASK_ID; }, + [](CombineAttrs const &attrs) { return task_id_t::COMBINE_BWD_TASK_ID; }, + [](ConcatAttrs const &) { return 
task_id_t::CONCAT_BWD_TASK_ID; }, + [](Conv2DAttrs const &) { return task_id_t::CONV2D_BWD_TASK_ID; }, + [](DropoutAttrs const &) { return task_id_t::DROPOUT_BWD_TASK_ID; }, + [](ElementBinaryAttrs const &) { + return task_id_t::ELEMENTBINARY_BWD_TASK_ID; + }, + [](ElementUnaryAttrs const &) { + return task_id_t::ELEMENTUNARY_BWD_TASK_ID; + }, + [](EmbeddingAttrs const &) { return task_id_t::EMBED_BWD_TASK_ID; }, + [](FlatAttrs const &) { return task_id_t::FLAT_BWD_TASK_ID; }, + [](GatherAttrs const &) { return task_id_t::GATHER_BWD_TASK_ID; }, + [](InputAttrs const &) { return std::nullopt; }, + [](LayerNormAttrs const &) { return task_id_t::LAYERNORM_BWD_TASK_ID; }, + [](LinearAttrs const &) { return task_id_t::LINEAR_BWD_TASK_ID; }, + [](MultiHeadAttentionAttrs const &) { + return task_id_t::ATTENTION_BWD_TASK_ID; + }, + [](NoopAttrs const &) { return std::nullopt; }, + [](Pool2DAttrs const &) { return task_id_t::POOL2D_BWD_TASK_ID; }, + [](ReduceAttrs const &) { return task_id_t::REDUCE_BWD_TASK_ID; }, + [](ReductionAttrs const &attrs) { + return task_id_t::REDUCTION_BWD_TASK_ID; + }, + [](RepartitionAttrs const &attrs) { + return task_id_t::REPARTITION_BWD_TASK_ID; + }, + [](ReplicateAttrs const &attrs) { + return task_id_t::REPLICATE_BWD_TASK_ID; + }, + [](ReshapeAttrs const &) { return task_id_t::RESHAPE_BWD_TASK_ID; }, + [](ReverseAttrs const &) { return task_id_t::REVERSE_BWD_TASK_ID; }, + [](SoftmaxAttrs const &) { return task_id_t::SOFTMAX_BWD_TASK_ID; }, + [](SplitAttrs const &) { return task_id_t::SPLIT_BWD_TASK_ID; }, + [](TopKAttrs const &) { return task_id_t::TOPK_BWD_TASK_ID; }, + [](TransposeAttrs const &) { return task_id_t::TRANSPOSE_BWD_TASK_ID; }, + [](WeightAttrs const &) { return std::nullopt; }, + }); +} + +std::optional get_update_task_id_for_optimizer_attrs( + OptimizerAttrs const &optimizer_attrs) { + + return optimizer_attrs.visit>(overload{ + [](SGDOptimizerAttrs const &) { return task_id_t::SGD_UPD_NCCL_TASK_ID; }, + 
[](AdamOptimizerAttrs const &) { + return task_id_t::ADAM_UPD_NCCL_TASK_ID; + }, + }); +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tensor_instance_backing.cc b/lib/realm-execution/src/realm-execution/tensor_instance_backing.cc new file mode 100644 index 0000000000..53c2a2b271 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tensor_instance_backing.cc @@ -0,0 +1,11 @@ +#include "realm-execution/tensor_instance_backing.h" + +namespace FlexFlow { + +TensorInstanceBacking make_empty_tensor_instance_backing() { + return TensorInstanceBacking{ + /*backing=*/{}, + }; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/test/CMakeLists.txt b/lib/realm-execution/test/CMakeLists.txt new file mode 100644 index 0000000000..b3beff42c0 --- /dev/null +++ b/lib/realm-execution/test/CMakeLists.txt @@ -0,0 +1,15 @@ +ff_add_test_executable( + NAME + realm-execution-tests + SRC_PATTERNS + src/*.cc + PRIVATE_INCLUDE + src/ + DEPS + doctest + utils-test-common + realm-execution + kernels + op-attrs + task-spec +) diff --git a/lib/realm-execution/test/src/internal/realm_test_utils.cc b/lib/realm-execution/test/src/internal/realm_test_utils.cc new file mode 100644 index 0000000000..e381feb8de --- /dev/null +++ b/lib/realm-execution/test/src/internal/realm_test_utils.cc @@ -0,0 +1,28 @@ +#include "internal/realm_test_utils.h" +#include +#include + +namespace FlexFlow { + +static char *leak_string_contents(std::string const &str) { + // Realm command-line arguments require char* so intentionally leak the + // allocated string contents here + std::vector *content = new std::vector{str.begin(), str.end()}; + content->push_back(0); // NUL byte + return content->data(); +} + +std::vector make_fake_realm_args(positive_int num_cpus, + nonnegative_int num_gpus) { + std::vector result; + result.push_back(leak_string_contents("fake_executable_name")); + result.push_back(leak_string_contents("-ll:cpu")); + 
result.push_back(leak_string_contents(fmt::to_string(num_cpus))); + if (num_gpus > 0) { + result.push_back(leak_string_contents("-ll:gpu")); + result.push_back(leak_string_contents(fmt::to_string(num_gpus))); + } + return result; +} + +} // namespace FlexFlow diff --git a/lib/realm-execution/test/src/internal/realm_test_utils.h b/lib/realm-execution/test/src/internal/realm_test_utils.h new file mode 100644 index 0000000000..8e2775ad8b --- /dev/null +++ b/lib/realm-execution/test/src/internal/realm_test_utils.h @@ -0,0 +1,15 @@ +#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_TEST_SRC_INTERNAL_REALM_TEST_UTILS_H +#define _FLEXFLOW_LIB_REALM_EXECUTION_TEST_SRC_INTERNAL_REALM_TEST_UTILS_H + +#include "utils/nonnegative_int/nonnegative_int.h" +#include "utils/positive_int/positive_int.h" +#include + +namespace FlexFlow { + +std::vector make_fake_realm_args(positive_int num_cpus, + nonnegative_int num_gpus); + +} // namespace FlexFlow + +#endif diff --git a/lib/realm-execution/test/src/realm-execution/distributed_device_handle.cc b/lib/realm-execution/test/src/realm-execution/distributed_device_handle.cc new file mode 100644 index 0000000000..fb7dff01e3 --- /dev/null +++ b/lib/realm-execution/test/src/realm-execution/distributed_device_handle.cc @@ -0,0 +1,36 @@ +#include "realm-execution/distributed_device_handle.h" +#include "internal/realm_test_utils.h" +#include "realm-execution/realm_manager.h" +#include + +namespace test { + +using namespace ::FlexFlow; +namespace Realm = ::FlexFlow::Realm; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("DistributedDeviceHandle") { + std::vector fake_args = + make_fake_realm_args(/*num_cpus=*/2_p, /*num_gpus=*/0_n); + int fake_argc = fake_args.size(); + char **fake_argv = fake_args.data(); + + RealmManager manager(&fake_argc, &fake_argv); + + (void)manager.start_controller([](RealmContext &ctx) { + DistributedDeviceHandle handle = create_distributed_device_handle( + /*ctx=*/ctx, + /*workSpaceSize=*/1024 * 1024, + 
/*allowTensorOpMathConversion=*/true); + + // Make sure we have handles for the processors we're expecting + Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine()); + pq.only_kind(Realm::Processor::LOC_PROC); + for (Realm::Processor proc : pq) { + handle.at(proc); + } + }); + } +} + +} // namespace test diff --git a/lib/realm-execution/test/src/realm-execution/realm_manager.cc b/lib/realm-execution/test/src/realm-execution/realm_manager.cc new file mode 100644 index 0000000000..450d7fd3ec --- /dev/null +++ b/lib/realm-execution/test/src/realm-execution/realm_manager.cc @@ -0,0 +1,33 @@ +#include "realm-execution/realm_manager.h" +#include "internal/realm_test_utils.h" +#include "realm-execution/distributed_device_handle.h" +#include + +namespace test { + +using namespace ::FlexFlow; +namespace Realm = ::FlexFlow::Realm; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("RealmManager") { + std::vector fake_args = + make_fake_realm_args(/*num_cpus=*/1_p, /*num_gpus=*/0_n); + int fake_argc = fake_args.size(); + char **fake_argv = fake_args.data(); + + // Initialize Realm + RealmManager manager(&fake_argc, &fake_argv); + + // Launch a controller + int some_data = 123; + Realm::Event event = manager.start_controller([&](RealmContext &ctx) { + // Data is captured and retains value + ASSERT(some_data == 123); + }); + // Need to block on the completion of the event to ensure we don't race, + // because the lambda captures the environment + event.wait(); + } +} + +} // namespace test diff --git a/lib/realm-execution/test/src/realm-execution/test_e2e.cc b/lib/realm-execution/test/src/realm-execution/test_e2e.cc new file mode 100644 index 0000000000..8e5edf72ad --- /dev/null +++ b/lib/realm-execution/test/src/realm-execution/test_e2e.cc @@ -0,0 +1,230 @@ +#include "internal/realm_test_utils.h" +#include "kernels/allocation.h" +#include "op-attrs/tensor_shape.dtg.h" +#include "op-attrs/tensor_slot_name.dtg.h" +#include "pcg/device_type.dtg.h" +#include 
"pcg/machine_space_coordinate.dtg.h" +#include "pcg/mapped_parallel_computation_graph/operator_atomic_task_shard_binding.dtg.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" +#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h" +#include "realm-execution/distributed_device_handle.h" +#include "realm-execution/pcg_instance/pcg_instance.h" +#include "realm-execution/realm_manager.h" +#include "utils/containers/require_only_key.h" +#include + +namespace test { + +using namespace ::FlexFlow; +namespace Realm = ::FlexFlow::Realm; + +TEST_SUITE(FF_TEST_SUITE) { + TEST_CASE("RealmBackend e2e Training") { + std::vector fake_args = + make_fake_realm_args(/*num_cpus=*/1_p, /*num_gpus=*/0_n); + int fake_argc = fake_args.size(); + char **fake_argv = fake_args.data(); + + RealmManager manager(&fake_argc, &fake_argv); + + (void)manager.start_controller([](RealmContext &ctx) { + Allocator allocator = ctx.get_current_device_allocator(); + + positive_int batch_size = 10_p; + positive_int data_dim = 16_p; + positive_int hidden_dim = 32_p; + positive_int output_dim = 1_p; + + TensorShape output_tensor_shape = TensorShape{ + TensorDims{FFOrdered{batch_size, output_dim}}, DataType::FLOAT}; + + GenericTensorAccessorW label_tensor_backing = + allocator.allocate_tensor(output_tensor_shape); + + // construct computation graph + ParallelComputationGraph pcg = empty_parallel_computation_graph(); + + TensorShape input_tensor_shape = TensorShape{ + TensorDims{FFOrdered{batch_size, data_dim}}, DataType::FLOAT}; + + TensorShape label_tensor_shape = TensorShape{ + TensorDims{FFOrdered{batch_size, output_dim}}, DataType::FLOAT}; + GenericTensorAccessorW label_tensor = + allocator.allocate_tensor(label_tensor_shape); + + TensorShape weight_shape_1 = TensorShape{ + TensorDims{FFOrdered{hidden_dim, data_dim}}, DataType::FLOAT}; + TensorShape weight_shape_2 = TensorShape{ + 
TensorDims{FFOrdered{output_dim, hidden_dim}}, DataType::FLOAT}; + + ParallelLayerAddedResult inputs_layer = + pcg_add_input_layer_with_grad(pcg, input_tensor_shape); + parallel_tensor_guid_t t_input = + require_only_key(inputs_layer.outputs, TensorSlotName::OUTPUT); + + ParallelLayerAddedResult weights_layer_1 = add_parallel_layer( + pcg, + ParallelLayerAttrs{ + PCGOperatorAttrs{WeightAttrs{ + weight_shape_1, InitializerAttrs{GlorotNormalAttrs{0}}}}, + std::nullopt}, + {}, + {}); + parallel_tensor_guid_t t_weights_1 = + require_only_key(weights_layer_1.outputs, TensorSlotName::OUTPUT); + + ParallelLayerAddedResult weights_layer_2 = add_parallel_layer( + pcg, + ParallelLayerAttrs{ + PCGOperatorAttrs{WeightAttrs{ + weight_shape_2, InitializerAttrs{GlorotNormalAttrs{0}}}}, + std::nullopt}, + {}, + {}); + parallel_tensor_guid_t t_weights_2 = + require_only_key(weights_layer_2.outputs, TensorSlotName::OUTPUT); + + ParallelLayerAddedResult linear_operator_1 = add_parallel_layer( + pcg, + ParallelLayerAttrs{PCGOperatorAttrs{LinearAttrs{hidden_dim, + /*use_bias=*/false, + DataType::FLOAT, + Activation::RELU, + std::nullopt}}, + std::nullopt}, + { + { + TensorSlotName::INPUT, + t_input, + }, + }, + { + { + TensorSlotName::WEIGHT, + t_weights_1, + }, + }); + parallel_tensor_guid_t t_linear_1 = + require_only_key(linear_operator_1.outputs, TensorSlotName::OUTPUT); + + ParallelLayerAddedResult linear_operator_2 = add_parallel_layer( + pcg, + ParallelLayerAttrs{PCGOperatorAttrs{LinearAttrs{output_dim, + /*use_bias=*/false, + DataType::FLOAT, + Activation::RELU, + std::nullopt}}, + std::nullopt}, + { + { + TensorSlotName::INPUT, + t_linear_1, + }, + }, + { + { + TensorSlotName::WEIGHT, + t_weights_2, + }, + }); + parallel_tensor_guid_t t_linear_2 = + require_only_key(linear_operator_2.outputs, TensorSlotName::OUTPUT); + + MachineSpaceCoordinate cpu0{0_n, 0_n, DeviceType::CPU}; + ParallelTensorSpaceCoordinate tensor_coord0{0_n, 0_n, FFOrdered{0_n}}; + 
MappedParallelComputationGraph mpcg{ + pcg, + { + {inputs_layer.parallel_layer, + MappedOperatorTaskGroup{ + {{cpu0, + OperatorAtomicTaskShardBinding{ + {{TensorSlotName::OUTPUT, tensor_coord0}}}}}}}, + {weights_layer_1.parallel_layer, + MappedOperatorTaskGroup{ + {{cpu0, + OperatorAtomicTaskShardBinding{ + {{TensorSlotName::OUTPUT, tensor_coord0}}}}}}}, + {weights_layer_2.parallel_layer, + MappedOperatorTaskGroup{ + {{cpu0, + OperatorAtomicTaskShardBinding{ + {{TensorSlotName::OUTPUT, tensor_coord0}}}}}}}, + {linear_operator_1.parallel_layer, + MappedOperatorTaskGroup{ + {{cpu0, + OperatorAtomicTaskShardBinding{{ + {TensorSlotName::INPUT, tensor_coord0}, + {TensorSlotName::WEIGHT, tensor_coord0}, + {TensorSlotName::OUTPUT, tensor_coord0}, + }}}}}}, + {linear_operator_2.parallel_layer, + MappedOperatorTaskGroup{ + {{cpu0, + OperatorAtomicTaskShardBinding{{ + {TensorSlotName::INPUT, tensor_coord0}, + {TensorSlotName::WEIGHT, tensor_coord0}, + {TensorSlotName::OUTPUT, tensor_coord0}, + }}}}}}, + }, + }; + + // instantiate computation graph + LossAttrs loss_attrs = LossAttrs{ + NonconfigurableLossAttrs{LossFunction::CATEGORICAL_CROSSENTROPY}}; + OptimizerAttrs optimizer_attrs = + OptimizerAttrs{SGDOptimizerAttrs{/*lr=*/0.001, + /*momentum=*/0.9, + /*nesterov=*/false, + /*weight_decay=*/0.001}}; + + std::unordered_map + input_tensors; + + DistributedDeviceHandle device_handle = create_distributed_device_handle( + ctx, + /*workSpaceSize=*/1024 * 1024, + /*allowTensorOpMathConversion=*/true); + + PCGInstance pcg_instance = create_pcg_instance( + /*ctx=*/ctx, + /*mpcg=*/mpcg, + /*optimizer=*/optimizer_attrs, + /*loss=*/loss_attrs, + /*label_tensor=*/label_tensor, + /*logit_tensor=*/t_linear_2, + /*input_tensors=*/input_tensors, + /*profiling_settings=*/ProfilingSettings{0, 0}, + /*device_handle=*/device_handle, + /*iteration_config=*/FFIterationConfig{1_p}); + + // begin training loop + int num_epochs = 5; + std::vector loss_values; + + for (int i = 0; i < num_epochs; 
i++) { + perform_all_passes_for_pcg_instance( + /*instance=*/pcg_instance, + /*profiling_settings=*/ProfilingSettings{0, 0}, + /*device_handle=*/device_handle, + /*iteration_config=*/FFIterationConfig{1_p}); + // loss_values.push_back(copy_tensor_accessor_r( + // pcg_instance.get_loss_tensor_accessor().value(), + // allocator)); + } + + // // Assert that each sample in the batch has a lower loss in last epoch + // // than the first epoch + // GenericTensorAccessorR first_epoch_loss = loss_values.at(0); + // GenericTensorAccessorR last_epoch_loss = loss_values.back(); + // CHECK_MESSAGE(did_loss_decrease(first_epoch_loss, last_epoch_loss), + // check_kv("first_epoch_loss", + // format_accessor_r_contents(first_epoch_loss)), + // check_kv("last_epoch_loss", + // format_accessor_r_contents(last_epoch_loss))); + }); + } +} + +} // namespace test diff --git a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml index c6e6673f33..bd64f52567 100644 --- a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml +++ b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml @@ -5,6 +5,7 @@ features = [ "eq", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml index 75e9099104..c9171b928b 100644 --- a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml +++ b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml @@ -5,6 +5,7 @@ features = [ "eq", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/task-spec/include/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h b/lib/task-spec/include/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h new file mode 100644 index 0000000000..758a0c2813 --- 
/dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h @@ -0,0 +1,14 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_DYNAMIC_OPEN_DATAFLOW_GRAPH_FROM_MPCG_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_DYNAMIC_OPEN_DATAFLOW_GRAPH_FROM_MPCG_H + +#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h" + +namespace FlexFlow { + +DynamicOpenDataflowGraph make_dynamic_open_dataflow_graph_from_mpcg( + MappedParallelComputationGraph const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.toml new file mode 100644 index 0000000000..3c43e1d637 --- /dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.toml @@ -0,0 +1,43 @@ +namespace = "FlexFlow" +name = "SerializableDynamicNodeAttrs" +type = "struct" +features = [ + "eq", + "hash", + "fmt", + "json", +] + +includes = [ + "", + "task-spec/dynamic_graph/dynamic_task_type.dtg.h", + "pcg/machine_space_coordinate.dtg.h", + "pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h", + "task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.h", + "task-spec/dynamic_graph/training_operation_attrs.dtg.h", +] + +src_includes = [ + "utils/fmt/optional.h", + "utils/json/optional.h", +] + +[[fields]] +name = "task_type" +type = "std::optional<::FlexFlow::DynamicTaskType>" + +[[fields]] +name = "device_coord" +type = "std::optional<::FlexFlow::MachineSpaceCoordinate>" + +[[fields]] +name = "mapping" +type = "std::optional<::FlexFlow::MappedOperatorTaskGroup>" + +[[fields]] +name = "op_attrs" +type = "std::optional<::FlexFlow::TrainingOperationAttrs>" + +[[fields]] +name = "layer_guid" +type = "::FlexFlow::dynamic_layer_guid_t" 
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.h b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.h new file mode 100644 index 0000000000..7a274a1e7b --- /dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_ATTRS_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_ATTRS_H + +#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h" +#include "task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.h" + +namespace FlexFlow { + +SerializableDynamicNodeAttrs + dynamic_node_attrs_to_serializable(DynamicNodeAttrs const &); +DynamicNodeAttrs + dynamic_node_attrs_from_serializable(SerializableDynamicNodeAttrs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.toml new file mode 100644 index 0000000000..01f4cc8876 --- /dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.toml @@ -0,0 +1,33 @@ +namespace = "FlexFlow" +name = "SerializableDynamicNodeInvocation" +type = "struct" +features = [ + "eq", + "fmt", + "hash", + "json", +] + +includes = [ + "", + "task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.h", + "task-spec/dynamic_graph/dynamic_tensor_slot.dtg.h", + "task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.h", +] + +src_includes = [ + "utils/hash/unordered_map.h", + "utils/fmt/unordered_map.h", +] + +[[fields]] +name = "inputs" +type = "std::unordered_map<::FlexFlow::DynamicTensorSlot, ::FlexFlow::SerializableDynamicValueAttrs>" + +[[fields]] +name = "node_attrs" +type = "::FlexFlow::SerializableDynamicNodeAttrs" + +[[fields]] +name = "outputs" 
+type = "std::unordered_map<::FlexFlow::DynamicTensorSlot, ::FlexFlow::SerializableDynamicValueAttrs>" diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.h b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.h new file mode 100644 index 0000000000..2bcdb9a898 --- /dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_INVOCATION_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_INVOCATION_H + +#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h" +#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.h" + +namespace FlexFlow { + +SerializableDynamicNodeInvocation + dynamic_node_invocation_to_serializable(DynamicNodeInvocation const &); +DynamicNodeInvocation dynamic_node_invocation_from_serializable( + SerializableDynamicNodeInvocation const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.toml new file mode 100644 index 0000000000..6209bfa247 --- /dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.toml @@ -0,0 +1,38 @@ +namespace = "FlexFlow" +name = "SerializableDynamicValueAttrs" +type = "struct" +features = [ + "eq", + "hash", + "fmt", + "json", +] + +includes = [ + "", + "task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.h", + "op-attrs/parallel_tensor_shape.dtg.h", + "op-attrs/parallel_tensor_space_coordinate.dtg.h", + "task-spec/dynamic_graph/dynamic_tensor_role.dtg.h", +] + +src_includes = [ + "utils/fmt/optional.h", + "utils/json/optional.h", +] + +[[fields]] +name = "tensor_guid" +type = "::FlexFlow::dynamic_tensor_guid_t" + 
+[[fields]] +name = "parallel_tensor_shape" +type = "std::optional<::FlexFlow::ParallelTensorShape>" + +[[fields]] +name = "shard_coord" +type = "std::optional<::FlexFlow::ParallelTensorSpaceCoordinate>" + +[[fields]] +name = "role" +type = "std::optional<::FlexFlow::DynamicTensorRole>" diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.h b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.h new file mode 100644 index 0000000000..6272265b7e --- /dev/null +++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.h @@ -0,0 +1,16 @@ +#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_VALUE_ATTRS_H +#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_VALUE_ATTRS_H + +#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h" +#include "task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.h" + +namespace FlexFlow { + +SerializableDynamicValueAttrs + dynamic_value_attrs_to_serializable(DynamicValueAttrs const &); +DynamicValueAttrs dynamic_value_attrs_from_serializable( + SerializableDynamicValueAttrs const &); + +} // namespace FlexFlow + +#endif diff --git a/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml index 66c475b3a9..1051d8ac13 100644 --- a/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml +++ b/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml @@ -5,6 +5,7 @@ features = [ "eq", "hash", "fmt", + "json", ] includes = [ diff --git a/lib/task-spec/include/task-spec/ops/impl/dropout.h b/lib/task-spec/include/task-spec/ops/impl/dropout.h index a7b382ce62..192f2f8244 100644 --- a/lib/task-spec/include/task-spec/ops/impl/dropout.h +++ b/lib/task-spec/include/task-spec/ops/impl/dropout.h @@ -2,7 +2,6 @@ #define 
_FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OPS_IMPL_DROPOUT_H #include "op-attrs/ops/dropout_attrs.dtg.h" -#include "task-spec/task_id_t.dtg.h" #include "task-spec/task_impl_function.dtg.h" namespace FlexFlow { diff --git a/lib/task-spec/include/task-spec/ops/op_task_id_t.dtg.toml b/lib/task-spec/include/task-spec/ops/op_task_id_t.dtg.toml deleted file mode 100644 index 557da6cf4c..0000000000 --- a/lib/task-spec/include/task-spec/ops/op_task_id_t.dtg.toml +++ /dev/null @@ -1,18 +0,0 @@ -namespace = "FlexFlow" -name = "op_task_id_t" -type = "enum" -features = [ - "hash", - "json", - "rapidcheck", - "fmt", -] - -[[values]] -name = "INIT" - -[[values]] -name = "FWD" - -[[values]] -name = "BWD" diff --git a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.dtg.toml b/lib/task-spec/include/task-spec/task_id_with_noop_default_t.dtg.toml deleted file mode 100644 index 50349d5773..0000000000 --- a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.dtg.toml +++ /dev/null @@ -1,28 +0,0 @@ -namespace = "FlexFlow" -name = "task_id_with_noop_default_t" -type = "variant" -features = [ - "eq", - "ord", - "hash", - "fmt", - "rapidcheck", -] - -includes = [ - "task-spec/task_id_t.dtg.h", - "", -] - -src_includes = [ - "utils/rapidcheck/monostate.h", - "utils/fmt/monostate.h", -] - -[[values]] -type = "::FlexFlow::task_id_t" -key = "real_task" - -[[values]] -type = "std::monostate" -key = "noop_task" diff --git a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.h b/lib/task-spec/include/task-spec/task_id_with_noop_default_t.h deleted file mode 100644 index 054b73844e..0000000000 --- a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TASK_ID_WITH_NOOP_DEFAULT_T_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TASK_ID_WITH_NOOP_DEFAULT_T_H - -#include "op-attrs/computation_graph_op_attrs.dtg.h" -#include "op-attrs/operator_type.dtg.h" -#include 
"task-spec/ops/op_task_id_t.dtg.h" -#include "task-spec/task_id_with_noop_default_t.dtg.h" - -namespace FlexFlow { - -task_id_with_noop_default_t lift_task_id_t(task_id_t); -task_id_with_noop_default_t default_noop_task(); - -task_id_with_noop_default_t lower_op_task_id_to_task_id_with_noop_default_t( - op_task_id_t, ComputationGraphOpAttrs const &); - -task_id_with_noop_default_t - get_init_task_id_for_op_attrs(ComputationGraphOpAttrs const &); - -task_id_with_noop_default_t - get_fwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &); - -task_id_with_noop_default_t - get_bwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &); - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/src/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.cc b/lib/task-spec/src/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.cc new file mode 100644 index 0000000000..ced98dfd44 --- /dev/null +++ b/lib/task-spec/src/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.cc @@ -0,0 +1,78 @@ +#include "task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h" +#include "op-attrs/parallel_tensor_shape.h" +#include "op-attrs/pcg_operator_attrs.h" +#include "pcg/parallel_computation_graph/parallel_computation_graph.h" +#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h" +#include "task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.h" +#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h" +#include "task-spec/dynamic_graph/dynamic_tensor_role.h" +#include "utils/containers/generate_map.h" +#include +#include +#include + +namespace FlexFlow { + +DynamicOpenDataflowGraph make_dynamic_open_dataflow_graph_from_mpcg( + MappedParallelComputationGraph const &mpcg) { + DynamicOpenDataflowGraph result = make_empty_dynamic_open_dataflow_graph(); + + for (auto const &[layer, attrs] : + get_parallel_layer_attrs_mapping(mpcg.pcg)) { + DynamicNodeAttrs result_attrs{ + /*task_type=*/std::nullopt, + 
/*device_coord=*/std::nullopt, + /*mapping=*/mpcg.mapped_tasks.at(layer), + /*op_attrs=*/TrainingOperationAttrs{attrs.op_attrs}, + /*pcg_layer_guid=*/dynamic_layer_guid_t{layer}, + /*per_device_op_state=*/std::nullopt, + }; + + std::unordered_map result_inputs = + transform(get_incoming_tensors(mpcg.pcg, layer), + [&](TensorSlotName const &slot_name, + parallel_tensor_guid_t const &tensor) { + ParallelTensorAttrs attrs = + get_parallel_tensor_attrs(mpcg.pcg, tensor); + return std::pair{ + DynamicTensorSlot{ + /*slot_name=*/slot_name, + /*slot_tensor_role=*/std::nullopt, + }, + DynamicValueAttrs{ + /*tensor_guid=*/dynamic_tensor_guid_t{tensor}, + /*parallel_tensor_shape=*/attrs.shape, + /*shard_coord=*/std::nullopt, + /*accessor=*/std::nullopt, + /*role=*/std::nullopt, + }, + }; + }); + std::unordered_map result_outputs = + transform(get_outgoing_tensors(mpcg.pcg, layer), + [&](TensorSlotName const &slot_name, + parallel_tensor_guid_t const &tensor) { + ParallelTensorAttrs attrs = + get_parallel_tensor_attrs(mpcg.pcg, tensor); + return std::pair{ + DynamicTensorSlot{ + /*slot_name=*/slot_name, + /*slot_tensor_role=*/std::nullopt, + }, + DynamicValueAttrs{ + /*tensor_guid=*/dynamic_tensor_guid_t{tensor}, + /*parallel_tensor_shape=*/attrs.shape, + /*shard_coord=*/std::nullopt, + /*accessor=*/std::nullopt, + /*role=*/std::nullopt, + }, + }; + }); + + result.invocations.emplace(result_inputs, result_attrs, result_outputs); + } + + return result; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_attrs.cc b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_attrs.cc new file mode 100644 index 0000000000..d613194d14 --- /dev/null +++ b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_attrs.cc @@ -0,0 +1,29 @@ +#include "task-spec/dynamic_graph/serializable_dynamic_node_attrs.h" +#include + +namespace FlexFlow { + +SerializableDynamicNodeAttrs + 
dynamic_node_attrs_to_serializable(DynamicNodeAttrs const &attrs) { + return SerializableDynamicNodeAttrs{ + /*task_type=*/attrs.task_type, + /*device_coord=*/attrs.device_coord, + /*mapping=*/attrs.mapping, + /*op_attrs=*/attrs.op_attrs, + /*layer_guid=*/attrs.layer_guid, + }; +} + +DynamicNodeAttrs dynamic_node_attrs_from_serializable( + SerializableDynamicNodeAttrs const &attrs) { + return DynamicNodeAttrs{ + /*task_type=*/attrs.task_type, + /*device_coord=*/attrs.device_coord, + /*mapping=*/attrs.mapping, + /*op_attrs=*/attrs.op_attrs, + /*layer_guid=*/attrs.layer_guid, + /*per_device_op_state=*/std::nullopt, + }; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_invocation.cc b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_invocation.cc new file mode 100644 index 0000000000..334623ee67 --- /dev/null +++ b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_invocation.cc @@ -0,0 +1,31 @@ +#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.h" +#include "task-spec/dynamic_graph/serializable_dynamic_node_attrs.h" +#include "task-spec/dynamic_graph/serializable_dynamic_value_attrs.h" +#include "utils/containers/map_values.h" + +namespace FlexFlow { + +SerializableDynamicNodeInvocation dynamic_node_invocation_to_serializable( + DynamicNodeInvocation const &invocation) { + return SerializableDynamicNodeInvocation{ + /*inputs=*/map_values(invocation.inputs, + dynamic_value_attrs_to_serializable), + /*node_attrs=*/dynamic_node_attrs_to_serializable(invocation.node_attrs), + /*outputs=*/ + map_values(invocation.outputs, dynamic_value_attrs_to_serializable), + }; +} + +DynamicNodeInvocation dynamic_node_invocation_from_serializable( + SerializableDynamicNodeInvocation const &invocation) { + return DynamicNodeInvocation{ + /*inputs=*/map_values(invocation.inputs, + dynamic_value_attrs_from_serializable), + /*node_attrs=*/ + 
dynamic_node_attrs_from_serializable(invocation.node_attrs), + /*outputs=*/ + map_values(invocation.outputs, dynamic_value_attrs_from_serializable), + }; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_value_attrs.cc b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_value_attrs.cc new file mode 100644 index 0000000000..2dc0b509ab --- /dev/null +++ b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_value_attrs.cc @@ -0,0 +1,27 @@ +#include "task-spec/dynamic_graph/serializable_dynamic_value_attrs.h" +#include + +namespace FlexFlow { + +SerializableDynamicValueAttrs + dynamic_value_attrs_to_serializable(DynamicValueAttrs const &attrs) { + return SerializableDynamicValueAttrs{ + /*tensor_guid=*/attrs.tensor_guid, + /*parallel_tensor_shape=*/attrs.parallel_tensor_shape, + /*shard_coord=*/attrs.shard_coord, + /*role=*/attrs.role, + }; +} + +DynamicValueAttrs dynamic_value_attrs_from_serializable( + SerializableDynamicValueAttrs const &attrs) { + return DynamicValueAttrs{ + /*tensor_guid=*/attrs.tensor_guid, + /*parallel_tensor_shape=*/attrs.parallel_tensor_shape, + /*shard_coord=*/attrs.shard_coord, + /*accessor=*/std::nullopt, + /*role=*/attrs.role, + }; +} + +} // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc b/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc index ea253b63f8..402e0ef055 100644 --- a/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc +++ b/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc @@ -15,7 +15,7 @@ bool value_is_shard_expanded(DynamicValueAttrs const &n) { bool no_part_of_graph_is_shard_expanded(DynamicOpenDataflowGraph const &g) { auto slot_is_shard_expanded = [](DynamicTensorSlot const &) -> bool { - return true; + return false; }; return no_part_of_dynamic_graph_satisfies(g, @@ -81,4 +81,19 @@ std::unordered_set }); } +DynamicOpenDataflowGraph + 
perform_shard_expansion(DynamicOpenDataflowGraph const &g) { + + ASSERT(no_part_of_graph_is_shard_expanded(g)); + + DynamicOpenDataflowGraph result = + flatmap_dynamic_invocation_set(g, [&](DynamicNodeInvocation const &i) { + return perform_shard_expansion_for_invocation(i); + }); + + ASSERT(graph_is_fully_shard_expanded(result)); + + return result; +} + } // namespace FlexFlow diff --git a/lib/task-spec/src/task-spec/task_id_with_noop_default_t.cc b/lib/task-spec/src/task-spec/task_id_with_noop_default_t.cc deleted file mode 100644 index 20e0d00c57..0000000000 --- a/lib/task-spec/src/task-spec/task_id_with_noop_default_t.cc +++ /dev/null @@ -1,243 +0,0 @@ -#include "task-spec/task_id_with_noop_default_t.h" -#include "utils/overload.h" - -namespace FlexFlow { - -task_id_with_noop_default_t lift_task_id_t(task_id_t task_id) { - return task_id_with_noop_default_t{task_id}; -} - -task_id_with_noop_default_t default_noop_task() { - return task_id_with_noop_default_t{std::monostate{}}; -} - -task_id_with_noop_default_t lower_op_task_id_to_task_id_with_noop_default_t( - op_task_id_t op_task_id, ComputationGraphOpAttrs const &op_attrs) { - switch (op_task_id) { - case op_task_id_t::INIT: - return get_init_task_id_for_op_attrs(op_attrs); - case op_task_id_t::FWD: - return get_fwd_task_id_for_op_attrs(op_attrs); - case op_task_id_t::BWD: - return get_bwd_task_id_for_op_attrs(op_attrs); - default: - PANIC("Unhandled op_task_id_t", op_task_id); - } -} - -task_id_with_noop_default_t - get_init_task_id_for_op_attrs(ComputationGraphOpAttrs const &op_attrs) { - - return op_attrs.visit(overload{ - [](BatchMatmulAttrs const &) { return default_noop_task(); }, - [](BatchNormAttrs const &) { - return lift_task_id_t(task_id_t::BATCHNORM_INIT_TASK_ID); - }, - [](BroadcastAttrs const &) { return default_noop_task(); }, - [](CastAttrs const &) { return default_noop_task(); }, - [](ConcatAttrs const &) { return default_noop_task(); }, - [](Conv2DAttrs const &) { - return 
lift_task_id_t(task_id_t::CONV2D_INIT_TASK_ID); - }, - [](DropoutAttrs const &) { - return lift_task_id_t(task_id_t::DROPOUT_INIT_TASK_ID); - }, - [](ElementBinaryAttrs const &) { - return lift_task_id_t(task_id_t::ELEMENTBINARY_INIT_TASK_ID); - }, - [](ElementUnaryAttrs const &) { - return lift_task_id_t(task_id_t::ELEMENTUNARY_INIT_TASK_ID); - }, - [](EmbeddingAttrs const &) { return default_noop_task(); }, - [](FlatAttrs const &) { return default_noop_task(); }, - [](GatherAttrs const &) { - return lift_task_id_t(task_id_t::GATHER_INIT_TASK_ID); - }, - [](InputAttrs const &) { return default_noop_task(); }, - [](LayerNormAttrs const &) { - return lift_task_id_t(task_id_t::LAYERNORM_INIT_TASK_ID); - }, - [](LinearAttrs const &) { - return lift_task_id_t(task_id_t::LINEAR_INIT_TASK_ID); - }, - [](MultiHeadAttentionAttrs const &) { - return lift_task_id_t(task_id_t::ATTENTION_INIT_TASK_ID); - }, - [](NoopAttrs const &) { return default_noop_task(); }, - [](Pool2DAttrs const &) { - return lift_task_id_t(task_id_t::POOL2D_INIT_TASK_ID); - }, - [](ReduceAttrs const &) { - return lift_task_id_t(task_id_t::REDUCE_INIT_TASK_ID); - }, - [](ReshapeAttrs const &) { return default_noop_task(); }, - [](ReverseAttrs const &) { return default_noop_task(); }, - [](SoftmaxAttrs const &) { - return lift_task_id_t(task_id_t::SOFTMAX_INIT_TASK_ID); - }, - [](SplitAttrs const &) { return default_noop_task(); }, - [](TopKAttrs const &) { return default_noop_task(); }, - [](TransposeAttrs const &) { return default_noop_task(); }, - [](WeightAttrs const &) { return default_noop_task(); }, - }); -} - -task_id_with_noop_default_t - get_fwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &op_attrs) { - - return op_attrs.visit(overload{ - [](BatchMatmulAttrs const &) { - return lift_task_id_t(task_id_t::BATCHMATMUL_FWD_TASK_ID); - }, - [](BatchNormAttrs const &) { - return lift_task_id_t(task_id_t::BATCHNORM_FWD_TASK_ID); - }, - [](BroadcastAttrs const &) { - return 
lift_task_id_t(task_id_t::BROADCAST_FWD_TASK_ID); - }, - [](CastAttrs const &) { - return lift_task_id_t(task_id_t::CAST_FWD_TASK_ID); - }, - [](ConcatAttrs const &) { - return lift_task_id_t(task_id_t::CONCAT_FWD_TASK_ID); - }, - [](Conv2DAttrs const &) { - return lift_task_id_t(task_id_t::CONV2D_FWD_TASK_ID); - }, - [](DropoutAttrs const &) { - return lift_task_id_t(task_id_t::DROPOUT_FWD_TASK_ID); - }, - [](ElementBinaryAttrs const &) { - return lift_task_id_t(task_id_t::ELEMENTBINARY_FWD_TASK_ID); - }, - [](ElementUnaryAttrs const &) { - return lift_task_id_t(task_id_t::ELEMENTUNARY_FWD_TASK_ID); - }, - [](EmbeddingAttrs const &) { - return lift_task_id_t(task_id_t::EMBED_FWD_TASK_ID); - }, - [](FlatAttrs const &) { - return lift_task_id_t(task_id_t::FLAT_FWD_TASK_ID); - }, - [](GatherAttrs const &) { - return lift_task_id_t(task_id_t::GATHER_FWD_TASK_ID); - }, - [](InputAttrs const &) { return default_noop_task(); }, - [](LayerNormAttrs const &) { - return lift_task_id_t(task_id_t::LAYERNORM_FWD_TASK_ID); - }, - [](LinearAttrs const &) { - return lift_task_id_t(task_id_t::LINEAR_FWD_TASK_ID); - }, - [](MultiHeadAttentionAttrs const &) { - return lift_task_id_t(task_id_t::ATTENTION_FWD_TASK_ID); - }, - [](NoopAttrs const &) { return default_noop_task(); }, - [](Pool2DAttrs const &) { - return lift_task_id_t(task_id_t::POOL2D_FWD_TASK_ID); - }, - [](ReduceAttrs const &) { - return lift_task_id_t(task_id_t::REDUCE_FWD_TASK_ID); - }, - [](ReshapeAttrs const &) { - return lift_task_id_t(task_id_t::RESHAPE_FWD_TASK_ID); - }, - [](ReverseAttrs const &) { - return lift_task_id_t(task_id_t::REVERSE_FWD_TASK_ID); - }, - [](SoftmaxAttrs const &) { - return lift_task_id_t(task_id_t::SOFTMAX_FWD_TASK_ID); - }, - [](SplitAttrs const &) { - return lift_task_id_t(task_id_t::SPLIT_FWD_TASK_ID); - }, - [](TopKAttrs const &) { - return lift_task_id_t(task_id_t::TOPK_FWD_TASK_ID); - }, - [](TransposeAttrs const &) { - return lift_task_id_t(task_id_t::TRANSPOSE_FWD_TASK_ID); - }, 
- [](WeightAttrs const &) { return default_noop_task(); }, - }); -} - -task_id_with_noop_default_t - get_bwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &op_attrs) { - - return op_attrs.visit(overload{ - [](BatchMatmulAttrs const &) { - return lift_task_id_t(task_id_t::BATCHMATMUL_BWD_TASK_ID); - }, - [](BatchNormAttrs const &) { - return lift_task_id_t(task_id_t::BATCHNORM_BWD_TASK_ID); - }, - [](BroadcastAttrs const &) { - return lift_task_id_t(task_id_t::BROADCAST_BWD_TASK_ID); - }, - [](CastAttrs const &) { - return lift_task_id_t(task_id_t::CAST_BWD_TASK_ID); - }, - [](ConcatAttrs const &) { - return lift_task_id_t(task_id_t::CONCAT_BWD_TASK_ID); - }, - [](Conv2DAttrs const &) { - return lift_task_id_t(task_id_t::CONV2D_BWD_TASK_ID); - }, - [](DropoutAttrs const &) { - return lift_task_id_t(task_id_t::DROPOUT_BWD_TASK_ID); - }, - [](ElementBinaryAttrs const &) { - return lift_task_id_t(task_id_t::ELEMENTBINARY_BWD_TASK_ID); - }, - [](ElementUnaryAttrs const &) { - return lift_task_id_t(task_id_t::ELEMENTUNARY_BWD_TASK_ID); - }, - [](EmbeddingAttrs const &) { - return lift_task_id_t(task_id_t::EMBED_BWD_TASK_ID); - }, - [](FlatAttrs const &) { - return lift_task_id_t(task_id_t::FLAT_BWD_TASK_ID); - }, - [](GatherAttrs const &) { - return lift_task_id_t(task_id_t::GATHER_BWD_TASK_ID); - }, - [](InputAttrs const &) { return default_noop_task(); }, - [](LayerNormAttrs const &) { - return lift_task_id_t(task_id_t::LAYERNORM_BWD_TASK_ID); - }, - [](LinearAttrs const &) { - return lift_task_id_t(task_id_t::LINEAR_BWD_TASK_ID); - }, - [](MultiHeadAttentionAttrs const &) { - return lift_task_id_t(task_id_t::ATTENTION_BWD_TASK_ID); - }, - [](NoopAttrs const &) { return default_noop_task(); }, - [](Pool2DAttrs const &) { - return lift_task_id_t(task_id_t::POOL2D_BWD_TASK_ID); - }, - [](ReduceAttrs const &) { - return lift_task_id_t(task_id_t::REDUCE_BWD_TASK_ID); - }, - [](ReshapeAttrs const &) { - return lift_task_id_t(task_id_t::RESHAPE_BWD_TASK_ID); - }, - 
[](ReverseAttrs const &) { - return lift_task_id_t(task_id_t::REVERSE_BWD_TASK_ID); - }, - [](SoftmaxAttrs const &) { - return lift_task_id_t(task_id_t::SOFTMAX_BWD_TASK_ID); - }, - [](SplitAttrs const &) { - return lift_task_id_t(task_id_t::SPLIT_BWD_TASK_ID); - }, - [](TopKAttrs const &) { - return lift_task_id_t(task_id_t::TOPK_BWD_TASK_ID); - }, - [](TransposeAttrs const &) { - return lift_task_id_t(task_id_t::TRANSPOSE_BWD_TASK_ID); - }, - [](WeightAttrs const &) { return default_noop_task(); }, - }); -} - -} // namespace FlexFlow diff --git a/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml b/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml index f286fb90a7..5b537eac88 100644 --- a/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml +++ b/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml @@ -6,6 +6,7 @@ features = [ "ord", "hash", "fmt", + "json", ] template_params = [