diff --git a/.flake/pkgs/legion.nix b/.flake/pkgs/legion.nix
deleted file mode 100644
index 361a66c4ff..0000000000
--- a/.flake/pkgs/legion.nix
+++ /dev/null
@@ -1,48 +0,0 @@
-{ lib
-, stdenv
-, fetchFromGitLab
-, cmake
-, cudaPackages ? { }
-, cudaCapabilities ? [ "60" "70" "80" "86" ]
-, maxDim ? 5
-}:
-
-# from https://codeberg.org/Uli/nix-things/src/commit/776519e382c81b136c1d0b10d8c7b52b4acb9192/overlays/cq/python/libclang-python.nix
-
-let 
-  cmakeFlag = x: if x then "1" else "0";
-
-  inherit (cudaPackages) cudatoolkit;
-in
-
-stdenv.mkDerivation rec {
-  pname = "legion";
-  version = "2025-01-06";
-
-  src = fetchFromGitLab {
-    owner = "StanfordLegion";
-    repo = "legion";
-    rev = "7be1abd0207eb1126c7629b16d1123fa6f58ce9d";
-    sha256 = "sha256-gTjnGYYTQwTsrV1WcY0qqpTrlwbzAPcndurRy6XnG8A=";
-  };
-
-  nativeBuildInputs = [
-    cmake
-  ];
-
-  cmakeFlags = [
-    "-DLegion_USE_CUDA=1"
-    "-DLegion_CUDA_ARCH=${lib.concatStringsSep "," cudaCapabilities}"
-    "-DLegion_MAX_DIM=${toString maxDim}"
-  ];
-
-  buildInputs = [ 
-    cudatoolkit
-  ];
-
-  meta = with lib; {
-    description = "Legion is a parallel programming model for distributed, heterogeneous machines";
-    homepage = "https://legion.stanford.edu/";
-    license = licenses.asl20;
-  };
-}
diff --git a/.flake/pkgs/realm.nix b/.flake/pkgs/realm.nix
new file mode 100644
index 0000000000..b809573690
--- /dev/null
+++ b/.flake/pkgs/realm.nix
@@ -0,0 +1,46 @@
+{ lib
+, stdenv
+, fetchFromGitHub
+, cmake
+, cudaPackages ? { }
+, zlib
+, maxDim ? 5
+}:
+
+let
+  inherit (cudaPackages) cudatoolkit;
+in
+
+stdenv.mkDerivation rec {
+  pname = "realm";
+  version = "2026-02-06";
+
+  src = fetchFromGitHub {
+    owner = "StanfordLegion";
+    repo = "realm";
+    rev = "0405b67ca14b586f7dec0dcddee194cecee7efa6";
+    sha256 = "sha256-iUPVV1rh3QuyDKgXuu8aDlaZGlNwcpPvPsSVLWp8tr4=";
+  };
+
+  nativeBuildInputs = [
+    cmake
+  ];
+
+  cmakeFlags = [
+    "-DBUILD_SHARED_LIBS=ON"
+    "-DREALM_ENABLE_CUDA=ON"
+    "-DREALM_ENABLE_PREALM=ON"
+    "-DREALM_MAX_DIM=${toString maxDim}"
+  ];
+
+  buildInputs = [
+    cudatoolkit
+    zlib
+  ];
+
+  meta = with lib; {
+    description = "Realm is a distributed, event–based tasking runtime for building high-performance applications that span clusters of CPUs, GPUs, and other accelerators";
+    homepage = "https://legion.stanford.edu/realm";
+    license = licenses.asl20;
+  };
+}
diff --git a/.proj.toml b/.proj.toml
index 38690f710b..5dbbfbcdd7 100644
--- a/.proj.toml
+++ b/.proj.toml
@@ -85,6 +85,13 @@ has-cpu-only-benchmarks = false
 has-cuda-tests = true
 has-cuda-benchmarks = false
 
+[targets.realm-execution]
+type = "lib"
+has-cpu-only-tests = true
+has-cpu-only-benchmarks = false
+has-cuda-tests = true
+has-cuda-benchmarks = false
+
 # [targets.local-pcg-execution]
 # type = "lib"
 # has-cpu-only-tests = true
diff --git a/flake.nix b/flake.nix
index 6ccd5616cd..dad0e2fc32 100644
--- a/flake.nix
+++ b/flake.nix
@@ -30,8 +30,8 @@
     };
   };
 
-  outputs = { self, nixpkgs, flake-utils, proj-repo, nixGL, ... }: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system: 
-    let 
+  outputs = { self, nixpkgs, flake-utils, proj-repo, nixGL, ... }: flake-utils.lib.eachSystem [ "x86_64-linux" ] (system:
+    let
       pkgs = import nixpkgs {
         inherit system;
         config.allowUnfree = true;
@@ -41,21 +41,21 @@
       mkShell = attrs: pkgs.mkShell.override {
         stdenv = pkgs.cudaPackages.backendStdenv;
       } (attrs // {
-        hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch 
-                                    # signed overflows due to the signedoverflow hardening setting. 
-                                    # for more details, see the following (long-running) nixpkgs github issues: 
+        hardeningDisable = ["all"]; # disable nixpkgs default compiler arguments, otherwise ubsan doesn't catch
+                                    # signed overflows due to the signedoverflow hardening setting.
+                                    # for more details, see the following (long-running) nixpkgs github issues:
                                     # - https://github.com/NixOS/nixpkgs/issues/18995
                                     # - https://github.com/NixOS/nixpkgs/issues/60919
       });
 
       proj = proj-repo.packages.${system}.proj;
-    in 
+    in
     {
       packages = rec {
         libdwarf-lite = pkgs.callPackage ./.flake/pkgs/libdwarf-lite.nix { };
         cpptrace = pkgs.callPackage ./.flake/pkgs/cpptrace.nix { inherit libdwarf-lite; };
         libassert = pkgs.callPackage ./.flake/pkgs/libassert.nix { inherit cpptrace; };
-        legion = pkgs.callPackage ./.flake/pkgs/legion.nix { };
+        realm = pkgs.callPackage ./.flake/pkgs/realm.nix { };
         bencher-cli = pkgs.callPackage ./.flake/pkgs/bencher-cli.nix { };
         ffdb = pkgs.callPackage ./.flake/pkgs/ffdb { inherit proj; };
         hpp2plantuml = pkgs.python3Packages.callPackage ./.flake/pkgs/hpp2plantuml.nix { };
@@ -83,8 +83,7 @@
           shellHook = ''
             export PATH="$HOME/ff/.scripts/:$PATH"
             export RC_PARAMS="max_discard_ratio=100"
-            export CMAKE_FLAGS="-DFF_USE_EXTERNAL_LEGION=ON \
-                                -DFF_USE_EXTERNAL_NCCL=ON \
+            export CMAKE_FLAGS="-DFF_USE_EXTERNAL_NCCL=ON \
                                 -DFF_USE_EXTERNAL_JSON=ON \
                                 -DFF_USE_EXTERNAL_FMT=ON \
                                 -DFF_USE_EXTERNAL_SPDLOG=ON \
@@ -94,7 +93,7 @@
                                 -DFF_USE_EXTERNAL_GBENCHMARK=ON \
                                 -DFF_USE_EXTERNAL_LIBASSERT=ON"
           '';
-          
+
           buildInputs = builtins.concatLists [
             (with pkgs; [
               zlib
@@ -125,7 +124,7 @@
             ])
             (with self.packages.${system}; [
               libassert
-              legion
+              realm
               rapidcheckFull
               doctest
             ])
diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
index 2e71e577c0..cb3bd6d6ae 100644
--- a/lib/CMakeLists.txt
+++ b/lib/CMakeLists.txt
@@ -5,6 +5,7 @@ add_subdirectory(op-attrs)
 add_subdirectory(kernels)
 add_subdirectory(local-execution)
 add_subdirectory(local-pcg-execution)
+add_subdirectory(realm-execution)
 add_subdirectory(task-spec)
 add_subdirectory(utils)
 add_subdirectory(ffi)
diff --git a/lib/kernels/include/kernels/device_handle_t.h b/lib/kernels/include/kernels/device_handle_t.h
index 9b7769355e..0836503717 100644
--- a/lib/kernels/include/kernels/device_handle_t.h
+++ b/lib/kernels/include/kernels/device_handle_t.h
@@ -9,6 +9,9 @@ namespace FlexFlow {
 device_handle_t device_handle_t_from_managed_handle(
     std::optional<ManagedPerDeviceFFHandle> const &managed_handle);
 
+device_handle_t device_handle_t_from_managed_handle_ptr(
+    std::optional<ManagedPerDeviceFFHandle *> const &managed_handle);
+
 device_handle_t gpu_make_device_handle_t(PerDeviceFFHandle const &ff_handle);
 device_handle_t cpu_make_device_handle_t();
 
diff --git a/lib/kernels/src/kernels/device_handle_t.cc b/lib/kernels/src/kernels/device_handle_t.cc
index 85f9e2a388..0225ee8e94 100644
--- a/lib/kernels/src/kernels/device_handle_t.cc
+++ b/lib/kernels/src/kernels/device_handle_t.cc
@@ -11,6 +11,15 @@ device_handle_t device_handle_t_from_managed_handle(
   }
 }
 
+device_handle_t device_handle_t_from_managed_handle_ptr(
+    std::optional<ManagedPerDeviceFFHandle *> const &managed_handle) {
+  if (managed_handle.has_value()) {
+    return gpu_make_device_handle_t(managed_handle.value()->raw_handle());
+  } else {
+    return cpu_make_device_handle_t();
+  }
+}
+
 device_handle_t gpu_make_device_handle_t(PerDeviceFFHandle const &ff_handle) {
   return device_handle_t{
       ff_handle,
diff --git a/lib/pcg/include/pcg/layer_guid_t.dtg.toml b/lib/pcg/include/pcg/layer_guid_t.dtg.toml
index d73cf547da..2f2f7694a0 100644
--- a/lib/pcg/include/pcg/layer_guid_t.dtg.toml
+++ b/lib/pcg/include/pcg/layer_guid_t.dtg.toml
@@ -6,6 +6,7 @@ features = [
   "ord",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h b/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h
index 5b1cad5e99..ebfdefa478 100644
--- a/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h
+++ b/lib/pcg/include/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h
@@ -5,6 +5,7 @@
 #include "pcg/machine_space_coordinate.dtg.h"
 #include "pcg/mapped_parallel_computation_graph/operator_atomic_task_shard_binding.dtg.h"
 #include "utils/bidict/bidict.h"
+#include <nlohmann/json.hpp>
 
 namespace FlexFlow {
 
@@ -45,4 +46,15 @@ struct hash<::FlexFlow::MappedOperatorTaskGroup> {
 };
 
 } // namespace std
+
+namespace nlohmann {
+
+template <>
+struct adl_serializer<::FlexFlow::MappedOperatorTaskGroup> {
+  static ::FlexFlow::MappedOperatorTaskGroup from_json(json const &j);
+  static void to_json(json &j, ::FlexFlow::MappedOperatorTaskGroup const &t);
+};
+
+} // namespace nlohmann
+
 #endif
diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h
index 25dc0721cd..21f33f6d3d 100644
--- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h
+++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_computation_graph.h
@@ -32,6 +32,10 @@ ParallelLayerAddedResult add_parallel_layer(
 ParallelLayerAddedResult pcg_add_input_layer(ParallelComputationGraph &pcg,
                                              TensorShape const &tensor_shape);
 
+ParallelLayerAddedResult
+    pcg_add_input_layer_with_grad(ParallelComputationGraph &pcg,
+                                  TensorShape const &tensor_shape);
+
 OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg,
                                           parallel_layer_guid_t const &layer);
 
@@ -54,6 +58,9 @@ std::unordered_map<TensorSlotName, ParallelComputationGraphEdge>
 std::unordered_set<parallel_layer_guid_t>
     get_initial_layers(ParallelComputationGraph const &);
 
+std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
+    get_outgoing_tensors(ParallelComputationGraph const &,
+                         parallel_layer_guid_t const &);
 std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
     get_incoming_tensors(ParallelComputationGraph const &,
                          parallel_layer_guid_t const &);
@@ -107,6 +114,9 @@ ParallelTensorShape get_parallel_tensor_shape(ParallelComputationGraph const &,
 std::vector<parallel_layer_guid_t>
     topological_ordering(ParallelComputationGraph const &);
 
+std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs>
+    get_parallel_layer_attrs_mapping(ParallelComputationGraph const &pcg);
+
 parallel_layer_guid_t
     get_parallel_layer_by_name(ParallelComputationGraph const &pcg,
                                std::string const &name);
diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml
index 618bcb0dc4..292b361fc8 100644
--- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml
+++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.toml
@@ -6,6 +6,7 @@ features = [
   "ord",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml
index 4494a31ac2..2710a15664 100644
--- a/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml
+++ b/lib/pcg/include/pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.toml
@@ -6,6 +6,7 @@ features = [
   "ord",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/pcg/include/pcg/tensor_guid_t.dtg.toml b/lib/pcg/include/pcg/tensor_guid_t.dtg.toml
index 151f7b1f0f..e8caf0021f 100644
--- a/lib/pcg/include/pcg/tensor_guid_t.dtg.toml
+++ b/lib/pcg/include/pcg/tensor_guid_t.dtg.toml
@@ -6,6 +6,7 @@ features = [
   "ord",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc b/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc
index b96a447383..4436efd727 100644
--- a/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc
+++ b/lib/pcg/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc
@@ -90,3 +90,20 @@ size_t hash<::FlexFlow::MappedOperatorTaskGroup>::operator()(
 }
 
 } // namespace std
+
+namespace nlohmann {
+
+::FlexFlow::MappedOperatorTaskGroup
+    adl_serializer<::FlexFlow::MappedOperatorTaskGroup>::from_json(
+        json const &j) {
+  return ::FlexFlow::MappedOperatorTaskGroup{j.template get<
+      ::FlexFlow::bidict<::FlexFlow::MachineSpaceCoordinate,
+                         ::FlexFlow::OperatorAtomicTaskShardBinding>>()};
+}
+
+void adl_serializer<::FlexFlow::MappedOperatorTaskGroup>::to_json(
+    json &j, ::FlexFlow::MappedOperatorTaskGroup const &t) {
+  j = t.get_shard_bindings();
+}
+
+} // namespace nlohmann
diff --git a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc
index f83628b8e1..959747dbc7 100644
--- a/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc
+++ b/lib/pcg/src/pcg/parallel_computation_graph/parallel_computation_graph.cc
@@ -142,6 +142,27 @@ ParallelLayerAddedResult pcg_add_input_layer(ParallelComputationGraph &pcg,
                             });
 }
 
+ParallelLayerAddedResult
+    pcg_add_input_layer_with_grad(ParallelComputationGraph &pcg,
+                                  TensorShape const &tensor_shape) {
+  ParallelLayerAttrs layer_attrs = ParallelLayerAttrs{
+      /*op_attrs=*/PCGOperatorAttrs{InputAttrs{tensor_shape}},
+      /*name=*/std::nullopt,
+  };
+
+  return add_parallel_layer(/*pcg=*/pcg,
+                            /*layer_attrs=*/layer_attrs,
+                            /*inputs=*/{},
+                            /*weights=*/{},
+                            /*output_flags=*/
+                            std::unordered_map<TensorSlotName, CreateGrad>{
+                                {
+                                    TensorSlotName::OUTPUT,
+                                    CreateGrad::YES,
+                                },
+                            });
+}
+
 OperatorTaskSpace get_operator_task_space(ParallelComputationGraph const &pcg,
                                           parallel_layer_guid_t const &layer) {
   PCGOperatorAttrs op_attrs = pcg_get_op_attrs(pcg, layer);
@@ -212,6 +233,16 @@ std::unordered_set<parallel_layer_guid_t>
                    [](Node const &n) { return parallel_layer_guid_t{n}; });
 }
 
+std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
+    get_outgoing_tensors(ParallelComputationGraph const &pcg,
+                         parallel_layer_guid_t const &l) {
+  return map_values(get_outgoing_kwarg_dataflow_outputs_for_node(
+                        pcg.raw_graph, l.raw_graph_node),
+                    [](KwargDataflowOutput<TensorSlotName> const &o) {
+                      return parallel_tensor_guid_t{o};
+                    });
+}
+
 std::unordered_map<TensorSlotName, parallel_tensor_guid_t>
     get_incoming_tensors(ParallelComputationGraph const &pcg,
                          parallel_layer_guid_t const &l) {
@@ -378,6 +409,17 @@ std::vector<parallel_layer_guid_t>
                    [](Node const &n) { return parallel_layer_guid_t{n}; });
 }
 
+std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs>
+    get_parallel_layer_attrs_mapping(ParallelComputationGraph const &pcg) {
+  std::unordered_map<parallel_layer_guid_t, ParallelLayerAttrs>
+      layer_attrs_mapping;
+  for (parallel_layer_guid_t const &layer_guid : get_parallel_layers(pcg)) {
+    layer_attrs_mapping.insert(
+        {layer_guid, get_parallel_layer_attrs(pcg, layer_guid)});
+  }
+  return layer_attrs_mapping;
+}
+
 parallel_layer_guid_t
     get_parallel_layer_by_name(ParallelComputationGraph const &pcg,
                                std::string const &name) {
diff --git a/lib/pcg/test/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc b/lib/pcg/test/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc
new file mode 100644
index 0000000000..1c3667afc7
--- /dev/null
+++ b/lib/pcg/test/src/pcg/mapped_parallel_computation_graph/mapped_operator_task_group.cc
@@ -0,0 +1,42 @@
+#include "pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h"
+#include "op-attrs/parallel_tensor_space_coordinate.dtg.h"
+#include "op-attrs/tensor_slot_name.dtg.h"
+#include "pcg/device_type.dtg.h"
+#include "pcg/machine_space_coordinate.dtg.h"
+#include "pcg/mapped_parallel_computation_graph/operator_atomic_task_shard_binding.dtg.h"
+#include <doctest/doctest.h>
+#include <nlohmann/json.hpp>
+
+using namespace ::FlexFlow;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("adl_serializer<MappedOperatorTaskGroup>") {
+    bidict<MachineSpaceCoordinate, OperatorAtomicTaskShardBinding>
+        shard_bindings{
+            {MachineSpaceCoordinate{0_n, 0_n, DeviceType::CPU},
+             OperatorAtomicTaskShardBinding{
+                 {
+                     {TensorSlotName::INPUT,
+                      ParallelTensorSpaceCoordinate{
+                          0_n, 0_n, FFOrdered{1_n, 2_n, 3_n}}},
+                 },
+             }},
+        };
+    MappedOperatorTaskGroup deserialized{shard_bindings};
+    nlohmann::json serialized = shard_bindings;
+
+    SUBCASE("to_json") {
+      nlohmann::json result = deserialized;
+      nlohmann::json correct = serialized;
+
+      CHECK(result == correct);
+    }
+
+    SUBCASE("from_json") {
+      MappedOperatorTaskGroup result = serialized;
+      MappedOperatorTaskGroup correct = deserialized;
+
+      CHECK(result == correct);
+    }
+  }
+}
diff --git a/lib/realm-execution/CMakeLists.txt b/lib/realm-execution/CMakeLists.txt
new file mode 100644
index 0000000000..08676525e1
--- /dev/null
+++ b/lib/realm-execution/CMakeLists.txt
@@ -0,0 +1,22 @@
+ff_add_library(
+  NAME
+    realm-execution
+  SRC_PATTERNS
+    src/*.cc
+  PUBLIC_INCLUDE
+    include/
+  PRIVATE_INCLUDE
+    src/
+  DEPS
+    compiler
+    kernels
+    local-execution
+    op-attrs
+    pcg
+    spdlog
+    task-spec
+    utils
+    Realm::Realm
+)
+
+add_subdirectory(test)
diff --git a/lib/realm-execution/include/realm-execution/atomic_dependency_set.h b/lib/realm-execution/include/realm-execution/atomic_dependency_set.h
new file mode 100644
index 0000000000..da6ba86638
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/atomic_dependency_set.h
@@ -0,0 +1,27 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_ATOMIC_DEPENDENCY_SET_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_ATOMIC_DEPENDENCY_SET_H
+
+#include "realm-execution/realm.h"
+#include <vector>
+
+namespace FlexFlow {
+
+struct AtomicDependencySet {
+public:
+  AtomicDependencySet() = delete;
+  explicit AtomicDependencySet(Realm::Event precondition);
+
+  void add_writer(Realm::Event writer);
+  void add_reader(Realm::Event reader);
+
+  Realm::Event get_dependency_for_writer() const;
+  Realm::Event get_dependency_for_reader() const;
+
+private:
+  Realm::Event writer;
+  std::vector<Realm::Event> readers;
+};
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/dependency_set.h b/lib/realm-execution/include/realm-execution/dependency_set.h
new file mode 100644
index 0000000000..629a40e2e7
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/dependency_set.h
@@ -0,0 +1,34 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEPENDENCY_SET_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEPENDENCY_SET_H
+
+#include "realm-execution/atomic_dependency_set.h"
+#include "realm-execution/realm.h"
+#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h"
+#include <unordered_map>
+
+namespace FlexFlow {
+
+struct DependencySet {
+public:
+  DependencySet() = delete;
+  explicit DependencySet(Realm::Event precondition);
+
+  void add_writer(DynamicValueAttrs const &value, Realm::Event writer);
+  void add_reader(DynamicValueAttrs const &value, Realm::Event reader);
+
+  Realm::Event get_dependency_for_writer(DynamicValueAttrs const &value) const;
+  Realm::Event get_dependency_for_reader(DynamicValueAttrs const &value) const;
+
+private:
+  AtomicDependencySet &
+      get_atomic_dependency_set(DynamicValueAttrs const &value);
+
+private:
+  Realm::Event precondition;
+  std::unordered_map<DynamicValueAttrs, AtomicDependencySet>
+      atomic_dependencies;
+};
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/device_specific_managed_per_device_ff_handle.h b/lib/realm-execution/include/realm-execution/device_specific_managed_per_device_ff_handle.h
new file mode 100644
index 0000000000..d48a80f438
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/device_specific_managed_per_device_ff_handle.h
@@ -0,0 +1,38 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEVICE_SPECIFIC_MANAGED_PER_DEVICE_FF_HANDLE_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DEVICE_SPECIFIC_MANAGED_PER_DEVICE_FF_HANDLE_H
+
+#include "kernels/device_handle_t.dtg.h"
+#include "kernels/managed_per_device_ff_handle.h"
+#include "pcg/device_id_t.dtg.h"
+#include "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h"
+#include <nlohmann/json.hpp>
+#include <optional>
+
+namespace FlexFlow {
+
+struct DeviceSpecificManagedPerDeviceFFHandle {
+public:
+  DeviceSpecificManagedPerDeviceFFHandle() = delete;
+  explicit DeviceSpecificManagedPerDeviceFFHandle(
+      device_id_t owner, std::optional<ManagedPerDeviceFFHandle *> handle);
+
+  std::optional<ManagedPerDeviceFFHandle *> get(device_id_t device_idx) const;
+
+  SerializableDeviceSpecificPtr serialize() const;
+  static DeviceSpecificManagedPerDeviceFFHandle
+      deserialize(SerializableDeviceSpecificPtr const &j);
+
+private:
+  device_id_t owner;
+  std::optional<ManagedPerDeviceFFHandle *> handle;
+};
+
+DeviceSpecificManagedPerDeviceFFHandle make_device_specific_managed_handle(
+    device_id_t const &, std::optional<ManagedPerDeviceFFHandle *> const &);
+
+device_handle_t device_handle_t_from_device_specific_managed_handle(
+    DeviceSpecificManagedPerDeviceFFHandle const &, device_id_t);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/distributed_device_handle.h b/lib/realm-execution/include/realm-execution/distributed_device_handle.h
new file mode 100644
index 0000000000..268be3583d
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/distributed_device_handle.h
@@ -0,0 +1,36 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_HANDLE_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_HANDLE_H
+
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/hash/processor.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+#include <unordered_map>
+
+namespace FlexFlow {
+
+struct DistributedDeviceHandle {
+public:
+  DistributedDeviceHandle() = delete;
+  explicit DistributedDeviceHandle(
+      std::unordered_map<Realm::Processor,
+                         DeviceSpecificManagedPerDeviceFFHandle> const
+          &handles);
+
+  DeviceSpecificManagedPerDeviceFFHandle const &
+      at(Realm::Processor processor) const;
+
+private:
+  std::unordered_map<Realm::Processor, DeviceSpecificManagedPerDeviceFFHandle>
+      handles;
+};
+
+DistributedDeviceHandle create_distributed_device_handle(
+    RealmContext &ctx,
+    size_t workSpaceSize,
+    bool allowTensorOpMathConversion,
+    Realm::Event precondition = Realm::Event::NO_EVENT);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/distributed_device_state_initialization.h b/lib/realm-execution/include/realm-execution/distributed_device_state_initialization.h
new file mode 100644
index 0000000000..ca24ecdd4c
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/distributed_device_state_initialization.h
@@ -0,0 +1,24 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_STATE_INITIALIZATION_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_DISTRIBUTED_DEVICE_STATE_INITIALIZATION_H
+
+#include "kernels/profiling_settings.dtg.h"
+#include "pcg/optimizer_attrs.dtg.h"
+#include "realm-execution/distributed_device_handle.h"
+#include "realm-execution/realm_context.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h"
+#include "task-spec/ff_iteration_config.dtg.h"
+
+namespace FlexFlow {
+
+DynamicOpenDataflowGraph perform_distributed_device_state_initialization(
+    DynamicOpenDataflowGraph const &dg,
+    RealmContext &ctx,
+    ProfilingSettings const &profiling_settings,
+    DistributedDeviceHandle const &device_handle,
+    FFIterationConfig const &iteration_config,
+    OptimizerAttrs const &optimizer_attrs,
+    Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/fmt/instance.h b/lib/realm-execution/include/realm-execution/fmt/instance.h
new file mode 100644
index 0000000000..c7c2df6735
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/fmt/instance.h
@@ -0,0 +1,35 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_FMT_INSTANCE_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_FMT_INSTANCE_H
+
+#include "realm-execution/realm.h"
+#include "utils/check_fmtable.h"
+#include <fmt/format.h>
+#include <utility>
+
+namespace fmt {
+
+template <typename Char>
+struct formatter<::FlexFlow::Realm::RegionInstance,
+                 Char,
+                 std::enable_if_t<!detail::has_format_as<
+                     ::FlexFlow::Realm::RegionInstance>::value>>
+    : formatter<::std::string> {
+  template <typename FormatContext>
+  auto format(::FlexFlow::Realm::RegionInstance const &m, FormatContext &ctx)
+      -> decltype(ctx.out()) {
+    std::string result = fmt::format("<RegionInstance {}>", m.id);
+
+    return formatter<std::string>::format(result, ctx);
+  }
+};
+
+} // namespace fmt
+
+namespace FlexFlow {
+
+std::ostream &operator<<(std::ostream &s,
+                         ::FlexFlow::Realm::RegionInstance const &m);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/hash/processor.h b/lib/realm-execution/include/realm-execution/hash/processor.h
new file mode 100644
index 0000000000..e5eb8eb503
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/hash/processor.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_HASH_PROCESSOR_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_HASH_PROCESSOR_H
+
+#include "realm-execution/realm.h"
+#include <utility>
+
+namespace std {
+
+template <>
+struct hash<::FlexFlow::Realm::Processor> {
+  size_t operator()(::FlexFlow::Realm::Processor const &p) const;
+};
+
+} // namespace std
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/instance_allocation.h b/lib/realm-execution/include/realm-execution/instance_allocation.h
new file mode 100644
index 0000000000..09709201ce
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/instance_allocation.h
@@ -0,0 +1,23 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_INSTANCE_ALLOCATION_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_INSTANCE_ALLOCATION_H
+
+#include "realm-execution/realm_context.h"
+#include "realm-execution/tensor_instance_backing.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h"
+
+namespace FlexFlow {
+
+std::pair<Realm::RegionInstance, Realm::Event>
+    perform_instance_allocation_for_value(DynamicNodeAttrs const &node,
+                                          DynamicValueAttrs const &value,
+                                          RealmContext &ctx);
+
+TensorInstanceBacking perform_instance_allocation(
+    DynamicOpenDataflowGraph const &g,
+    std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> const
+        &preallocated,
+    RealmContext &ctx);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/pcg_instance/pcg_instance.h b/lib/realm-execution/include/realm-execution/pcg_instance/pcg_instance.h
new file mode 100644
index 0000000000..b0037f51b2
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/pcg_instance/pcg_instance.h
@@ -0,0 +1,90 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_PCG_INSTANCE_PCG_INSTANCE_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_PCG_INSTANCE_PCG_INSTANCE_H
+
+#include "kernels/accessor.h"
+#include "kernels/allocation.h"
+#include "kernels/device_handle_t.dtg.h"
+#include "kernels/profiling_settings.dtg.h"
+#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h"
+#include "pcg/device_id_t.dtg.h"
+#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h"
+#include "pcg/optimizer_attrs.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h"
+#include "realm-execution/distributed_device_handle.h"
+#include "realm-execution/realm_context.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_tensor_accessor.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h"
+#include "task-spec/ff_iteration_config.dtg.h"
+#include "utils/units/milliseconds_t.h"
+#include <optional>
+
+namespace FlexFlow {
+
+struct PCGInstance {
+public:
+  PCGInstance() = delete;
+  PCGInstance(PCGInstance const &) = delete;
+  PCGInstance(PCGInstance &&) = delete;
+  explicit PCGInstance(
+      RealmContext &ctx,
+      std::vector<DynamicNodeInvocation> const &execution_order,
+      OptimizerAttrs const &optimizer_attrs,
+      std::optional<Realm::RegionInstance> logit_grad_tensor);
+  RealmContext &get_realm_context();
+  std::vector<DynamicNodeInvocation> const &get_execution_order() const;
+  OptimizerAttrs const &get_optimizer_attrs() const;
+  void update_optimizer_attrs_for_next_iter();
+  std::optional<Realm::RegionInstance> get_loss_tensor_instance() const;
+
+private:
+  RealmContext &ctx;
+  std::vector<DynamicNodeInvocation> execution_order;
+  OptimizerAttrs optimizer_attrs;
+  std::optional<Realm::RegionInstance> logit_grad_tensor;
+};
+
+PCGInstance create_pcg_instance(
+    RealmContext &ctx,
+    MappedParallelComputationGraph const &mpcg,
+    OptimizerAttrs const &optimizer_attrs,
+    std::optional<LossAttrs> const &loss_attrs,
+    std::optional<GenericTensorAccessorR> label_tensor,
+    std::optional<parallel_tensor_guid_t> logit_tensor,
+    std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> const
+        &input_tensors,
+    ProfilingSettings const &profiling_settings,
+    DistributedDeviceHandle const &device_handle,
+    FFIterationConfig const &iteration_config);
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_all_passes_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config);
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_forward_pass_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config);
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_backward_pass_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config);
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_update_pass_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/realm.h b/lib/realm-execution/include/realm-execution/realm.h
new file mode 100644
index 0000000000..b6913e66f5
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/realm.h
@@ -0,0 +1,22 @@
+#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_REALM_H
+#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_REALM_H
+
+#define FLEXFLOW_USE_PREALM
+
+#ifdef FLEXFLOW_USE_PREALM
+#include <realm/prealm/prealm.h>
+#else
+#include <realm.h>
+#endif
+
+namespace FlexFlow {
+
+#ifdef FLEXFLOW_USE_PREALM
+namespace Realm = ::PRealm;
+#else
+namespace Realm = ::Realm;
+#endif
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/realm_allocator.h b/lib/realm-execution/include/realm-execution/realm_allocator.h
new file mode 100644
index 0000000000..d72f2d7f91
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/realm_allocator.h
@@ -0,0 +1,33 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_ALLOCATOR_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_ALLOCATOR_H
+
+#include "kernels/allocation.h"
+#include "realm-execution/realm.h"
+
+namespace FlexFlow {
+
+struct RealmAllocator : public IAllocator {
+  RealmAllocator(Realm::Processor processor, Realm::Memory memory);
+
+  RealmAllocator() = delete;
+  RealmAllocator(RealmAllocator const &) = delete;
+  RealmAllocator(RealmAllocator &&) = delete;
+  ~RealmAllocator() = default;
+
+  void *allocate(size_t) override;
+  void deallocate(void *) override;
+
+  DeviceType get_allocation_device_type() const override;
+
+private:
+  Realm::Processor processor;
+  Realm::Memory memory;
+  std::unordered_map<void *, Realm::RegionInstance> ptr_instances;
+};
+CHECK_RC_COPY_VIRTUAL_COMPLIANT(RealmAllocator);
+
+Allocator get_realm_allocator(Realm::Processor processor, Realm::Memory memory);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/realm_context.h b/lib/realm-execution/include/realm-execution/realm_context.h
new file mode 100644
index 0000000000..b8baad41b9
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/realm_context.h
@@ -0,0 +1,84 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_CONTEXT_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_CONTEXT_H
+
+#include "kernels/allocation.h"
+#include "kernels/device_handle_t.dtg.h"
+#include "kernels/managed_per_device_ff_handle.h"
+#include "pcg/device_id_t.dtg.h"
+#include "pcg/machine_space_coordinate.dtg.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+#include <optional>
+#include <unordered_map>
+
+namespace FlexFlow {
+
+struct RealmContext {
+public:
+  RealmContext(Realm::Processor processor);
+  virtual ~RealmContext();
+
+  RealmContext() = delete;
+  RealmContext(RealmContext const &) = delete;
+  RealmContext(RealmContext &&) = delete;
+
+  // Device mapping
+  Realm::Processor
+      map_device_coord_to_processor(MachineSpaceCoordinate const &);
+  static Realm::Memory get_nearest_memory(Realm::Processor);
+
+  // Current device context
+  Realm::Processor get_current_processor() const;
+  Allocator &get_current_device_allocator();
+  device_id_t get_current_device_idx() const;
+
+  // Task creation
+  Realm::Event spawn_task(Realm::Processor proc,
+                          task_id_t task_id,
+                          void const *args,
+                          size_t arglen,
+                          Realm::ProfilingRequestSet const &requests,
+                          Realm::Event wait_on = Realm::Event::NO_EVENT,
+                          int priority = 0);
+
+  Realm::Event
+      collective_spawn_task(Realm::Processor target_proc,
+                            task_id_t task_id,
+                            void const *args,
+                            size_t arglen,
+                            Realm::Event wait_on = Realm::Event::NO_EVENT,
+                            int priority = 0);
+
+  // Instance management
+  std::pair<Realm::RegionInstance, Realm::Event>
+      create_instance(Realm::Memory memory,
+                      TensorShape const &shape,
+                      Realm::ProfilingRequestSet const &prs,
+                      Realm::Event wait_on = Realm::Event::NO_EVENT);
+
+  // Get the current set of outstanding events
+  Realm::Event get_outstanding_events();
+
+protected:
+  // Compact AND CLEAR the outstanding event queue
+  // Important: USER MUST BLOCK on event or else use it, or it WILL BE LOST
+  [[nodiscard]] Realm::Event merge_outstanding_events();
+
+  void discover_machine_topology();
+
+  static std::optional<ManagedPerDeviceFFHandle>
+      make_device_handle_for_processor(Realm::Processor processor);
+
+protected:
+  Realm::Runtime runtime;
+  Realm::Processor processor;
+  Allocator allocator;
+  std::vector<Realm::Event> outstanding_events;
+  std::unordered_map<std::pair<Realm::AddressSpace, Realm::Processor::Kind>,
+                     std::vector<Realm::Processor>>
+      processors;
+};
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/realm_manager.h b/lib/realm-execution/include/realm-execution/realm_manager.h
new file mode 100644
index 0000000000..8a79476bcf
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/realm_manager.h
@@ -0,0 +1,28 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_MANAGER_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_REALM_MANAGER_H
+
+#include "kernels/allocation.h"
+#include "kernels/device_handle_t.dtg.h"
+#include "pcg/device_id_t.dtg.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+
+namespace FlexFlow {
+
+struct RealmManager : private RealmContext {
+public:
+  RealmManager(int *argc, char ***argv);
+  virtual ~RealmManager();
+
+  RealmManager() = delete;
+  RealmManager(RealmManager const &) = delete;
+  RealmManager(RealmManager &&) = delete;
+
+  [[nodiscard]] Realm::Event
+      start_controller(std::function<void(RealmContext &)>,
+                       Realm::Event wait_on = Realm::Event::NO_EVENT);
+};
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/controller_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/controller_task.h
new file mode 100644
index 0000000000..7134973ead
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/controller_task.h
@@ -0,0 +1,20 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_CONTROLLER_TASK_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_CONTROLLER_TASK_H
+
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+
+namespace FlexFlow {
+
+void controller_task_body(
+    void const *, size_t, void const *, size_t, Realm::Processor);
+
+Realm::Event
+    collective_spawn_controller_task(RealmContext &ctx,
+                                     Realm::Processor &target_proc,
+                                     std::function<void(RealmContext &)> thunk,
+                                     Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_return_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_return_task.h
new file mode 100644
index 0000000000..a87652b5ce
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_return_task.h
@@ -0,0 +1,22 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_RETURN_TASK_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_RETURN_TASK_H
+
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+
+namespace FlexFlow {
+
+void device_handle_init_return_task_body(
+    void const *, size_t, void const *, size_t, Realm::Processor);
+
+Realm::Event spawn_device_handle_init_return_task(
+    RealmContext &ctx,
+    Realm::Processor origin_proc,
+    DeviceSpecificManagedPerDeviceFFHandle const &result,
+    DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr,
+    Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task.h
new file mode 100644
index 0000000000..312ed26add
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task.h
@@ -0,0 +1,23 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_TASK_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_HANDLE_INIT_TASK_H
+
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+
+namespace FlexFlow {
+
+void device_handle_init_task_body(
+    void const *, size_t, void const *, size_t, Realm::Processor);
+
+Realm::Event spawn_device_handle_init_task(
+    RealmContext &ctx,
+    Realm::Processor target_proc,
+    size_t workSpaceSize,
+    bool allowTensorOpMathConversion,
+    DeviceSpecificManagedPerDeviceFFHandle *result_ptr,
+    Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task_args.dtg.toml
new file mode 100644
index 0000000000..c0ba37bb5d
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_handle_init_task_args.dtg.toml
@@ -0,0 +1,26 @@
+namespace = "FlexFlow"
+name = "DeviceHandleInitTaskArgs"
+type = "struct"
+features = []
+
+includes = [
+  "realm-execution/device_specific_managed_per_device_ff_handle.h",
+  "realm-execution/realm.h",
+  "realm-execution/tasks/serializer/serializable_realm_processor.h",
+]
+
+[[fields]]
+name = "workSpaceSize"
+type = "size_t"
+
+[[fields]]
+name = "allowTensorOpMathConversion"
+type = "bool"
+
+[[fields]]
+name = "origin_proc"
+type = "::FlexFlow::Realm::Processor"
+
+[[fields]]
+name = "origin_result_ptr"
+type = "::FlexFlow::DeviceSpecificManagedPerDeviceFFHandle *"
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_return_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_return_task.h
new file mode 100644
index 0000000000..8f44680815
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_return_task.h
@@ -0,0 +1,22 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_RETURN_TASK_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_RETURN_TASK_H
+
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+#include "task-spec/device_specific_per_device_op_state.dtg.h"
+
+namespace FlexFlow {
+
+void device_state_init_return_task_body(
+    void const *, size_t, void const *, size_t, Realm::Processor);
+
+Realm::Event spawn_device_state_init_return_task(
+    RealmContext &ctx,
+    Realm::Processor origin_proc,
+    DeviceSpecificPerDeviceOpState const &result,
+    DeviceSpecificPerDeviceOpState *origin_result_ptr,
+    Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task.h
new file mode 100644
index 0000000000..4ed8c1726d
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task.h
@@ -0,0 +1,31 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_TASK_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_DEVICE_STATE_INIT_TASK_H
+
+#include "kernels/profiling_settings.dtg.h"
+#include "pcg/optimizer_attrs.dtg.h"
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+#include "task-spec/device_specific_per_device_op_state.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h"
+#include "task-spec/ff_iteration_config.dtg.h"
+
+namespace FlexFlow {
+
+void device_state_init_task_body(
+    void const *, size_t, void const *, size_t, Realm::Processor);
+
+std::optional<Realm::Event> spawn_device_state_init_task(
+    RealmContext &ctx,
+    Realm::Processor target_proc,
+    DynamicNodeInvocation const &invocation,
+    ProfilingSettings const &profiling_settings,
+    DeviceSpecificManagedPerDeviceFFHandle const &device_handle,
+    FFIterationConfig const &iteration_config,
+    OptimizerAttrs const &optimizer_attrs,
+    DeviceSpecificPerDeviceOpState *result_ptr,
+    Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task_args.dtg.toml
new file mode 100644
index 0000000000..a9aa77dde9
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/device_state_init_task_args.dtg.toml
@@ -0,0 +1,42 @@
+namespace = "FlexFlow"
+name = "DeviceStateInitTaskArgs"
+type = "struct"
+features = []
+
+includes = [
+  "kernels/profiling_settings.dtg.h",
+  "pcg/optimizer_attrs.dtg.h",
+  "realm-execution/device_specific_managed_per_device_ff_handle.h",
+  "realm-execution/realm.h",
+  "task-spec/device_specific_per_device_op_state.dtg.h",
+  "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h",
+  "task-spec/ff_iteration_config.dtg.h",
+]
+
+[[fields]]
+name = "invocation"
+type = "::FlexFlow::DynamicNodeInvocation"
+
+[[fields]]
+name = "profiling_settings"
+type = "::FlexFlow::ProfilingSettings"
+
+[[fields]]
+name = "device_handle"
+type = "::FlexFlow::DeviceSpecificManagedPerDeviceFFHandle"
+
+[[fields]]
+name = "iteration_config"
+type = "::FlexFlow::FFIterationConfig"
+
+[[fields]]
+name = "optimizer_attrs"
+type = "::FlexFlow::OptimizerAttrs"
+
+[[fields]]
+name = "origin_proc"
+type = "::FlexFlow::Realm::Processor"
+
+[[fields]]
+name = "origin_result_ptr"
+type = "::FlexFlow::DeviceSpecificPerDeviceOpState *"
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/op_task.h b/lib/realm-execution/include/realm-execution/tasks/impl/op_task.h
new file mode 100644
index 0000000000..9d4c2fd451
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/op_task.h
@@ -0,0 +1,29 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_OP_TASK_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_OP_TASK_H
+
+#include "kernels/profiling_settings.dtg.h"
+#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h"
+#include "pcg/optimizer_attrs.dtg.h"
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/realm.h"
+#include "realm-execution/realm_context.h"
+#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h"
+#include "task-spec/ff_iteration_config.dtg.h"
+
+namespace FlexFlow {
+
+void op_task_body(void const *, size_t, void const *, size_t, Realm::Processor);
+
+Realm::Event
+    spawn_op_task(RealmContext &ctx,
+                  Realm::Processor target_proc,
+                  DynamicNodeInvocation const &invocation,
+                  ProfilingSettings const &profiling_settings,
+                  DeviceSpecificManagedPerDeviceFFHandle const &device_handle,
+                  FFIterationConfig const &iteration_config,
+                  std::optional<OptimizerAttrs> const &optimizer_attrs,
+                  Realm::Event precondition);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/op_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/op_task_args.dtg.toml
new file mode 100644
index 0000000000..814f9f802b
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/op_task_args.dtg.toml
@@ -0,0 +1,32 @@
+namespace = "FlexFlow"
+name = "OpTaskArgs"
+type = "struct"
+features = []
+
+includes = [
+  "kernels/profiling_settings.dtg.h",
+  "pcg/optimizer_attrs.dtg.h",
+  "realm-execution/device_specific_managed_per_device_ff_handle.h",
+  "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h",
+  "task-spec/ff_iteration_config.dtg.h",
+]
+
+[[fields]]
+name = "invocation"
+type = "::FlexFlow::DynamicNodeInvocation"
+
+[[fields]]
+name = "profiling_settings"
+type = "::FlexFlow::ProfilingSettings"
+
+[[fields]]
+name = "device_handle"
+type = "::FlexFlow::DeviceSpecificManagedPerDeviceFFHandle"
+
+[[fields]]
+name = "iteration_config"
+type = "::FlexFlow::FFIterationConfig"
+
+[[fields]]
+name = "optimizer_attrs"
+type = "std::optional<::FlexFlow::OptimizerAttrs>"
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.toml
new file mode 100644
index 0000000000..34f52880f8
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.toml
@@ -0,0 +1,29 @@
+namespace = "FlexFlow"
+name = "SerializableDeviceHandleInitTaskArgs"
+type = "struct"
+features = [
+  "eq",
+  "fmt",
+  "hash",
+  "json",
+]
+
+includes = [
+  "realm-execution/tasks/serializer/serializable_realm_processor.dtg.h",
+]
+
+[[fields]]
+name = "workSpaceSize"
+type = "size_t"
+
+[[fields]]
+name = "allowTensorOpMathConversion"
+type = "bool"
+
+[[fields]]
+name = "origin_proc"
+type = "::FlexFlow::SerializableRealmProcessor"
+
+[[fields]]
+name = "origin_result_ptr"
+type = "uintptr_t"
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.h b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.h
new file mode 100644
index 0000000000..63d70fe10a
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_handle_init_task_args.h
@@ -0,0 +1,17 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_HANDLE_INIT_TASK_ARGS_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_HANDLE_INIT_TASK_ARGS_H
+
+#include "realm-execution/tasks/impl/device_handle_init_task_args.dtg.h"
+#include "realm-execution/tasks/impl/serializable_device_handle_init_task_args.dtg.h"
+
+namespace FlexFlow {
+
+SerializableDeviceHandleInitTaskArgs
+    device_handle_init_task_args_to_serializable(
+        DeviceHandleInitTaskArgs const &);
+DeviceHandleInitTaskArgs device_handle_init_task_args_from_serializable(
+    SerializableDeviceHandleInitTaskArgs const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.toml
new file mode 100644
index 0000000000..c99d2758c0
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.toml
@@ -0,0 +1,47 @@
+namespace = "FlexFlow"
+name = "SerializableDeviceStateInitTaskArgs"
+type = "struct"
+features = [
+  "eq",
+  "fmt",
+  "hash",
+  "json",
+]
+
+includes = [
+  "kernels/profiling_settings.dtg.h",
+  "pcg/optimizer_attrs.dtg.h",
+  "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h",
+  "realm-execution/tasks/serializer/serializable_realm_processor.dtg.h",
+  "task-spec/device_specific_per_device_op_state.dtg.h",
+  "task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.h",
+  "task-spec/ff_iteration_config.dtg.h",
+]
+
+[[fields]]
+name = "invocation"
+type = "::FlexFlow::SerializableDynamicNodeInvocation"
+
+[[fields]]
+name = "profiling_settings"
+type = "::FlexFlow::ProfilingSettings"
+
+[[fields]]
+name = "device_handle"
+type = "::FlexFlow::SerializableDeviceSpecificPtr"
+
+[[fields]]
+name = "iteration_config"
+type = "::FlexFlow::FFIterationConfig"
+
+[[fields]]
+name = "optimizer_attrs"
+type = "::FlexFlow::OptimizerAttrs"
+
+[[fields]]
+name = "origin_proc"
+type = "::FlexFlow::SerializableRealmProcessor"
+
+[[fields]]
+name = "origin_result_ptr"
+type = "uintptr_t"
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.h b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.h
new file mode 100644
index 0000000000..f028820974
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_device_state_init_task_args.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_STATE_INIT_TASK_ARGS_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_DEVICE_STATE_INIT_TASK_ARGS_H
+
+#include "realm-execution/tasks/impl/device_state_init_task_args.dtg.h"
+#include "realm-execution/tasks/impl/serializable_device_state_init_task_args.dtg.h"
+
+namespace FlexFlow {
+
+SerializableDeviceStateInitTaskArgs device_state_init_task_args_to_serializable(
+    DeviceStateInitTaskArgs const &);
+DeviceStateInitTaskArgs device_state_init_task_args_from_serializable(
+    SerializableDeviceStateInitTaskArgs const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.dtg.toml
new file mode 100644
index 0000000000..a0f89e3ae2
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.dtg.toml
@@ -0,0 +1,42 @@
+namespace = "FlexFlow"
+name = "SerializableOpTaskArgs"
+type = "struct"
+features = [
+  "eq",
+  "fmt",
+  "hash",
+  "json",
+]
+
+includes = [
+  "kernels/profiling_settings.dtg.h",
+  "pcg/optimizer_attrs.dtg.h",
+  "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h",
+  "task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.h",
+  "task-spec/ff_iteration_config.dtg.h",
+]
+
+src_includes = [
+  "utils/fmt/optional.h",
+  "utils/json/optional.h",
+]
+
+[[fields]]
+name = "invocation"
+type = "::FlexFlow::SerializableDynamicNodeInvocation"
+
+[[fields]]
+name = "profiling_settings"
+type = "::FlexFlow::ProfilingSettings"
+
+[[fields]]
+name = "device_handle"
+type = "::FlexFlow::SerializableDeviceSpecificPtr"
+
+[[fields]]
+name = "iteration_config"
+type = "::FlexFlow::FFIterationConfig"
+
+[[fields]]
+name = "optimizer_attrs"
+type = "std::optional<::FlexFlow::OptimizerAttrs>"
diff --git a/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.h b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.h
new file mode 100644
index 0000000000..3b2d05d0b6
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/impl/serializable_op_task_args.h
@@ -0,0 +1,14 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_OP_TASK_ARGS_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_IMPL_SERIALIZABLE_OP_TASK_ARGS_H
+
+#include "realm-execution/tasks/impl/op_task_args.dtg.h"
+#include "realm-execution/tasks/impl/serializable_op_task_args.dtg.h"
+
+namespace FlexFlow {
+
+SerializableOpTaskArgs op_task_args_to_serializable(OpTaskArgs const &);
+OpTaskArgs op_task_args_from_serializable(SerializableOpTaskArgs const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/realm_task_id_t.h b/lib/realm-execution/include/realm-execution/tasks/realm_task_id_t.h
new file mode 100644
index 0000000000..a3c6891fb0
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/realm_task_id_t.h
@@ -0,0 +1,13 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_ID_T_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_ID_T_H
+
+#include "realm-execution/realm.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+
+namespace FlexFlow {
+
+Realm::Processor::TaskFuncID get_realm_task_id_for_task_id(task_id_t);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/realm_task_registry.h b/lib/realm-execution/include/realm-execution/tasks/realm_task_registry.h
new file mode 100644
index 0000000000..8114f1a82c
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/realm_task_registry.h
@@ -0,0 +1,21 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_REGISTRY_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_REALM_TASK_REGISTRY_H
+
+#include "realm-execution/realm.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+
+namespace FlexFlow {
+
+[[nodiscard]] Realm::Event register_task(Realm::Processor::Kind target_kind,
+                                         task_id_t func_id,
+                                         void (*task_body)(void const *,
+                                                           size_t,
+                                                           void const *,
+                                                           size_t,
+                                                           Realm::Processor));
+
+[[nodiscard]] Realm::Event register_all_tasks();
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.toml
new file mode 100644
index 0000000000..07cf61f7e1
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.toml
@@ -0,0 +1,28 @@
+namespace = "FlexFlow"
+name = "SerializableDeviceSpecificPtr"
+type = "struct"
+features = [
+  "eq",
+  "fmt",
+  "hash",
+  "json",
+]
+
+includes = [
+  "pcg/device_id_t.dtg.h",
+  "cstdint",
+  "optional",
+]
+
+src_includes = [
+  "utils/fmt/optional.h",
+  "utils/json/optional.h",
+]
+
+[[fields]]
+name = "device_idx"
+type = "::FlexFlow::device_id_t"
+
+[[fields]]
+name = "ptr"
+type = "std::optional<uintptr_t>"
diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.dtg.toml
new file mode 100644
index 0000000000..3cb64d95c1
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.dtg.toml
@@ -0,0 +1,17 @@
+namespace = "FlexFlow"
+name = "SerializableRealmProcessor"
+type = "struct"
+features = [
+  "eq",
+  "fmt",
+  "hash",
+  "json",
+]
+
+includes = [
+  "realm-execution/realm.h",
+]
+
+[[fields]]
+name = "id"
+type = "::FlexFlow::Realm::Processor::id_t"
diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.h b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.h
new file mode 100644
index 0000000000..6b29b6e223
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_realm_processor.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_SERIALIZABLE_REALM_PROCESSOR_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_SERIALIZABLE_REALM_PROCESSOR_H
+
+#include "realm-execution/realm.h"
+#include "realm-execution/tasks/serializer/serializable_realm_processor.dtg.h"
+
+namespace FlexFlow {
+
+SerializableRealmProcessor
+    realm_processor_to_serializable(Realm::Processor const &);
+Realm::Processor
+    realm_processor_from_serializable(SerializableRealmProcessor const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/task_arg_serializer.h b/lib/realm-execution/include/realm-execution/tasks/serializer/task_arg_serializer.h
new file mode 100644
index 0000000000..3208368d2d
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/serializer/task_arg_serializer.h
@@ -0,0 +1,25 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_TASK_ARG_SERIALIZER_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_SERIALIZER_TASK_ARG_SERIALIZER_H
+
+#include <nlohmann/json.hpp>
+#include <string>
+#include <string_view>
+
+namespace FlexFlow {
+
+template <typename T>
+std::string serialize_task_args(T const &args) {
+  nlohmann::json j = args;
+  return j.dump();
+}
+
+template <typename T>
+T deserialize_task_args(void const *args, size_t arglen) {
+  nlohmann::json j = nlohmann::json::parse(
+      std::string_view{reinterpret_cast<char const *>(args), arglen});
+  return j.get<T>();
+}
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/task-spec/include/task-spec/task_id_t.dtg.toml b/lib/realm-execution/include/realm-execution/tasks/task_id_t.dtg.toml
similarity index 97%
rename from lib/task-spec/include/task-spec/task_id_t.dtg.toml
rename to lib/realm-execution/include/realm-execution/tasks/task_id_t.dtg.toml
index ce2de52d40..97b19b5f51 100644
--- a/lib/task-spec/include/task-spec/task_id_t.dtg.toml
+++ b/lib/realm-execution/include/realm-execution/tasks/task_id_t.dtg.toml
@@ -9,10 +9,16 @@ features = [
 ]
 
 [[values]]
-name = "TOP_LEVEL_TASK_ID"
+name = "CONTROLLER_TASK_ID"
 
 [[values]]
-name = "FF_INIT_TASK_ID"
+name = "DEVICE_HANDLE_INIT_TASK_ID"
+
+[[values]]
+name = "DEVICE_HANDLE_INIT_RETURN_TASK_ID"
+
+[[values]]
+name = "DEVICE_STATE_INIT_RETURN_TASK_ID"
 
 [[values]]
 name = "IMAGE_INIT_TASK_ID"
diff --git a/lib/realm-execution/include/realm-execution/tasks/task_id_t.h b/lib/realm-execution/include/realm-execution/tasks/task_id_t.h
new file mode 100644
index 0000000000..53945d2e5b
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tasks/task_id_t.h
@@ -0,0 +1,28 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_TASK_ID_T_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TASKS_TASK_ID_T_H
+
+#include "op-attrs/pcg_operator_attrs.dtg.h"
+#include "pcg/optimizer_attrs.dtg.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h"
+#include <optional>
+
+namespace FlexFlow {
+
+std::optional<task_id_t>
+    get_task_id_for_op(DynamicNodeAttrs const &,
+                       std::optional<OptimizerAttrs> const &);
+
+std::optional<task_id_t>
+    get_init_task_id_for_op_attrs(PCGOperatorAttrs const &);
+
+std::optional<task_id_t> get_fwd_task_id_for_op_attrs(PCGOperatorAttrs const &);
+
+std::optional<task_id_t> get_bwd_task_id_for_op_attrs(PCGOperatorAttrs const &);
+
+std::optional<task_id_t>
+    get_update_task_id_for_optimizer_attrs(OptimizerAttrs const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/include/realm-execution/tensor_instance_backing.dtg.toml b/lib/realm-execution/include/realm-execution/tensor_instance_backing.dtg.toml
new file mode 100644
index 0000000000..e6a8bd58d9
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tensor_instance_backing.dtg.toml
@@ -0,0 +1,24 @@
+namespace = "FlexFlow"
+name = "TensorInstanceBacking"
+type = "struct"
+features = [
+  "eq",
+  #"fmt",
+  #"hash",
+]
+
+includes = [
+  "<unordered_map>",
+  "realm-execution/realm.h",
+  "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h",
+]
+
+src_includes = [
+  "realm-execution/fmt/instance.h",
+  "utils/hash/unordered_map.h",
+  "utils/fmt/unordered_map.h",
+]
+
+[[fields]]
+name = "backing"
+type = "std::unordered_map<::FlexFlow::DynamicValueAttrs, std::pair<::FlexFlow::Realm::RegionInstance, ::FlexFlow::Realm::Event>>"
diff --git a/lib/realm-execution/include/realm-execution/tensor_instance_backing.h b/lib/realm-execution/include/realm-execution/tensor_instance_backing.h
new file mode 100644
index 0000000000..1d143b7409
--- /dev/null
+++ b/lib/realm-execution/include/realm-execution/tensor_instance_backing.h
@@ -0,0 +1,12 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TENSOR_INSTANCE_BACKING_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_INCLUDE_REALM_EXECUTION_TENSOR_INSTANCE_BACKING_H
+
+#include "realm-execution/tensor_instance_backing.dtg.h"
+
+namespace FlexFlow {
+
+TensorInstanceBacking make_empty_tensor_instance_backing();
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/src/realm-execution/atomic_dependency_set.cc b/lib/realm-execution/src/realm-execution/atomic_dependency_set.cc
new file mode 100644
index 0000000000..ba4fcc5a9f
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/atomic_dependency_set.cc
@@ -0,0 +1,27 @@
+#include "realm-execution/atomic_dependency_set.h"
+
+namespace FlexFlow {
+
+AtomicDependencySet::AtomicDependencySet(Realm::Event precondition)
+    : writer(precondition) {}
+
+void AtomicDependencySet::add_writer(Realm::Event writer) {
+  this->writer =
+      Realm::Event::merge_events(writer, this->get_dependency_for_writer());
+  this->readers.clear();
+}
+
+void AtomicDependencySet::add_reader(Realm::Event reader) {
+  this->readers.push_back(reader);
+}
+
+Realm::Event AtomicDependencySet::get_dependency_for_writer() const {
+  Realm::Event readers = Realm::Event::merge_events(this->readers);
+  return Realm::Event::merge_events(this->writer, readers);
+}
+
+Realm::Event AtomicDependencySet::get_dependency_for_reader() const {
+  return this->writer;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/dependency_set.cc b/lib/realm-execution/src/realm-execution/dependency_set.cc
new file mode 100644
index 0000000000..84412a125d
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/dependency_set.cc
@@ -0,0 +1,49 @@
+#include "realm-execution/dependency_set.h"
+#include "realm-execution/atomic_dependency_set.h"
+#include "utils/containers/contains_key.h"
+
+namespace FlexFlow {
+
+DependencySet::DependencySet(Realm::Event precondition)
+    : precondition(precondition) {}
+
+void DependencySet::add_writer(DynamicValueAttrs const &value,
+                               Realm::Event writer) {
+  AtomicDependencySet &atomic_dependence_set =
+      this->get_atomic_dependency_set(value);
+  atomic_dependence_set.add_writer(writer);
+}
+
+void DependencySet::add_reader(DynamicValueAttrs const &value,
+                               Realm::Event reader) {
+  AtomicDependencySet &atomic_dependence_set =
+      this->get_atomic_dependency_set(value);
+  atomic_dependence_set.add_reader(reader);
+}
+
+Realm::Event DependencySet::get_dependency_for_writer(
+    DynamicValueAttrs const &value) const {
+  if (contains_key(this->atomic_dependencies, value)) {
+    return this->atomic_dependencies.at(value).get_dependency_for_writer();
+  }
+  return this->precondition;
+}
+
+Realm::Event DependencySet::get_dependency_for_reader(
+    DynamicValueAttrs const &value) const {
+  if (contains_key(this->atomic_dependencies, value)) {
+    return this->atomic_dependencies.at(value).get_dependency_for_reader();
+  }
+  return this->precondition;
+}
+
+AtomicDependencySet &
+    DependencySet::get_atomic_dependency_set(DynamicValueAttrs const &value) {
+  if (!contains_key(this->atomic_dependencies, value)) {
+    this->atomic_dependencies.insert(
+        {value, AtomicDependencySet{this->precondition}});
+  }
+  return this->atomic_dependencies.at(value);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/device_specific_managed_per_device_ff_handle.cc b/lib/realm-execution/src/realm-execution/device_specific_managed_per_device_ff_handle.cc
new file mode 100644
index 0000000000..6e0cef0bb2
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/device_specific_managed_per_device_ff_handle.cc
@@ -0,0 +1,57 @@
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "kernels/device_handle_t.h"
+#include "utils/containers/transform.h"
+#include "utils/json/optional.h"
+#include <cstdint>
+
+namespace FlexFlow {
+
+DeviceSpecificManagedPerDeviceFFHandle::DeviceSpecificManagedPerDeviceFFHandle(
+    device_id_t owner, std::optional<ManagedPerDeviceFFHandle *> handle)
+    : owner(owner), handle(handle) {}
+
+std::optional<ManagedPerDeviceFFHandle *>
+    DeviceSpecificManagedPerDeviceFFHandle::get(device_id_t device_idx) const {
+  ASSERT(this->owner == device_idx);
+  return this->handle;
+}
+
+SerializableDeviceSpecificPtr
+    DeviceSpecificManagedPerDeviceFFHandle::serialize() const {
+  return SerializableDeviceSpecificPtr{
+      /*device_idx=*/owner,
+      /*ptr=*/
+      transform(handle,
+                [](ManagedPerDeviceFFHandle *ptr) {
+                  return reinterpret_cast<uintptr_t>(ptr);
+                }),
+  };
+}
+
+DeviceSpecificManagedPerDeviceFFHandle
+    DeviceSpecificManagedPerDeviceFFHandle::deserialize(
+        SerializableDeviceSpecificPtr const &handle) {
+  return DeviceSpecificManagedPerDeviceFFHandle{
+      /*owner=*/handle.device_idx,
+      /*handle=*/
+      transform(handle.ptr,
+                [](uintptr_t ptrval) {
+                  return reinterpret_cast<ManagedPerDeviceFFHandle *>(ptrval);
+                }),
+  };
+}
+
+DeviceSpecificManagedPerDeviceFFHandle make_device_specific_managed_handle(
+    device_id_t const &device_id,
+    std::optional<ManagedPerDeviceFFHandle *> const &managed_handle) {
+  return DeviceSpecificManagedPerDeviceFFHandle{device_id, managed_handle};
+}
+
+device_handle_t device_handle_t_from_device_specific_managed_handle(
+    DeviceSpecificManagedPerDeviceFFHandle const &device_specific,
+    device_id_t device_idx) {
+  return device_handle_t_from_managed_handle_ptr(
+      *device_specific.get(device_idx));
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/distributed_device_handle.cc b/lib/realm-execution/src/realm-execution/distributed_device_handle.cc
new file mode 100644
index 0000000000..87376be9b1
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/distributed_device_handle.cc
@@ -0,0 +1,51 @@
+#include "realm-execution/distributed_device_handle.h"
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/tasks/impl/device_handle_init_task.h"
+#include "task-spec/device_specific.h"
+
+namespace FlexFlow {
+
+DistributedDeviceHandle::DistributedDeviceHandle(
+    std::unordered_map<Realm::Processor,
+                       DeviceSpecificManagedPerDeviceFFHandle> const &handles)
+    : handles(handles) {}
+
+DeviceSpecificManagedPerDeviceFFHandle const &
+    DistributedDeviceHandle::at(Realm::Processor processor) const {
+  return this->handles.at(processor);
+}
+
+DistributedDeviceHandle
+    create_distributed_device_handle(RealmContext &ctx,
+                                     size_t workSpaceSize,
+                                     bool allowTensorOpMathConversion,
+                                     Realm::Event precondition) {
+  std::unordered_map<Realm::Processor, DeviceSpecificManagedPerDeviceFFHandle>
+      handles;
+
+  // Allocate space for the result before launching any tasks
+  Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine());
+  for (Realm::Processor proc : pq) {
+    if (proc.kind() == Realm::Processor::LOC_PROC ||
+        proc.kind() == Realm::Processor::TOC_PROC) {
+      handles.insert({proc,
+                      make_device_specific_managed_handle(
+                          ctx.get_current_device_idx(), std::nullopt)});
+    }
+  }
+
+  for (auto &[proc, handle] : handles) {
+    spawn_device_handle_init_task(ctx,
+                                  proc,
+                                  workSpaceSize,
+                                  allowTensorOpMathConversion,
+                                  &handle,
+                                  precondition);
+  }
+
+  ctx.get_outstanding_events().wait();
+
+  return DistributedDeviceHandle{handles};
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/distributed_device_state_initialization.cc b/lib/realm-execution/src/realm-execution/distributed_device_state_initialization.cc
new file mode 100644
index 0000000000..cab2b49e15
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/distributed_device_state_initialization.cc
@@ -0,0 +1,72 @@
+#include "realm-execution/distributed_device_state_initialization.h"
+#include "local-execution/device_state_initialization.h"
+#include "realm-execution/tasks/impl/device_state_init_task.h"
+#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h"
+#include "utils/optional.h"
+#include <optional>
+#include <unordered_map>
+
+namespace FlexFlow {
+
+DynamicOpenDataflowGraph perform_distributed_device_state_initialization(
+    DynamicOpenDataflowGraph const &dg,
+    RealmContext &ctx,
+    ProfilingSettings const &profiling_settings,
+    DistributedDeviceHandle const &device_handle,
+    FFIterationConfig const &iteration_config,
+    OptimizerAttrs const &optimizer_attrs,
+    Realm::Event precondition) {
+
+  // Initialize all operators and save the per-device op state
+  ASSERT(no_nodes_are_initialized(dg));
+
+  std::unordered_map<DynamicNodeInvocation, DeviceSpecificPerDeviceOpState *>
+      result_map;
+  for (DynamicNodeInvocation const &invocation : dg.invocations) {
+    Realm::Processor target_proc = ctx.map_device_coord_to_processor(
+        assert_unwrap(invocation.node_attrs.device_coord));
+
+    // FIXME: in the absense of a real serializer we're just tossing around raw
+    // bytes, which means we need to bypass the constructor for this type (yes,
+    // ugh)
+    DeviceSpecificPerDeviceOpState *output =
+        static_cast<DeviceSpecificPerDeviceOpState *>(
+            malloc(sizeof(DeviceSpecificPerDeviceOpState)));
+    std::optional<Realm::Event> result =
+        spawn_device_state_init_task(ctx,
+                                     target_proc,
+                                     invocation,
+                                     profiling_settings,
+                                     device_handle.at(target_proc),
+                                     iteration_config,
+                                     optimizer_attrs,
+                                     output,
+                                     precondition);
+    if (result) {
+      result_map[invocation] = output;
+    } else {
+      free(output);
+    }
+  }
+
+  ctx.get_outstanding_events().wait();
+
+  DynamicOpenDataflowGraph result = transform_dynamic_invocation_set(
+      dg, [&](DynamicNodeInvocation const &invocation) {
+        DynamicNodeInvocation result = invocation;
+        auto device_state = result_map.find(invocation);
+        if (device_state != result_map.end()) {
+          result.node_attrs.per_device_op_state = *device_state->second;
+        }
+        return result;
+      });
+
+  for (auto &[invocation, output] : result_map) {
+    free(output);
+  }
+
+  return result;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/fmt/instance.cc b/lib/realm-execution/src/realm-execution/fmt/instance.cc
new file mode 100644
index 0000000000..f8eabe9bb0
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/fmt/instance.cc
@@ -0,0 +1,10 @@
+#include "realm-execution/fmt/instance.h"
+
+namespace FlexFlow {
+
+std::ostream &operator<<(std::ostream &s,
+                         ::FlexFlow::Realm::RegionInstance const &m) {
+  return s << fmt::to_string(m);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/hash/processor.cc b/lib/realm-execution/src/realm-execution/hash/processor.cc
new file mode 100644
index 0000000000..dcc1bc5d06
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/hash/processor.cc
@@ -0,0 +1,11 @@
+#include "realm-execution/hash/processor.h"
+#include <utility>
+
+namespace std {
+
+size_t hash<::FlexFlow::Realm::Processor>::operator()(
+    ::FlexFlow::Realm::Processor const &p) const {
+  return hash<::FlexFlow::Realm::Processor::id_t>{}(p.id);
+}
+
+} // namespace std
diff --git a/lib/realm-execution/src/realm-execution/instance_allocation.cc b/lib/realm-execution/src/realm-execution/instance_allocation.cc
new file mode 100644
index 0000000000..b740859e22
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/instance_allocation.cc
@@ -0,0 +1,76 @@
+#include "realm-execution/instance_allocation.h"
+#include "local-execution/tensor_allocation.h"
+#include "op-attrs/parallel_tensor_shape.h"
+#include "op-attrs/tensor_shape.dtg.h"
+#include "realm-execution/realm_context.h"
+#include "realm-execution/tensor_instance_backing.h"
+#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h"
+#include "task-spec/dynamic_graph/dynamic_tensor_accessor.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h"
+#include "utils/bidict/generate_bidict.h"
+#include "utils/containers/all_are_true.h"
+#include "utils/containers/contains_key.h"
+#include "utils/containers/make.h"
+#include "utils/containers/map_values.h"
+#include "utils/containers/unordered_set_of.h"
+#include "utils/containers/values.h"
+#include "utils/exception.h"
+#include "utils/optional.h"
+
+namespace FlexFlow {
+
+std::pair<Realm::RegionInstance, Realm::Event>
+    perform_instance_allocation_for_value(DynamicNodeAttrs const &node,
+                                          DynamicValueAttrs const &value,
+                                          RealmContext &ctx) {
+  ASSERT(value.accessor == std::nullopt);
+
+  TensorShape shape = get_piece_shape(value.parallel_tensor_shape.value());
+
+  MachineSpaceCoordinate device_coord = assert_unwrap(node.device_coord);
+  Realm::Processor proc = ctx.map_device_coord_to_processor(device_coord);
+  Realm::Memory memory = ctx.get_nearest_memory(proc);
+  return ctx.create_instance(memory, shape, Realm::ProfilingRequestSet());
+}
+
+TensorInstanceBacking perform_instance_allocation(
+    DynamicOpenDataflowGraph const &g,
+    std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> const
+        &preallocated,
+    RealmContext &ctx) {
+  ASSERT(no_tensors_are_allocated(g));
+  ASSERT(tensors_are_ready_for_allocation(g));
+  for (DynamicValueAttrs const &v : keys(preallocated)) {
+    ASSERT(v.accessor == std::nullopt);
+  }
+
+  TensorInstanceBacking result = make_empty_tensor_instance_backing();
+  auto allocate = [&](DynamicNodeAttrs const &n, DynamicValueAttrs const &v) {
+    if (contains_key(preallocated, v)) {
+      // FIXME: Attach external instance to existing allocation and use that
+      NOT_IMPLEMENTED();
+    } else {
+      if (contains_key(result.backing, v)) {
+        return result.backing.at(v);
+      } else {
+        result.backing.insert(
+            std::pair{v, perform_instance_allocation_for_value(n, v, ctx)});
+      }
+    }
+  };
+
+  for (DynamicNodeInvocation const &invocation : g.invocations) {
+    for (DynamicValueAttrs const &input : values(invocation.inputs)) {
+      allocate(invocation.node_attrs, input);
+    }
+    for (DynamicValueAttrs const &output : values(invocation.outputs)) {
+      allocate(invocation.node_attrs, output);
+    }
+  }
+
+  return result;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/pcg_instance/pcg_instance.cc b/lib/realm-execution/src/realm-execution/pcg_instance/pcg_instance.cc
new file mode 100644
index 0000000000..8e6ab022aa
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/pcg_instance/pcg_instance.cc
@@ -0,0 +1,259 @@
+#include "realm-execution/pcg_instance/pcg_instance.h"
+#include "pcg/optimizer_attrs.h"
+#include "realm-execution/dependency_set.h"
+#include "realm-execution/distributed_device_state_initialization.h"
+#include "realm-execution/instance_allocation.h"
+#include "realm-execution/realm_context.h"
+#include "realm-execution/tasks/impl/op_task.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h"
+#include "task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h"
+#include "task-spec/dynamic_graph/loss_insertion.h"
+#include "task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h"
+#include "task-spec/dynamic_graph/pass_expansion.h"
+#include "task-spec/dynamic_graph/shard_expansion.h"
+#include "task-spec/dynamic_graph/update_insertion.h"
+#include "utils/containers/transform.h"
+#include "utils/containers/values.h"
+#include "utils/graph/digraph/algorithms/get_topological_ordering.h"
+#include "utils/optional.h"
+
+namespace FlexFlow {
+
+PCGInstance::PCGInstance(
+    RealmContext &ctx,
+    std::vector<DynamicNodeInvocation> const &execution_order,
+    OptimizerAttrs const &optimizer_attrs,
+    std::optional<Realm::RegionInstance> logit_grad_tensor)
+    : ctx(ctx), execution_order(execution_order),
+      optimizer_attrs(optimizer_attrs), logit_grad_tensor(logit_grad_tensor) {}
+
+RealmContext &PCGInstance::get_realm_context() {
+  return this->ctx;
+}
+std::vector<DynamicNodeInvocation> const &
+    PCGInstance::get_execution_order() const {
+  return this->execution_order;
+}
+OptimizerAttrs const &PCGInstance::get_optimizer_attrs() const {
+  return this->optimizer_attrs;
+}
+void PCGInstance::update_optimizer_attrs_for_next_iter() {
+  this->optimizer_attrs =
+      get_optimizer_attrs_for_next_iter(this->optimizer_attrs);
+}
+std::optional<Realm::RegionInstance>
+    PCGInstance::get_loss_tensor_instance() const {
+  return this->logit_grad_tensor;
+}
+
+PCGInstance create_pcg_instance(
+    RealmContext &ctx,
+    MappedParallelComputationGraph const &mpcg,
+    OptimizerAttrs const &optimizer_attrs,
+    std::optional<LossAttrs> const &loss_attrs,
+    std::optional<GenericTensorAccessorR> label_tensor,
+    std::optional<parallel_tensor_guid_t> logit_tensor,
+    std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> const
+        &input_tensors,
+    ProfilingSettings const &profiling_settings,
+    DistributedDeviceHandle const &device_handle,
+    FFIterationConfig const &iteration_config) {
+
+  DynamicOpenDataflowGraph dg =
+      make_dynamic_open_dataflow_graph_from_mpcg(mpcg);
+  dg = perform_pass_expansion(dg);
+
+  std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor> inputs =
+      input_tensors;
+  std::optional<DynamicValueAttrs> logit_grad_value;
+  if (loss_attrs) {
+    auto [dg2, label_v, logit_grad_v] = perform_loss_insertion(
+        dg,
+        assert_unwrap(loss_attrs),
+        dynamic_tensor_guid_t{assert_unwrap(logit_tensor)});
+    dg = dg2;
+    logit_grad_value = logit_grad_v;
+    inputs.insert(std::pair{label_v, assert_unwrap(label_tensor)});
+  }
+
+  dg = perform_update_insertion(dg, optimizer_attrs);
+  dg = perform_shard_expansion(dg);
+  TensorInstanceBacking backing = perform_instance_allocation(dg, inputs, ctx);
+
+  std::optional<Realm::RegionInstance> logit_grad_tensor =
+      transform(logit_grad_value, [&](DynamicValueAttrs const &lgv) {
+        return backing.backing.at(lgv).first;
+      });
+
+  // FIXME: for now we're going to be lazy and block on everything rather than
+  // do fine-grained dependencies on instances
+  dg = perform_distributed_device_state_initialization(
+      dg,
+      ctx,
+      profiling_settings,
+      device_handle,
+      iteration_config,
+      optimizer_attrs,
+      ctx.get_outstanding_events());
+
+  // Compute the topological ordering of the graph
+  auto [kwarg_graph, node_map] =
+      labelled_open_kwarg_dataflow_graph_from_dynamic_open_dataflow_graph(dg);
+  std::vector<Node> node_topo_order = get_topological_ordering(kwarg_graph);
+  std::vector<DynamicNodeInvocation> invocation_topo_order = transform(
+      node_topo_order, [&](Node node) { return node_map.at_l(node); });
+
+  return PCGInstance{
+      ctx, invocation_topo_order, optimizer_attrs, logit_grad_tensor};
+
+  // TODO list:
+  //  * external instances
+  //  * task argument serializer
+  //  * pass instances to task and convert to tensor accessor
+  //  * copies
+  //  * parallel operator implementation (partition, reduce, gather, etc.)
+  //  * and fused parallel operators (reduce + broadcast = allreduce)
+  //  * memory-optimizing compiler integration (tensor creation/destruction,
+  //  tensor reuse)
+}
+
+static std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    execute_distributed_dynamic_node_invocation_set(
+        RealmContext &ctx,
+        std::vector<DynamicNodeInvocation> const &invocations,
+        OptimizerAttrs const &optimizer_attrs,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config) {
+  // For simplicity we'll track a dependency on all outstanding operations up to
+  // this point. This will create an effective barrier between phases.
+  DependencySet dependency_set{ctx.get_outstanding_events()};
+  return unordered_map_from_pairs(
+      transform(invocations, [&](DynamicNodeInvocation const &invocation) {
+        std::vector<Realm::Event> input_dependencies =
+            transform(vector_of(values(invocation.inputs)),
+                      [&](DynamicValueAttrs const &value) {
+                        return dependency_set.get_dependency_for_reader(value);
+                      });
+        std::vector<Realm::Event> output_dependencies =
+            transform(vector_of(values(invocation.outputs)),
+                      [&](DynamicValueAttrs const &value) {
+                        return dependency_set.get_dependency_for_writer(value);
+                      });
+        Realm::Event dependencies = Realm::Event::merge_events(
+            Realm::Event::merge_events(input_dependencies),
+            Realm::Event::merge_events(output_dependencies));
+        Realm::Processor target_proc = ctx.map_device_coord_to_processor(
+            assert_unwrap(invocation.node_attrs.device_coord));
+        Realm::Event result = spawn_op_task(ctx,
+                                            target_proc,
+                                            invocation,
+                                            profiling_settings,
+                                            device_handle.at(target_proc),
+                                            iteration_config,
+                                            optimizer_attrs,
+                                            dependencies);
+        for (DynamicValueAttrs const &value : values(invocation.inputs)) {
+          dependency_set.add_reader(value, result);
+        }
+        for (DynamicValueAttrs const &value : values(invocation.outputs)) {
+          dependency_set.add_writer(value, result);
+        }
+        return std::pair{invocation.node_attrs.layer_guid, result};
+      }));
+}
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_all_passes_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config) {
+  std::vector<DynamicNodeInvocation> execution_order =
+      instance.get_execution_order();
+  std::unordered_map<dynamic_layer_guid_t, Realm::Event> result =
+      execute_distributed_dynamic_node_invocation_set(
+          /*ctx=*/instance.get_realm_context(),
+          /*invocations=*/execution_order,
+          /*optimizer_attrs=*/instance.get_optimizer_attrs(),
+          /*profiling_settings=*/profiling_settings,
+          /*device_handle=*/device_handle,
+          /*iteration_config=*/iteration_config);
+  instance.update_optimizer_attrs_for_next_iter();
+  return result;
+}
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_forward_pass_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config) {
+  std::vector<DynamicNodeInvocation> execution_order =
+      filter(instance.get_execution_order(),
+             [](DynamicNodeInvocation const &invocation) {
+               DynamicTaskType task_type =
+                   assert_unwrap(invocation.node_attrs.task_type);
+               return task_type == DynamicTaskType::FWD;
+             });
+
+  return execute_distributed_dynamic_node_invocation_set(
+      /*ctx=*/instance.get_realm_context(),
+      /*invocations=*/execution_order,
+      /*optimizer_attrs=*/instance.get_optimizer_attrs(),
+      /*profiling_settings=*/profiling_settings,
+      /*device_handle=*/device_handle,
+      /*iteration_config=*/iteration_config);
+}
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_backward_pass_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config) {
+  std::vector<DynamicNodeInvocation> execution_order =
+      filter(instance.get_execution_order(),
+             [](DynamicNodeInvocation const &invocation) {
+               DynamicTaskType task_type =
+                   assert_unwrap(invocation.node_attrs.task_type);
+               return task_type == DynamicTaskType::BWD;
+             });
+
+  return execute_distributed_dynamic_node_invocation_set(
+      /*ctx=*/instance.get_realm_context(),
+      /*invocations=*/execution_order,
+      /*optimizer_attrs=*/instance.get_optimizer_attrs(),
+      /*profiling_settings=*/profiling_settings,
+      /*device_handle=*/device_handle,
+      /*iteration_config=*/iteration_config);
+}
+
+std::unordered_map<dynamic_layer_guid_t, Realm::Event>
+    perform_update_pass_for_pcg_instance(
+        PCGInstance &instance,
+        ProfilingSettings const &profiling_settings,
+        DistributedDeviceHandle const &device_handle,
+        FFIterationConfig iteration_config) {
+  std::vector<DynamicNodeInvocation> execution_order =
+      filter(instance.get_execution_order(),
+             [](DynamicNodeInvocation const &invocation) {
+               DynamicTaskType task_type =
+                   assert_unwrap(invocation.node_attrs.task_type);
+               return task_type == DynamicTaskType::UPD;
+             });
+
+  std::unordered_map<dynamic_layer_guid_t, Realm::Event> result =
+      execute_distributed_dynamic_node_invocation_set(
+          /*ctx=*/instance.get_realm_context(),
+          /*invocations=*/execution_order,
+          /*optimizer_attrs=*/instance.get_optimizer_attrs(),
+          /*profiling_settings=*/profiling_settings,
+          /*device_handle=*/device_handle,
+          /*iteration_config=*/iteration_config);
+  instance.update_optimizer_attrs_for_next_iter();
+  return result;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/realm_allocator.cc b/lib/realm-execution/src/realm-execution/realm_allocator.cc
new file mode 100644
index 0000000000..f24106b0bc
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/realm_allocator.cc
@@ -0,0 +1,53 @@
+#include "realm-execution/realm_allocator.h"
+#include "kernels/device.h"
+#include "pcg/device_type.dtg.h"
+
+namespace FlexFlow {
+
+RealmAllocator::RealmAllocator(Realm::Processor processor, Realm::Memory memory)
+    : processor(processor), memory(memory) {}
+
+void *RealmAllocator::allocate(size_t requested_memory_size) {
+  Realm::Rect<1> bounds{Realm::Point<1>::ZEROES(),
+                        Realm::Point<1>{requested_memory_size} -
+                            Realm::Point<1>::ONES()};
+  std::vector<size_t> field_sizes{1};
+  Realm::RegionInstance inst;
+  Realm::Event ready =
+      Realm::RegionInstance::create_instance(inst,
+                                             this->memory,
+                                             bounds,
+                                             field_sizes,
+                                             0 /*SOA*/,
+                                             Realm::ProfilingRequestSet{});
+  ready.wait();
+  void *ptr =
+      inst.pointer_untyped(/*offset=*/0, /*datalen=*/requested_memory_size);
+  ASSERT(ptr);
+  this->ptr_instances.insert({ptr, inst});
+  return ptr;
+}
+
+void RealmAllocator::deallocate(void *ptr) {
+  this->ptr_instances.at(ptr).destroy(Realm::Event::NO_EVENT);
+  this->ptr_instances.erase(ptr);
+}
+
+DeviceType RealmAllocator::get_allocation_device_type() const {
+  switch (this->processor.kind()) {
+    case Realm::Processor::Kind::LOC_PROC:
+      return DeviceType::CPU;
+    case Realm::Processor::Kind::TOC_PROC:
+      return DeviceType::GPU;
+    default:
+      PANIC("Unhandled FwbTensorType", this->processor.kind());
+  }
+}
+
+Allocator get_realm_allocator(Realm::Processor processor,
+                              Realm::Memory memory) {
+  Allocator allocator = Allocator::create<RealmAllocator>(processor, memory);
+  return allocator;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/realm_context.cc b/lib/realm-execution/src/realm-execution/realm_context.cc
new file mode 100644
index 0000000000..3427e8cbee
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/realm_context.cc
@@ -0,0 +1,252 @@
+#include "realm-execution/realm_context.h"
+#include "kernels/device_handle_t.dtg.h"
+#include "kernels/device_handle_t.h"
+#include "op-attrs/datatype.h"
+#include "op-attrs/tensor_dims.dtg.h"
+#include "pcg/device_id_t.h"
+#include "pcg/device_type.dtg.h"
+#include "realm-execution/realm_allocator.h"
+#include "realm-execution/tasks/realm_task_id_t.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+#include "utils/containers/contains_key.h"
+#include "utils/containers/transform.h"
+#include "utils/nonnegative_int/nonnegative_int.h"
+#include "utils/one_to_many/one_to_many.h"
+#include "utils/positive_int/positive_int.h"
+
+namespace FlexFlow {
+
+RealmContext::RealmContext(Realm::Processor processor)
+    : processor(processor),
+      allocator(get_realm_allocator(
+          processor, RealmContext::get_nearest_memory(processor))) {}
+
+RealmContext::~RealmContext() {
+  if (!this->outstanding_events.empty()) {
+    Realm::Event outstanding = this->merge_outstanding_events();
+    outstanding.wait();
+  }
+}
+
+static std::tuple<Realm::AddressSpace, Realm::Processor::Kind, nonnegative_int>
+    convert_machine_space_coordinate(
+        MachineSpaceCoordinate const &device_coord) {
+  Realm::AddressSpace as = int{device_coord.node_idx};
+  Realm::Processor::Kind kind;
+  switch (device_coord.device_type) {
+    case DeviceType::CPU:
+      kind = Realm::Processor::Kind::LOC_PROC;
+      break;
+    case DeviceType::GPU:
+      kind = Realm::Processor::Kind::TOC_PROC;
+      break;
+    default:
+      PANIC("Unhandled DeviceType", fmt::to_string(device_coord.device_type));
+      break;
+  }
+  nonnegative_int proc_in_node = device_coord.device_idx;
+  return std::tuple{as, kind, proc_in_node};
+}
+
+Realm::Processor RealmContext::map_device_coord_to_processor(
+    MachineSpaceCoordinate const &device_coord) {
+  this->discover_machine_topology();
+  auto [as, kind, proc_in_node] =
+      convert_machine_space_coordinate(device_coord);
+  return this->processors.at(std::pair{as, kind}).at(int{proc_in_node});
+}
+
+Realm::Memory RealmContext::get_nearest_memory(Realm::Processor proc) {
+  if (!proc.exists()) {
+    return Realm::Memory::NO_MEMORY;
+  }
+
+  // FIMXE: this isn't going to do what you expect until
+  // https://github.com/StanfordLegion/realm/pull/392 merges
+  Realm::Machine::MemoryQuery mq(Realm::Machine::get_machine());
+  mq.best_affinity_to(proc);
+  ASSERT(mq.count() > 0);
+  return mq.first();
+}
+
+Realm::Processor RealmContext::get_current_processor() const {
+  return this->processor;
+}
+
+Allocator &RealmContext::get_current_device_allocator() {
+  return this->allocator;
+}
+
+device_id_t RealmContext::get_current_device_idx() const {
+  Realm::Processor proc = this->get_current_processor();
+
+  // FIXME: find a more efficient way to implement this than scanning the
+  // machine every time
+  Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine());
+  pq.same_address_space_as(proc);
+  nonnegative_int idx{0};
+  for (Realm::Processor p : pq) {
+    if (p == proc) {
+      break;
+    }
+    idx++;
+  }
+
+  switch (proc.kind()) {
+    case Realm::Processor::LOC_PROC:
+      return make_device_id_t_from_idx(idx, DeviceType::CPU);
+    case Realm::Processor::TOC_PROC:
+      return make_device_id_t_from_idx(idx, DeviceType::GPU);
+    default:
+      PANIC("Unhandled Realm::ProcessorKind", fmt::to_string(int{proc.kind()}));
+  }
+}
+
+Realm::Event
+    RealmContext::spawn_task(Realm::Processor proc,
+                             task_id_t task_id,
+                             void const *args,
+                             size_t arglen,
+                             Realm::ProfilingRequestSet const &requests,
+                             Realm::Event wait_on,
+                             int priority) {
+  Realm::Event result = proc.spawn(get_realm_task_id_for_task_id(task_id),
+                                   args,
+                                   arglen,
+                                   requests,
+                                   wait_on,
+                                   priority);
+  this->outstanding_events.push_back(result);
+  return result;
+}
+
+Realm::Event RealmContext::collective_spawn_task(Realm::Processor target_proc,
+                                                 task_id_t task_id,
+                                                 void const *args,
+                                                 size_t arglen,
+                                                 Realm::Event wait_on,
+                                                 int priority) {
+  Realm::Event result =
+      this->runtime.collective_spawn(target_proc,
+                                     get_realm_task_id_for_task_id(task_id),
+                                     args,
+                                     arglen,
+                                     wait_on,
+                                     priority);
+  this->outstanding_events.push_back(result);
+  return result;
+}
+
+template <int N>
+static Realm::Rect<N> rect_from_dims(TensorDims const &dims) {
+  std::vector<int> values{dims.ff_ordered.begin(), dims.ff_ordered.end()};
+  return Realm::Rect<N>{Realm::Point<N>::ZEROES(),
+                        Realm::Point<N>{values.data()} -
+                            Realm::Point<N>::ONES()};
+}
+
+std::pair<Realm::RegionInstance, Realm::Event>
+    RealmContext::create_instance(Realm::Memory memory,
+                                  TensorShape const &shape,
+                                  Realm::ProfilingRequestSet const &prs,
+                                  Realm::Event wait_on) {
+  std::vector<size_t> field_sizes{
+      static_cast<size_t>(int{size_of_datatype(shape.data_type)})};
+  Realm::RegionInstance inst;
+  Realm::Event ready;
+  switch (shape.dims.ff_ordered.num_dims()) {
+#if REALM_MAX_DIM >= 1
+    case 1:
+      ready =
+          Realm::RegionInstance::create_instance(inst,
+                                                 memory,
+                                                 rect_from_dims<1>(shape.dims),
+                                                 field_sizes,
+                                                 0 /*SOA*/,
+                                                 prs,
+                                                 wait_on);
+      break;
+#endif
+#if REALM_MAX_DIM >= 2
+    case 2:
+      ready =
+          Realm::RegionInstance::create_instance(inst,
+                                                 memory,
+                                                 rect_from_dims<2>(shape.dims),
+                                                 field_sizes,
+                                                 0 /*SOA*/,
+                                                 prs,
+                                                 wait_on);
+      break;
+#endif
+#if REALM_MAX_DIM >= 3
+    case 3:
+      ready =
+          Realm::RegionInstance::create_instance(inst,
+                                                 memory,
+                                                 rect_from_dims<3>(shape.dims),
+                                                 field_sizes,
+                                                 0 /*SOA*/,
+                                                 prs,
+                                                 wait_on);
+      break;
+#endif
+#if REALM_MAX_DIM >= 4
+    case 4:
+      ready =
+          Realm::RegionInstance::create_instance(inst,
+                                                 memory,
+                                                 rect_from_dims<4>(shape.dims),
+                                                 field_sizes,
+                                                 0 /*SOA*/,
+                                                 prs,
+                                                 wait_on);
+      break;
+#endif
+#if REALM_MAX_DIM >= 5
+    case 5:
+      ready =
+          Realm::RegionInstance::create_instance(inst,
+                                                 memory,
+                                                 rect_from_dims<5>(shape.dims),
+                                                 field_sizes,
+                                                 0 /*SOA*/,
+                                                 prs,
+                                                 wait_on);
+      break;
+#endif
+    default:
+      PANIC("TensorShape dims greater than REALM_MAX_DIM",
+            fmt::to_string(shape.dims.ff_ordered.num_dims()));
+      break;
+  }
+  this->outstanding_events.push_back(ready);
+  return std::pair{inst, ready};
+}
+
+Realm::Event RealmContext::get_outstanding_events() {
+  Realm::Event result = this->merge_outstanding_events();
+  this->outstanding_events.push_back(result);
+  return result;
+}
+
+Realm::Event RealmContext::merge_outstanding_events() {
+  Realm::Event result = Realm::Event::merge_events(this->outstanding_events);
+  this->outstanding_events.clear();
+  return result;
+}
+
+void RealmContext::discover_machine_topology() {
+  if (!this->processors.empty()) {
+    return;
+  }
+
+  Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine());
+  for (Realm::Processor proc : pq) {
+    Realm::AddressSpace as = proc.address_space();
+    Realm::Processor::Kind kind = proc.kind();
+    this->processors[std::pair{as, kind}].push_back(proc);
+  }
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/realm_manager.cc b/lib/realm-execution/src/realm-execution/realm_manager.cc
new file mode 100644
index 0000000000..fc74fffe5d
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/realm_manager.cc
@@ -0,0 +1,34 @@
+#include "realm-execution/realm_manager.h"
+#include "realm-execution/realm_context.h"
+#include "realm-execution/tasks/impl/controller_task.h"
+#include "realm-execution/tasks/realm_task_registry.h"
+
+namespace FlexFlow {
+
+RealmManager::RealmManager(int *argc, char ***argv)
+    : RealmContext(Realm::Processor::NO_PROC) {
+  bool ok = this->runtime.init(argc, argv);
+  ASSERT(ok);
+
+  // Register all tasks at initialization time so we don't need to later
+  register_all_tasks().wait();
+}
+
+RealmManager::~RealmManager() {
+  Realm::Event outstanding = this->merge_outstanding_events();
+  this->runtime.shutdown(outstanding);
+  this->runtime.wait_for_shutdown();
+}
+
+Realm::Event
+    RealmManager::start_controller(std::function<void(RealmContext &)> thunk,
+                                   Realm::Event wait_on) {
+  Realm::Processor target_proc =
+      Realm::Machine::ProcessorQuery(Realm::Machine::get_machine())
+          .only_kind(Realm::Processor::LOC_PROC)
+          .first();
+
+  return collective_spawn_controller_task(*this, target_proc, thunk, wait_on);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/controller_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/controller_task.cc
new file mode 100644
index 0000000000..285e8acaa7
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/controller_task.cc
@@ -0,0 +1,39 @@
+#include "realm-execution/tasks/impl/op_task.h"
+#include "realm-execution/tasks/task_id_t.h"
+
+namespace FlexFlow {
+
+struct ControllerTaskArgs {
+public:
+  std::function<void(RealmContext &)> thunk;
+};
+
+void controller_task_body(void const *args,
+                          size_t arglen,
+                          void const *userdata,
+                          size_t userlen,
+                          Realm::Processor proc) {
+  ASSERT(arglen == sizeof(ControllerTaskArgs));
+  ControllerTaskArgs task_args =
+      *reinterpret_cast<ControllerTaskArgs const *>(args);
+
+  RealmContext ctx{proc};
+  task_args.thunk(ctx);
+}
+
+Realm::Event
+    collective_spawn_controller_task(RealmContext &ctx,
+                                     Realm::Processor &target_proc,
+                                     std::function<void(RealmContext &)> thunk,
+                                     Realm::Event precondition) {
+  ControllerTaskArgs task_args;
+  task_args.thunk = thunk;
+
+  return ctx.collective_spawn_task(target_proc,
+                                   task_id_t::CONTROLLER_TASK_ID,
+                                   &task_args,
+                                   sizeof(task_args),
+                                   precondition);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_return_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_return_task.cc
new file mode 100644
index 0000000000..bda6f7781c
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_return_task.cc
@@ -0,0 +1,52 @@
+#include "realm-execution/tasks/impl/device_handle_init_task.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+
+namespace FlexFlow {
+
+struct DeviceHandleInitReturnTaskArgs {
+public:
+  DeviceHandleInitReturnTaskArgs() = delete;
+  DeviceHandleInitReturnTaskArgs(
+      DeviceSpecificManagedPerDeviceFFHandle result,
+      Realm::Processor origin_proc,
+      DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr)
+      : result(result), origin_proc(origin_proc),
+        origin_result_ptr(origin_result_ptr) {}
+
+public:
+  DeviceSpecificManagedPerDeviceFFHandle result;
+  Realm::Processor origin_proc;
+  DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr;
+};
+
+void device_handle_init_return_task_body(void const *args,
+                                         size_t arglen,
+                                         void const *userdata,
+                                         size_t userlen,
+                                         Realm::Processor proc) {
+  ASSERT(arglen == sizeof(DeviceHandleInitReturnTaskArgs));
+  DeviceHandleInitReturnTaskArgs task_args =
+      *reinterpret_cast<DeviceHandleInitReturnTaskArgs const *>(args);
+
+  ASSERT(task_args.origin_proc.address_space() == proc.address_space());
+  *task_args.origin_result_ptr = task_args.result;
+}
+
+Realm::Event spawn_device_handle_init_return_task(
+    RealmContext &ctx,
+    Realm::Processor origin_proc,
+    DeviceSpecificManagedPerDeviceFFHandle const &result,
+    DeviceSpecificManagedPerDeviceFFHandle *origin_result_ptr,
+    Realm::Event precondition) {
+  DeviceHandleInitReturnTaskArgs task_args{
+      result, origin_proc, origin_result_ptr};
+
+  return ctx.spawn_task(origin_proc,
+                        task_id_t::DEVICE_HANDLE_INIT_RETURN_TASK_ID,
+                        &task_args,
+                        sizeof(task_args),
+                        Realm::ProfilingRequestSet{},
+                        precondition);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_task.cc
new file mode 100644
index 0000000000..b806aa1277
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/device_handle_init_task.cc
@@ -0,0 +1,81 @@
+#include "realm-execution/tasks/impl/device_handle_init_task.h"
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/tasks/impl/device_handle_init_return_task.h"
+#include "realm-execution/tasks/impl/device_handle_init_task_args.dtg.h"
+#include "realm-execution/tasks/impl/serializable_device_handle_init_task_args.h"
+#include "realm-execution/tasks/serializer/task_arg_serializer.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+#include <type_traits>
+
+namespace FlexFlow {
+
+static std::optional<ManagedPerDeviceFFHandle *>
+    make_device_handle_for_processor(Realm::Processor processor,
+                                     size_t workSpaceSize,
+                                     bool allowTensorOpMathConversion) {
+  switch (processor.kind()) {
+    case Realm::Processor::LOC_PROC:
+      return std::nullopt;
+    case Realm::Processor::TOC_PROC:
+      return new ManagedPerDeviceFFHandle{initialize_multi_gpu_handle(
+          /*num_ranks=*/Realm::Machine::get_machine().get_address_space_count(),
+          /*my_rank=*/processor.address_space(),
+          /*workSpaceSize=*/workSpaceSize,
+          /*allowTensorOpMathConversion=*/allowTensorOpMathConversion)};
+    default:
+      PANIC("Unhandled Realm::ProcessorKind",
+            fmt::to_string(int{processor.kind()}));
+  }
+}
+
+void device_handle_init_task_body(void const *args,
+                                  size_t arglen,
+                                  void const *userdata,
+                                  size_t userlen,
+                                  Realm::Processor proc) {
+  DeviceHandleInitTaskArgs task_args =
+      device_handle_init_task_args_from_serializable(
+          deserialize_task_args<SerializableDeviceHandleInitTaskArgs>(args,
+                                                                      arglen));
+
+  RealmContext ctx{proc};
+  DeviceSpecificManagedPerDeviceFFHandle managed_handle =
+      make_device_specific_managed_handle(
+          ctx.get_current_device_idx(),
+          make_device_handle_for_processor(
+              proc,
+              task_args.workSpaceSize,
+              task_args.allowTensorOpMathConversion));
+
+  spawn_device_handle_init_return_task(ctx,
+                                       task_args.origin_proc,
+                                       managed_handle,
+                                       task_args.origin_result_ptr,
+                                       Realm::Event::NO_EVENT);
+}
+
+Realm::Event spawn_device_handle_init_task(
+    RealmContext &ctx,
+    Realm::Processor target_proc,
+    size_t workSpaceSize,
+    bool allowTensorOpMathConversion,
+    DeviceSpecificManagedPerDeviceFFHandle *result_ptr,
+    Realm::Event precondition) {
+  DeviceHandleInitTaskArgs task_args{
+      workSpaceSize,
+      allowTensorOpMathConversion,
+      ctx.get_current_processor(),
+      result_ptr,
+  };
+
+  std::string args = serialize_task_args(
+      device_handle_init_task_args_to_serializable(task_args));
+  return ctx.spawn_task(target_proc,
+                        task_id_t::DEVICE_HANDLE_INIT_TASK_ID,
+                        args.data(),
+                        args.size(),
+                        Realm::ProfilingRequestSet{},
+                        precondition);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_return_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_return_task.cc
new file mode 100644
index 0000000000..306697e950
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_return_task.cc
@@ -0,0 +1,52 @@
+#include "realm-execution/tasks/impl/device_state_init_return_task.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+
+namespace FlexFlow {
+
+struct DeviceStateInitReturnTaskArgs {
+public:
+  DeviceStateInitReturnTaskArgs() = delete;
+  DeviceStateInitReturnTaskArgs(
+      DeviceSpecificPerDeviceOpState result,
+      Realm::Processor origin_proc,
+      DeviceSpecificPerDeviceOpState *origin_result_ptr)
+      : result(result), origin_proc(origin_proc),
+        origin_result_ptr(origin_result_ptr) {}
+
+public:
+  DeviceSpecificPerDeviceOpState result;
+  Realm::Processor origin_proc;
+  DeviceSpecificPerDeviceOpState *origin_result_ptr;
+};
+
+void device_state_init_return_task_body(void const *args,
+                                        size_t arglen,
+                                        void const *userdata,
+                                        size_t userlen,
+                                        Realm::Processor proc) {
+  ASSERT(arglen == sizeof(DeviceStateInitReturnTaskArgs));
+  DeviceStateInitReturnTaskArgs task_args =
+      *reinterpret_cast<DeviceStateInitReturnTaskArgs const *>(args);
+
+  ASSERT(task_args.origin_proc.address_space() == proc.address_space());
+  *task_args.origin_result_ptr = task_args.result;
+}
+
+Realm::Event spawn_device_state_init_return_task(
+    RealmContext &ctx,
+    Realm::Processor origin_proc,
+    DeviceSpecificPerDeviceOpState const &result,
+    DeviceSpecificPerDeviceOpState *origin_result_ptr,
+    Realm::Event precondition) {
+  DeviceStateInitReturnTaskArgs task_args{
+      result, origin_proc, origin_result_ptr};
+
+  return ctx.spawn_task(origin_proc,
+                        task_id_t::DEVICE_STATE_INIT_RETURN_TASK_ID,
+                        &task_args,
+                        sizeof(task_args),
+                        Realm::ProfilingRequestSet{},
+                        precondition);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_task.cc
new file mode 100644
index 0000000000..99c72cf5e7
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/device_state_init_task.cc
@@ -0,0 +1,89 @@
+#include "realm-execution/tasks/impl/device_state_init_task.h"
+#include "local-execution/device_state_initialization.h"
+#include "realm-execution/tasks/impl/device_state_init_return_task.h"
+#include "realm-execution/tasks/impl/device_state_init_task_args.dtg.h"
+#include "realm-execution/tasks/impl/serializable_device_state_init_task_args.h"
+#include "realm-execution/tasks/serializer/task_arg_serializer.h"
+#include "realm-execution/tasks/task_id_t.dtg.h"
+#include "realm-execution/tasks/task_id_t.h"
+#include "utils/optional.h"
+#include <optional>
+#include <type_traits>
+
+namespace FlexFlow {
+
+void device_state_init_task_body(void const *args,
+                                 size_t arglen,
+                                 void const *userdata,
+                                 size_t userlen,
+                                 Realm::Processor proc) {
+  DeviceStateInitTaskArgs task_args =
+      device_state_init_task_args_from_serializable(
+          deserialize_task_args<SerializableDeviceStateInitTaskArgs>(args,
+                                                                     arglen));
+
+  RealmContext ctx{proc};
+  device_handle_t device_handle =
+      device_handle_t_from_device_specific_managed_handle(
+          task_args.device_handle, ctx.get_current_device_idx());
+  DynamicNodeInvocation result_invocation =
+      initialize_node(task_args.invocation,
+                      ctx.get_current_device_allocator(),
+                      task_args.profiling_settings,
+                      device_handle,
+                      task_args.iteration_config,
+                      task_args.optimizer_attrs,
+                      ctx.get_current_device_idx());
+  DeviceSpecificPerDeviceOpState result_state =
+      assert_unwrap(result_invocation.node_attrs.per_device_op_state);
+  // Important: to make sure this doesn't get deallocated, we intentionally leak
+  // the allocation here
+  DeviceSpecificPerDeviceOpState *result_state_ptr =
+      new DeviceSpecificPerDeviceOpState{result_state};
+  spawn_device_state_init_return_task(ctx,
+                                      task_args.origin_proc,
+                                      *result_state_ptr,
+                                      task_args.origin_result_ptr,
+                                      Realm::Event::NO_EVENT);
+}
+
+std::optional<Realm::Event> spawn_device_state_init_task(
+    RealmContext &ctx,
+    Realm::Processor target_proc,
+    DynamicNodeInvocation const &invocation,
+    ProfilingSettings const &profiling_settings,
+    DeviceSpecificManagedPerDeviceFFHandle const &device_handle,
+    FFIterationConfig const &iteration_config,
+    OptimizerAttrs const &optimizer_attrs,
+    DeviceSpecificPerDeviceOpState *result_ptr,
+    Realm::Event precondition) {
+  DeviceStateInitTaskArgs task_args{
+      invocation,
+      profiling_settings,
+      device_handle,
+      iteration_config,
+      optimizer_attrs,
+      ctx.get_current_processor(),
+      result_ptr,
+  };
+
+  std::optional<task_id_t> task_id =
+      and_then(and_then(invocation.node_attrs.op_attrs,
+                        [](TrainingOperationAttrs const &op_attrs) {
+                          return op_attrs.try_require_pcg_op();
+                        }),
+               get_init_task_id_for_op_attrs);
+  if (task_id.has_value()) {
+    std::string args = serialize_task_args(
+        device_state_init_task_args_to_serializable(task_args));
+    return ctx.spawn_task(target_proc,
+                          assert_unwrap(task_id),
+                          args.data(),
+                          args.size(),
+                          Realm::ProfilingRequestSet{},
+                          precondition);
+  }
+  return std::nullopt;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/op_task.cc b/lib/realm-execution/src/realm-execution/tasks/impl/op_task.cc
new file mode 100644
index 0000000000..d8b8873442
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/op_task.cc
@@ -0,0 +1,67 @@
+#include "realm-execution/tasks/impl/op_task.h"
+#include "local-execution/task_execution.h"
+#include "realm-execution/device_specific_managed_per_device_ff_handle.h"
+#include "realm-execution/tasks/impl/op_task_args.dtg.h"
+#include "realm-execution/tasks/impl/serializable_op_task_args.h"
+#include "realm-execution/tasks/serializer/task_arg_serializer.h"
+#include "realm-execution/tasks/task_id_t.h"
+#include "task-spec/per_device_op_state.h"
+#include "utils/optional.h"
+#include <type_traits>
+
+namespace FlexFlow {
+
+void op_task_body(void const *args,
+                  size_t arglen,
+                  void const *userdata,
+                  size_t userlen,
+                  Realm::Processor proc) {
+  OpTaskArgs task_args = op_task_args_from_serializable(
+      deserialize_task_args<SerializableOpTaskArgs>(args, arglen));
+
+  RealmContext ctx{proc};
+  device_handle_t device_handle =
+      device_handle_t_from_device_specific_managed_handle(
+          task_args.device_handle, ctx.get_current_device_idx());
+  execute_dynamic_node_invocation(
+      /*invocation=*/task_args.invocation,
+      /*allocator=*/ctx.get_current_device_allocator(),
+      /*profiling_settings=*/task_args.profiling_settings,
+      /*ff_handle=*/device_handle,
+      /*per_device_op_state=*/
+      transform(task_args.invocation.node_attrs.per_device_op_state,
+                [&](DeviceSpecificPerDeviceOpState const &op_state) {
+                  return get_device_state_from_device_specific(
+                      op_state, ctx.get_current_device_idx());
+                }),
+      /*iteration_config=*/task_args.iteration_config,
+      /*optimizer_attrs=*/task_args.optimizer_attrs,
+      /*device_idx=*/ctx.get_current_device_idx());
+}
+
+Realm::Event
+    spawn_op_task(RealmContext &ctx,
+                  Realm::Processor target_proc,
+                  DynamicNodeInvocation const &invocation,
+                  ProfilingSettings const &profiling_settings,
+                  DeviceSpecificManagedPerDeviceFFHandle const &device_handle,
+                  FFIterationConfig const &iteration_config,
+                  std::optional<OptimizerAttrs> const &optimizer_attrs,
+                  Realm::Event precondition) {
+  OpTaskArgs task_args{invocation,
+                       profiling_settings,
+                       device_handle,
+                       iteration_config,
+                       optimizer_attrs};
+  std::string args =
+      serialize_task_args(op_task_args_to_serializable(task_args));
+  return ctx.spawn_task(
+      target_proc,
+      assert_unwrap(get_task_id_for_op(invocation.node_attrs, optimizer_attrs)),
+      args.data(),
+      args.size(),
+      Realm::ProfilingRequestSet{},
+      precondition);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_handle_init_task_args.cc b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_handle_init_task_args.cc
new file mode 100644
index 0000000000..a44a5a5db1
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_handle_init_task_args.cc
@@ -0,0 +1,28 @@
+#include "realm-execution/tasks/impl/serializable_device_handle_init_task_args.h"
+
+namespace FlexFlow {
+
+SerializableDeviceHandleInitTaskArgs
+    device_handle_init_task_args_to_serializable(
+        DeviceHandleInitTaskArgs const &args) {
+  return SerializableDeviceHandleInitTaskArgs{
+      /*workSpaceSize=*/args.workSpaceSize,
+      /*allowTensorOpMathConversion=*/args.allowTensorOpMathConversion,
+      /*origin_proc=*/realm_processor_to_serializable(args.origin_proc),
+      /*origin_result_ptr=*/reinterpret_cast<uintptr_t>(args.origin_result_ptr),
+  };
+}
+
+DeviceHandleInitTaskArgs device_handle_init_task_args_from_serializable(
+    SerializableDeviceHandleInitTaskArgs const &args) {
+  return DeviceHandleInitTaskArgs{
+      /*workSpaceSize=*/args.workSpaceSize,
+      /*allowTensorOpMathConversion=*/args.allowTensorOpMathConversion,
+      /*origin_proc=*/realm_processor_from_serializable(args.origin_proc),
+      /*origin_result_ptr=*/
+      reinterpret_cast<DeviceSpecificManagedPerDeviceFFHandle *>(
+          args.origin_result_ptr),
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_state_init_task_args.cc b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_state_init_task_args.cc
new file mode 100644
index 0000000000..528ff26867
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_device_state_init_task_args.cc
@@ -0,0 +1,36 @@
+#include "realm-execution/tasks/impl/serializable_device_state_init_task_args.h"
+#include "realm-execution/tasks/serializer/serializable_realm_processor.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.h"
+
+namespace FlexFlow {
+
+SerializableDeviceStateInitTaskArgs device_state_init_task_args_to_serializable(
+    DeviceStateInitTaskArgs const &args) {
+  return SerializableDeviceStateInitTaskArgs{
+      /*invocation=*/dynamic_node_invocation_to_serializable(args.invocation),
+      /*profiling_settings=*/args.profiling_settings,
+      /*device_handle=*/args.device_handle.serialize(),
+      /*iteration_config=*/args.iteration_config,
+      /*optimizer_attrs=*/args.optimizer_attrs,
+      /*origin_proc=*/realm_processor_to_serializable(args.origin_proc),
+      /*origin_result_ptr=*/reinterpret_cast<uintptr_t>(args.origin_result_ptr),
+  };
+}
+
+DeviceStateInitTaskArgs device_state_init_task_args_from_serializable(
+    SerializableDeviceStateInitTaskArgs const &args) {
+  return DeviceStateInitTaskArgs{
+      /*invocation=*/dynamic_node_invocation_from_serializable(args.invocation),
+      /*profiling_settings=*/args.profiling_settings,
+      /*device_handle=*/
+      DeviceSpecificManagedPerDeviceFFHandle::deserialize(args.device_handle),
+      /*iteration_config=*/args.iteration_config,
+      /*optimizer_attrs=*/args.optimizer_attrs,
+      /*origin_proc=*/realm_processor_from_serializable(args.origin_proc),
+      /*origin_result_ptr=*/
+      reinterpret_cast<DeviceSpecificPerDeviceOpState *>(
+          args.origin_result_ptr),
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/impl/serializable_op_task_args.cc b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_op_task_args.cc
new file mode 100644
index 0000000000..0513bc6df7
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/impl/serializable_op_task_args.cc
@@ -0,0 +1,27 @@
+#include "realm-execution/tasks/impl/serializable_op_task_args.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.h"
+
+namespace FlexFlow {
+
+SerializableOpTaskArgs op_task_args_to_serializable(OpTaskArgs const &args) {
+  return SerializableOpTaskArgs{
+      /*invocation=*/dynamic_node_invocation_to_serializable(args.invocation),
+      /*profiling_settings=*/args.profiling_settings,
+      /*device_handle=*/args.device_handle.serialize(),
+      /*iteration_config=*/args.iteration_config,
+      /*optimizer_attrs=*/args.optimizer_attrs,
+  };
+}
+
+OpTaskArgs op_task_args_from_serializable(SerializableOpTaskArgs const &args) {
+  return OpTaskArgs{
+      /*invocation=*/dynamic_node_invocation_from_serializable(args.invocation),
+      /*profiling_settings=*/args.profiling_settings,
+      /*device_handle=*/
+      DeviceSpecificManagedPerDeviceFFHandle::deserialize(args.device_handle),
+      /*iteration_config=*/args.iteration_config,
+      /*optimizer_attrs=*/args.optimizer_attrs,
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/realm_task_id_t.cc b/lib/realm-execution/src/realm-execution/tasks/realm_task_id_t.cc
new file mode 100644
index 0000000000..ec1aa143a6
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/realm_task_id_t.cc
@@ -0,0 +1,10 @@
+#include "realm-execution/tasks/realm_task_id_t.h"
+
+namespace FlexFlow {
+
+Realm::Processor::TaskFuncID get_realm_task_id_for_task_id(task_id_t task_id) {
+  return Realm::Processor::TASK_ID_FIRST_AVAILABLE +
+         static_cast<Realm::Processor::TaskFuncID>(task_id);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/realm_task_registry.cc b/lib/realm-execution/src/realm-execution/tasks/realm_task_registry.cc
new file mode 100644
index 0000000000..cff12c2391
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/realm_task_registry.cc
@@ -0,0 +1,149 @@
+#include "realm-execution/tasks/realm_task_registry.h"
+#include "realm-execution/tasks/impl/controller_task.h"
+#include "realm-execution/tasks/impl/device_handle_init_return_task.h"
+#include "realm-execution/tasks/impl/device_handle_init_task.h"
+#include "realm-execution/tasks/impl/device_state_init_return_task.h"
+#include "realm-execution/tasks/impl/device_state_init_task.h"
+#include "realm-execution/tasks/impl/op_task.h"
+#include "realm-execution/tasks/realm_task_id_t.h"
+#include "utils/exception.h"
+
+namespace FlexFlow {
+
+Realm::Event register_task(Realm::Processor::Kind target_kind,
+                           task_id_t func_id,
+                           void (*task_body)(void const *,
+                                             size_t,
+                                             void const *,
+                                             size_t,
+                                             Realm::Processor)) {
+  return Realm::Processor::register_task_by_kind(
+      target_kind,
+      /*global=*/false,
+      get_realm_task_id_for_task_id(func_id),
+      Realm::CodeDescriptor(task_body),
+      Realm::ProfilingRequestSet());
+}
+
+Realm::Event register_all_tasks() {
+  std::vector<Realm::Event> pending_registrations;
+
+  std::vector<task_id_t> init_task_ids = {
+      // Init tasks
+      task_id_t::BATCHNORM_INIT_TASK_ID,
+      task_id_t::COMBINE_INIT_TASK_ID,
+      task_id_t::CONV2D_INIT_TASK_ID,
+      task_id_t::DROPOUT_INIT_TASK_ID,
+      task_id_t::ELEMENTBINARY_INIT_TASK_ID,
+      task_id_t::ELEMENTUNARY_INIT_TASK_ID,
+      task_id_t::GATHER_INIT_TASK_ID,
+      task_id_t::LAYERNORM_INIT_TASK_ID,
+      task_id_t::LINEAR_INIT_TASK_ID,
+      task_id_t::ATTENTION_INIT_TASK_ID,
+      task_id_t::POOL2D_INIT_TASK_ID,
+      task_id_t::REDUCE_INIT_TASK_ID,
+      task_id_t::REDUCTION_INIT_TASK_ID,
+      task_id_t::REPARTITION_INIT_TASK_ID,
+      task_id_t::REPLICATE_INIT_TASK_ID,
+      task_id_t::SOFTMAX_INIT_TASK_ID,
+  };
+
+  for (task_id_t task_id : init_task_ids) {
+    pending_registrations.push_back(register_task(
+        Realm::Processor::TOC_PROC, task_id, device_state_init_task_body));
+  }
+
+  std::vector<task_id_t> task_ids = {
+      // Forward tasks
+      task_id_t::BATCHMATMUL_FWD_TASK_ID,
+      task_id_t::BATCHNORM_FWD_TASK_ID,
+      task_id_t::BROADCAST_FWD_TASK_ID,
+      task_id_t::CAST_FWD_TASK_ID,
+      task_id_t::COMBINE_FWD_TASK_ID,
+      task_id_t::CONCAT_FWD_TASK_ID,
+      task_id_t::CONV2D_FWD_TASK_ID,
+      task_id_t::DROPOUT_FWD_TASK_ID,
+      task_id_t::ELEMENTBINARY_FWD_TASK_ID,
+      task_id_t::ELEMENTUNARY_FWD_TASK_ID,
+      task_id_t::EMBED_FWD_TASK_ID,
+      task_id_t::FLAT_FWD_TASK_ID,
+      task_id_t::GATHER_FWD_TASK_ID,
+      task_id_t::LAYERNORM_FWD_TASK_ID,
+      task_id_t::LINEAR_FWD_TASK_ID,
+      task_id_t::ATTENTION_FWD_TASK_ID,
+      task_id_t::POOL2D_FWD_TASK_ID,
+      task_id_t::REDUCE_FWD_TASK_ID,
+      task_id_t::REDUCTION_FWD_TASK_ID,
+      task_id_t::REPARTITION_FWD_TASK_ID,
+      task_id_t::REPLICATE_FWD_TASK_ID,
+      task_id_t::RESHAPE_FWD_TASK_ID,
+      task_id_t::REVERSE_FWD_TASK_ID,
+      task_id_t::SOFTMAX_FWD_TASK_ID,
+      task_id_t::SPLIT_FWD_TASK_ID,
+      task_id_t::TOPK_FWD_TASK_ID,
+      task_id_t::TRANSPOSE_FWD_TASK_ID,
+
+      // Backward tasks
+      task_id_t::BATCHMATMUL_BWD_TASK_ID,
+      task_id_t::BATCHNORM_BWD_TASK_ID,
+      task_id_t::BROADCAST_BWD_TASK_ID,
+      task_id_t::CAST_BWD_TASK_ID,
+      task_id_t::COMBINE_BWD_TASK_ID,
+      task_id_t::CONCAT_BWD_TASK_ID,
+      task_id_t::CONV2D_BWD_TASK_ID,
+      task_id_t::DROPOUT_BWD_TASK_ID,
+      task_id_t::ELEMENTBINARY_BWD_TASK_ID,
+      task_id_t::ELEMENTUNARY_BWD_TASK_ID,
+      task_id_t::EMBED_BWD_TASK_ID,
+      task_id_t::FLAT_BWD_TASK_ID,
+      task_id_t::GATHER_BWD_TASK_ID,
+      task_id_t::LAYERNORM_BWD_TASK_ID,
+      task_id_t::LINEAR_BWD_TASK_ID,
+      task_id_t::ATTENTION_BWD_TASK_ID,
+      task_id_t::POOL2D_BWD_TASK_ID,
+      task_id_t::REDUCE_BWD_TASK_ID,
+      task_id_t::REDUCTION_BWD_TASK_ID,
+      task_id_t::REPARTITION_BWD_TASK_ID,
+      task_id_t::REPLICATE_BWD_TASK_ID,
+      task_id_t::RESHAPE_BWD_TASK_ID,
+      task_id_t::REVERSE_BWD_TASK_ID,
+      task_id_t::SOFTMAX_BWD_TASK_ID,
+      task_id_t::SPLIT_BWD_TASK_ID,
+      task_id_t::TOPK_BWD_TASK_ID,
+      task_id_t::TRANSPOSE_BWD_TASK_ID,
+
+      // Update tasks
+      task_id_t::SGD_UPD_NCCL_TASK_ID,
+      task_id_t::ADAM_UPD_NCCL_TASK_ID,
+  };
+
+  for (task_id_t task_id : task_ids) {
+    pending_registrations.push_back(
+        register_task(Realm::Processor::LOC_PROC, task_id, op_task_body));
+    pending_registrations.push_back(
+        register_task(Realm::Processor::TOC_PROC, task_id, op_task_body));
+  }
+
+  pending_registrations.push_back(register_task(Realm::Processor::LOC_PROC,
+                                                task_id_t::CONTROLLER_TASK_ID,
+                                                controller_task_body));
+  pending_registrations.push_back(
+      register_task(Realm::Processor::LOC_PROC,
+                    task_id_t::DEVICE_HANDLE_INIT_TASK_ID,
+                    device_handle_init_task_body));
+  pending_registrations.push_back(
+      register_task(Realm::Processor::TOC_PROC,
+                    task_id_t::DEVICE_HANDLE_INIT_TASK_ID,
+                    device_handle_init_task_body));
+  pending_registrations.push_back(
+      register_task(Realm::Processor::LOC_PROC,
+                    task_id_t::DEVICE_HANDLE_INIT_RETURN_TASK_ID,
+                    device_handle_init_return_task_body));
+  pending_registrations.push_back(
+      register_task(Realm::Processor::LOC_PROC,
+                    task_id_t::DEVICE_STATE_INIT_RETURN_TASK_ID,
+                    device_state_init_return_task_body));
+  return Realm::Event::merge_events(pending_registrations);
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_realm_processor.cc b/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_realm_processor.cc
new file mode 100644
index 0000000000..b16e2891c4
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_realm_processor.cc
@@ -0,0 +1,15 @@
+#include "realm-execution/tasks/serializer/serializable_realm_processor.h"
+
+namespace FlexFlow {
+
+SerializableRealmProcessor
+    realm_processor_to_serializable(Realm::Processor const &proc) {
+  return SerializableRealmProcessor{proc.id};
+}
+
+Realm::Processor
+    realm_processor_from_serializable(SerializableRealmProcessor const &proc) {
+  return Realm::Processor{proc.id};
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tasks/task_id_t.cc b/lib/realm-execution/src/realm-execution/tasks/task_id_t.cc
new file mode 100644
index 0000000000..94e1b887e7
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tasks/task_id_t.cc
@@ -0,0 +1,193 @@
+#include "realm-execution/tasks/task_id_t.h"
+#include "pcg/optimizer_attrs.dtg.h"
+#include "pcg/optimizers/adam_optimizer_attrs.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h"
+#include "utils/optional.h"
+#include "utils/overload.h"
+
+namespace FlexFlow {
+
+std::optional<task_id_t>
+    get_task_id_for_op(DynamicNodeAttrs const &node_attrs,
+                       std::optional<OptimizerAttrs> const &optimizer_attrs) {
+  DynamicTaskType task_type = assert_unwrap(node_attrs.task_type);
+  switch (task_type) {
+    case DynamicTaskType::FWD:
+      return get_fwd_task_id_for_op_attrs(
+          assert_unwrap(node_attrs.op_attrs).require_pcg_op());
+    case DynamicTaskType::BWD:
+      return get_bwd_task_id_for_op_attrs(
+          assert_unwrap(node_attrs.op_attrs).require_pcg_op());
+    case DynamicTaskType::UPD:
+      return get_update_task_id_for_optimizer_attrs(
+          assert_unwrap(optimizer_attrs));
+    case DynamicTaskType::LOSS:
+      return task_id_t::LOSS_BWD_TASK_ID;
+    default:
+      PANIC("Unhandled DynamicTaskType", task_type);
+  }
+}
+
+std::optional<task_id_t>
+    get_init_task_id_for_op_attrs(PCGOperatorAttrs const &op_attrs) {
+
+  return op_attrs.visit<std::optional<task_id_t>>(overload{
+      [](BatchMatmulAttrs const &) { return std::nullopt; },
+      [](BatchNormAttrs const &) { return task_id_t::BATCHNORM_INIT_TASK_ID; },
+      [](BroadcastAttrs const &) { return std::nullopt; },
+      [](CastAttrs const &) { return std::nullopt; },
+      [](CombineAttrs const &attrs) { return task_id_t::COMBINE_INIT_TASK_ID; },
+      [](ConcatAttrs const &) { return std::nullopt; },
+      [](Conv2DAttrs const &) { return task_id_t::CONV2D_INIT_TASK_ID; },
+      [](DropoutAttrs const &) { return task_id_t::DROPOUT_INIT_TASK_ID; },
+      [](ElementBinaryAttrs const &) {
+        return task_id_t::ELEMENTBINARY_INIT_TASK_ID;
+      },
+      [](ElementUnaryAttrs const &) {
+        return task_id_t::ELEMENTUNARY_INIT_TASK_ID;
+      },
+      [](EmbeddingAttrs const &) { return std::nullopt; },
+      [](FlatAttrs const &) { return std::nullopt; },
+      [](GatherAttrs const &) { return task_id_t::GATHER_INIT_TASK_ID; },
+      [](InputAttrs const &) { return std::nullopt; },
+      [](LayerNormAttrs const &) { return task_id_t::LAYERNORM_INIT_TASK_ID; },
+      [](LinearAttrs const &) { return task_id_t::LINEAR_INIT_TASK_ID; },
+      [](MultiHeadAttentionAttrs const &) {
+        return task_id_t::ATTENTION_INIT_TASK_ID;
+      },
+      [](NoopAttrs const &) { return std::nullopt; },
+      [](Pool2DAttrs const &) { return task_id_t::POOL2D_INIT_TASK_ID; },
+      [](ReduceAttrs const &) { return task_id_t::REDUCE_INIT_TASK_ID; },
+      [](ReductionAttrs const &attrs) {
+        return task_id_t::REDUCTION_INIT_TASK_ID;
+      },
+      [](RepartitionAttrs const &attrs) {
+        return task_id_t::REPARTITION_INIT_TASK_ID;
+      },
+      [](ReplicateAttrs const &attrs) {
+        return task_id_t::REPLICATE_INIT_TASK_ID;
+      },
+      [](ReshapeAttrs const &) { return std::nullopt; },
+      [](ReverseAttrs const &) { return std::nullopt; },
+      [](SoftmaxAttrs const &) { return task_id_t::SOFTMAX_INIT_TASK_ID; },
+      [](SplitAttrs const &) { return std::nullopt; },
+      [](TopKAttrs const &) { return std::nullopt; },
+      [](TransposeAttrs const &) { return std::nullopt; },
+      [](WeightAttrs const &) { return std::nullopt; },
+  });
+}
+
+std::optional<task_id_t>
+    get_fwd_task_id_for_op_attrs(PCGOperatorAttrs const &op_attrs) {
+
+  return op_attrs.visit<std::optional<task_id_t>>(overload{
+      [](BatchMatmulAttrs const &) {
+        return task_id_t::BATCHMATMUL_FWD_TASK_ID;
+      },
+      [](BatchNormAttrs const &) { return task_id_t::BATCHNORM_FWD_TASK_ID; },
+      [](BroadcastAttrs const &) { return task_id_t::BROADCAST_FWD_TASK_ID; },
+      [](CastAttrs const &) { return task_id_t::CAST_FWD_TASK_ID; },
+      [](CombineAttrs const &attrs) { return task_id_t::COMBINE_FWD_TASK_ID; },
+      [](ConcatAttrs const &) { return task_id_t::CONCAT_FWD_TASK_ID; },
+      [](Conv2DAttrs const &) { return task_id_t::CONV2D_FWD_TASK_ID; },
+      [](DropoutAttrs const &) { return task_id_t::DROPOUT_FWD_TASK_ID; },
+      [](ElementBinaryAttrs const &) {
+        return task_id_t::ELEMENTBINARY_FWD_TASK_ID;
+      },
+      [](ElementUnaryAttrs const &) {
+        return task_id_t::ELEMENTUNARY_FWD_TASK_ID;
+      },
+      [](EmbeddingAttrs const &) { return task_id_t::EMBED_FWD_TASK_ID; },
+      [](FlatAttrs const &) { return task_id_t::FLAT_FWD_TASK_ID; },
+      [](GatherAttrs const &) { return task_id_t::GATHER_FWD_TASK_ID; },
+      [](InputAttrs const &) { return std::nullopt; },
+      [](LayerNormAttrs const &) { return task_id_t::LAYERNORM_FWD_TASK_ID; },
+      [](LinearAttrs const &) { return task_id_t::LINEAR_FWD_TASK_ID; },
+      [](MultiHeadAttentionAttrs const &) {
+        return task_id_t::ATTENTION_FWD_TASK_ID;
+      },
+      [](NoopAttrs const &) { return std::nullopt; },
+      [](Pool2DAttrs const &) { return task_id_t::POOL2D_FWD_TASK_ID; },
+      [](ReduceAttrs const &) { return task_id_t::REDUCE_FWD_TASK_ID; },
+      [](ReductionAttrs const &attrs) {
+        return task_id_t::REDUCTION_FWD_TASK_ID;
+      },
+      [](RepartitionAttrs const &attrs) {
+        return task_id_t::REPARTITION_FWD_TASK_ID;
+      },
+      [](ReplicateAttrs const &attrs) {
+        return task_id_t::REPLICATE_FWD_TASK_ID;
+      },
+      [](ReshapeAttrs const &) { return task_id_t::RESHAPE_FWD_TASK_ID; },
+      [](ReverseAttrs const &) { return task_id_t::REVERSE_FWD_TASK_ID; },
+      [](SoftmaxAttrs const &) { return task_id_t::SOFTMAX_FWD_TASK_ID; },
+      [](SplitAttrs const &) { return task_id_t::SPLIT_FWD_TASK_ID; },
+      [](TopKAttrs const &) { return task_id_t::TOPK_FWD_TASK_ID; },
+      [](TransposeAttrs const &) { return task_id_t::TRANSPOSE_FWD_TASK_ID; },
+      [](WeightAttrs const &) { return std::nullopt; },
+  });
+}
+
+std::optional<task_id_t>
+    get_bwd_task_id_for_op_attrs(PCGOperatorAttrs const &op_attrs) {
+
+  return op_attrs.visit<std::optional<task_id_t>>(overload{
+      [](BatchMatmulAttrs const &) {
+        return task_id_t::BATCHMATMUL_BWD_TASK_ID;
+      },
+      [](BatchNormAttrs const &) { return task_id_t::BATCHNORM_BWD_TASK_ID; },
+      [](BroadcastAttrs const &) { return task_id_t::BROADCAST_BWD_TASK_ID; },
+      [](CastAttrs const &) { return task_id_t::CAST_BWD_TASK_ID; },
+      [](CombineAttrs const &attrs) { return task_id_t::COMBINE_BWD_TASK_ID; },
+      [](ConcatAttrs const &) { return task_id_t::CONCAT_BWD_TASK_ID; },
+      [](Conv2DAttrs const &) { return task_id_t::CONV2D_BWD_TASK_ID; },
+      [](DropoutAttrs const &) { return task_id_t::DROPOUT_BWD_TASK_ID; },
+      [](ElementBinaryAttrs const &) {
+        return task_id_t::ELEMENTBINARY_BWD_TASK_ID;
+      },
+      [](ElementUnaryAttrs const &) {
+        return task_id_t::ELEMENTUNARY_BWD_TASK_ID;
+      },
+      [](EmbeddingAttrs const &) { return task_id_t::EMBED_BWD_TASK_ID; },
+      [](FlatAttrs const &) { return task_id_t::FLAT_BWD_TASK_ID; },
+      [](GatherAttrs const &) { return task_id_t::GATHER_BWD_TASK_ID; },
+      [](InputAttrs const &) { return std::nullopt; },
+      [](LayerNormAttrs const &) { return task_id_t::LAYERNORM_BWD_TASK_ID; },
+      [](LinearAttrs const &) { return task_id_t::LINEAR_BWD_TASK_ID; },
+      [](MultiHeadAttentionAttrs const &) {
+        return task_id_t::ATTENTION_BWD_TASK_ID;
+      },
+      [](NoopAttrs const &) { return std::nullopt; },
+      [](Pool2DAttrs const &) { return task_id_t::POOL2D_BWD_TASK_ID; },
+      [](ReduceAttrs const &) { return task_id_t::REDUCE_BWD_TASK_ID; },
+      [](ReductionAttrs const &attrs) {
+        return task_id_t::REDUCTION_BWD_TASK_ID;
+      },
+      [](RepartitionAttrs const &attrs) {
+        return task_id_t::REPARTITION_BWD_TASK_ID;
+      },
+      [](ReplicateAttrs const &attrs) {
+        return task_id_t::REPLICATE_BWD_TASK_ID;
+      },
+      [](ReshapeAttrs const &) { return task_id_t::RESHAPE_BWD_TASK_ID; },
+      [](ReverseAttrs const &) { return task_id_t::REVERSE_BWD_TASK_ID; },
+      [](SoftmaxAttrs const &) { return task_id_t::SOFTMAX_BWD_TASK_ID; },
+      [](SplitAttrs const &) { return task_id_t::SPLIT_BWD_TASK_ID; },
+      [](TopKAttrs const &) { return task_id_t::TOPK_BWD_TASK_ID; },
+      [](TransposeAttrs const &) { return task_id_t::TRANSPOSE_BWD_TASK_ID; },
+      [](WeightAttrs const &) { return std::nullopt; },
+  });
+}
+
+std::optional<task_id_t> get_update_task_id_for_optimizer_attrs(
+    OptimizerAttrs const &optimizer_attrs) {
+
+  return optimizer_attrs.visit<std::optional<task_id_t>>(overload{
+      [](SGDOptimizerAttrs const &) { return task_id_t::SGD_UPD_NCCL_TASK_ID; },
+      [](AdamOptimizerAttrs const &) {
+        return task_id_t::ADAM_UPD_NCCL_TASK_ID;
+      },
+  });
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/src/realm-execution/tensor_instance_backing.cc b/lib/realm-execution/src/realm-execution/tensor_instance_backing.cc
new file mode 100644
index 0000000000..53c2a2b271
--- /dev/null
+++ b/lib/realm-execution/src/realm-execution/tensor_instance_backing.cc
@@ -0,0 +1,11 @@
+#include "realm-execution/tensor_instance_backing.h"
+
+namespace FlexFlow {
+
+TensorInstanceBacking make_empty_tensor_instance_backing() {
+  return TensorInstanceBacking{
+      /*backing=*/{},
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/test/CMakeLists.txt b/lib/realm-execution/test/CMakeLists.txt
new file mode 100644
index 0000000000..b3beff42c0
--- /dev/null
+++ b/lib/realm-execution/test/CMakeLists.txt
@@ -0,0 +1,15 @@
+ff_add_test_executable(
+  NAME
+    realm-execution-tests
+  SRC_PATTERNS
+    src/*.cc
+  PRIVATE_INCLUDE
+    src/
+  DEPS
+    doctest
+    utils-test-common
+    realm-execution
+    kernels
+    op-attrs
+    task-spec
+)
diff --git a/lib/realm-execution/test/src/internal/realm_test_utils.cc b/lib/realm-execution/test/src/internal/realm_test_utils.cc
new file mode 100644
index 0000000000..e381feb8de
--- /dev/null
+++ b/lib/realm-execution/test/src/internal/realm_test_utils.cc
@@ -0,0 +1,28 @@
+#include "internal/realm_test_utils.h"
+#include <fmt/format.h>
+#include <string>
+
+namespace FlexFlow {
+
+static char *leak_string_contents(std::string const &str) {
+  // Realm command-line arguments require char* so intentionally leak the
+  // allocated string contents here
+  std::vector<char> *content = new std::vector<char>{str.begin(), str.end()};
+  content->push_back(0); // NUL byte
+  return content->data();
+}
+
+std::vector<char *> make_fake_realm_args(positive_int num_cpus,
+                                         nonnegative_int num_gpus) {
+  std::vector<char *> result;
+  result.push_back(leak_string_contents("fake_executable_name"));
+  result.push_back(leak_string_contents("-ll:cpu"));
+  result.push_back(leak_string_contents(fmt::to_string(num_cpus)));
+  if (num_gpus > 0) {
+    result.push_back(leak_string_contents("-ll:gpu"));
+    result.push_back(leak_string_contents(fmt::to_string(num_gpus)));
+  }
+  return result;
+}
+
+} // namespace FlexFlow
diff --git a/lib/realm-execution/test/src/internal/realm_test_utils.h b/lib/realm-execution/test/src/internal/realm_test_utils.h
new file mode 100644
index 0000000000..8e2775ad8b
--- /dev/null
+++ b/lib/realm-execution/test/src/internal/realm_test_utils.h
@@ -0,0 +1,15 @@
+#ifndef _FLEXFLOW_LIB_REALM_EXECUTION_TEST_SRC_INTERNAL_REALM_TEST_UTILS_H
+#define _FLEXFLOW_LIB_REALM_EXECUTION_TEST_SRC_INTERNAL_REALM_TEST_UTILS_H
+
+#include "utils/nonnegative_int/nonnegative_int.h"
+#include "utils/positive_int/positive_int.h"
+#include <vector>
+
+namespace FlexFlow {
+
+std::vector<char *> make_fake_realm_args(positive_int num_cpus,
+                                         nonnegative_int num_gpus);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/realm-execution/test/src/realm-execution/distributed_device_handle.cc b/lib/realm-execution/test/src/realm-execution/distributed_device_handle.cc
new file mode 100644
index 0000000000..fb7dff01e3
--- /dev/null
+++ b/lib/realm-execution/test/src/realm-execution/distributed_device_handle.cc
@@ -0,0 +1,36 @@
+#include "realm-execution/distributed_device_handle.h"
+#include "internal/realm_test_utils.h"
+#include "realm-execution/realm_manager.h"
+#include <doctest/doctest.h>
+
+namespace test {
+
+using namespace ::FlexFlow;
+namespace Realm = ::FlexFlow::Realm;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("DistributedDeviceHandle") {
+    std::vector<char *> fake_args =
+        make_fake_realm_args(/*num_cpus=*/2_p, /*num_gpus=*/0_n);
+    int fake_argc = fake_args.size();
+    char **fake_argv = fake_args.data();
+
+    RealmManager manager(&fake_argc, &fake_argv);
+
+    (void)manager.start_controller([](RealmContext &ctx) {
+      DistributedDeviceHandle handle = create_distributed_device_handle(
+          /*ctx=*/ctx,
+          /*workSpaceSize=*/1024 * 1024,
+          /*allowTensorOpMathConversion=*/true);
+
+      // Make sure we have handles for the processors we're expecting
+      Realm::Machine::ProcessorQuery pq(Realm::Machine::get_machine());
+      pq.only_kind(Realm::Processor::LOC_PROC);
+      for (Realm::Processor proc : pq) {
+        handle.at(proc);
+      }
+    });
+  }
+}
+
+} // namespace test
diff --git a/lib/realm-execution/test/src/realm-execution/realm_manager.cc b/lib/realm-execution/test/src/realm-execution/realm_manager.cc
new file mode 100644
index 0000000000..450d7fd3ec
--- /dev/null
+++ b/lib/realm-execution/test/src/realm-execution/realm_manager.cc
@@ -0,0 +1,33 @@
+#include "realm-execution/realm_manager.h"
+#include "internal/realm_test_utils.h"
+#include "realm-execution/distributed_device_handle.h"
+#include <doctest/doctest.h>
+
+namespace test {
+
+using namespace ::FlexFlow;
+namespace Realm = ::FlexFlow::Realm;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("RealmManager") {
+    std::vector<char *> fake_args =
+        make_fake_realm_args(/*num_cpus=*/1_p, /*num_gpus=*/0_n);
+    int fake_argc = fake_args.size();
+    char **fake_argv = fake_args.data();
+
+    // Initialize Realm
+    RealmManager manager(&fake_argc, &fake_argv);
+
+    // Launch a controller
+    int some_data = 123;
+    Realm::Event event = manager.start_controller([&](RealmContext &ctx) {
+      // Data is captured and retains value
+      ASSERT(some_data == 123);
+    });
+    // Need to block on the completion of the event to ensure we don't race,
+    // because the lambda captures the environment
+    event.wait();
+  }
+}
+
+} // namespace test
diff --git a/lib/realm-execution/test/src/realm-execution/test_e2e.cc b/lib/realm-execution/test/src/realm-execution/test_e2e.cc
new file mode 100644
index 0000000000..8e5edf72ad
--- /dev/null
+++ b/lib/realm-execution/test/src/realm-execution/test_e2e.cc
@@ -0,0 +1,230 @@
+#include "internal/realm_test_utils.h"
+#include "kernels/allocation.h"
+#include "op-attrs/tensor_shape.dtg.h"
+#include "op-attrs/tensor_slot_name.dtg.h"
+#include "pcg/device_type.dtg.h"
+#include "pcg/machine_space_coordinate.dtg.h"
+#include "pcg/mapped_parallel_computation_graph/operator_atomic_task_shard_binding.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h"
+#include "pcg/parallel_computation_graph/parallel_tensor_guid_t.dtg.h"
+#include "realm-execution/distributed_device_handle.h"
+#include "realm-execution/pcg_instance/pcg_instance.h"
+#include "realm-execution/realm_manager.h"
+#include "utils/containers/require_only_key.h"
+#include <doctest/doctest.h>
+
+namespace test {
+
+using namespace ::FlexFlow;
+namespace Realm = ::FlexFlow::Realm;
+
+TEST_SUITE(FF_TEST_SUITE) {
+  TEST_CASE("RealmBackend e2e Training") {
+    std::vector<char *> fake_args =
+        make_fake_realm_args(/*num_cpus=*/1_p, /*num_gpus=*/0_n);
+    int fake_argc = fake_args.size();
+    char **fake_argv = fake_args.data();
+
+    RealmManager manager(&fake_argc, &fake_argv);
+
+    (void)manager.start_controller([](RealmContext &ctx) {
+      Allocator allocator = ctx.get_current_device_allocator();
+
+      positive_int batch_size = 10_p;
+      positive_int data_dim = 16_p;
+      positive_int hidden_dim = 32_p;
+      positive_int output_dim = 1_p;
+
+      TensorShape output_tensor_shape = TensorShape{
+          TensorDims{FFOrdered{batch_size, output_dim}}, DataType::FLOAT};
+
+      GenericTensorAccessorW label_tensor_backing =
+          allocator.allocate_tensor(output_tensor_shape);
+
+      // construct computation graph
+      ParallelComputationGraph pcg = empty_parallel_computation_graph();
+
+      TensorShape input_tensor_shape = TensorShape{
+          TensorDims{FFOrdered{batch_size, data_dim}}, DataType::FLOAT};
+
+      TensorShape label_tensor_shape = TensorShape{
+          TensorDims{FFOrdered{batch_size, output_dim}}, DataType::FLOAT};
+      GenericTensorAccessorW label_tensor =
+          allocator.allocate_tensor(label_tensor_shape);
+
+      TensorShape weight_shape_1 = TensorShape{
+          TensorDims{FFOrdered{hidden_dim, data_dim}}, DataType::FLOAT};
+      TensorShape weight_shape_2 = TensorShape{
+          TensorDims{FFOrdered{output_dim, hidden_dim}}, DataType::FLOAT};
+
+      ParallelLayerAddedResult inputs_layer =
+          pcg_add_input_layer_with_grad(pcg, input_tensor_shape);
+      parallel_tensor_guid_t t_input =
+          require_only_key(inputs_layer.outputs, TensorSlotName::OUTPUT);
+
+      ParallelLayerAddedResult weights_layer_1 = add_parallel_layer(
+          pcg,
+          ParallelLayerAttrs{
+              PCGOperatorAttrs{WeightAttrs{
+                  weight_shape_1, InitializerAttrs{GlorotNormalAttrs{0}}}},
+              std::nullopt},
+          {},
+          {});
+      parallel_tensor_guid_t t_weights_1 =
+          require_only_key(weights_layer_1.outputs, TensorSlotName::OUTPUT);
+
+      ParallelLayerAddedResult weights_layer_2 = add_parallel_layer(
+          pcg,
+          ParallelLayerAttrs{
+              PCGOperatorAttrs{WeightAttrs{
+                  weight_shape_2, InitializerAttrs{GlorotNormalAttrs{0}}}},
+              std::nullopt},
+          {},
+          {});
+      parallel_tensor_guid_t t_weights_2 =
+          require_only_key(weights_layer_2.outputs, TensorSlotName::OUTPUT);
+
+      ParallelLayerAddedResult linear_operator_1 = add_parallel_layer(
+          pcg,
+          ParallelLayerAttrs{PCGOperatorAttrs{LinearAttrs{hidden_dim,
+                                                          /*use_bias=*/false,
+                                                          DataType::FLOAT,
+                                                          Activation::RELU,
+                                                          std::nullopt}},
+                             std::nullopt},
+          {
+              {
+                  TensorSlotName::INPUT,
+                  t_input,
+              },
+          },
+          {
+              {
+                  TensorSlotName::WEIGHT,
+                  t_weights_1,
+              },
+          });
+      parallel_tensor_guid_t t_linear_1 =
+          require_only_key(linear_operator_1.outputs, TensorSlotName::OUTPUT);
+
+      ParallelLayerAddedResult linear_operator_2 = add_parallel_layer(
+          pcg,
+          ParallelLayerAttrs{PCGOperatorAttrs{LinearAttrs{output_dim,
+                                                          /*use_bias=*/false,
+                                                          DataType::FLOAT,
+                                                          Activation::RELU,
+                                                          std::nullopt}},
+                             std::nullopt},
+          {
+              {
+                  TensorSlotName::INPUT,
+                  t_linear_1,
+              },
+          },
+          {
+              {
+                  TensorSlotName::WEIGHT,
+                  t_weights_2,
+              },
+          });
+      parallel_tensor_guid_t t_linear_2 =
+          require_only_key(linear_operator_2.outputs, TensorSlotName::OUTPUT);
+
+      MachineSpaceCoordinate cpu0{0_n, 0_n, DeviceType::CPU};
+      ParallelTensorSpaceCoordinate tensor_coord0{0_n, 0_n, FFOrdered{0_n}};
+      MappedParallelComputationGraph mpcg{
+          pcg,
+          {
+              {inputs_layer.parallel_layer,
+               MappedOperatorTaskGroup{
+                   {{cpu0,
+                     OperatorAtomicTaskShardBinding{
+                         {{TensorSlotName::OUTPUT, tensor_coord0}}}}}}},
+              {weights_layer_1.parallel_layer,
+               MappedOperatorTaskGroup{
+                   {{cpu0,
+                     OperatorAtomicTaskShardBinding{
+                         {{TensorSlotName::OUTPUT, tensor_coord0}}}}}}},
+              {weights_layer_2.parallel_layer,
+               MappedOperatorTaskGroup{
+                   {{cpu0,
+                     OperatorAtomicTaskShardBinding{
+                         {{TensorSlotName::OUTPUT, tensor_coord0}}}}}}},
+              {linear_operator_1.parallel_layer,
+               MappedOperatorTaskGroup{
+                   {{cpu0,
+                     OperatorAtomicTaskShardBinding{{
+                         {TensorSlotName::INPUT, tensor_coord0},
+                         {TensorSlotName::WEIGHT, tensor_coord0},
+                         {TensorSlotName::OUTPUT, tensor_coord0},
+                     }}}}}},
+              {linear_operator_2.parallel_layer,
+               MappedOperatorTaskGroup{
+                   {{cpu0,
+                     OperatorAtomicTaskShardBinding{{
+                         {TensorSlotName::INPUT, tensor_coord0},
+                         {TensorSlotName::WEIGHT, tensor_coord0},
+                         {TensorSlotName::OUTPUT, tensor_coord0},
+                     }}}}}},
+          },
+      };
+
+      // instantiate computation graph
+      LossAttrs loss_attrs = LossAttrs{
+          NonconfigurableLossAttrs{LossFunction::CATEGORICAL_CROSSENTROPY}};
+      OptimizerAttrs optimizer_attrs =
+          OptimizerAttrs{SGDOptimizerAttrs{/*lr=*/0.001,
+                                           /*momentum=*/0.9,
+                                           /*nesterov=*/false,
+                                           /*weight_decay=*/0.001}};
+
+      std::unordered_map<DynamicValueAttrs, DynamicTensorAccessor>
+          input_tensors;
+
+      DistributedDeviceHandle device_handle = create_distributed_device_handle(
+          ctx,
+          /*workSpaceSize=*/1024 * 1024,
+          /*allowTensorOpMathConversion=*/true);
+
+      PCGInstance pcg_instance = create_pcg_instance(
+          /*ctx=*/ctx,
+          /*mpcg=*/mpcg,
+          /*optimizer=*/optimizer_attrs,
+          /*loss=*/loss_attrs,
+          /*label_tensor=*/label_tensor,
+          /*logit_tensor=*/t_linear_2,
+          /*input_tensors=*/input_tensors,
+          /*profiling_settings=*/ProfilingSettings{0, 0},
+          /*device_handle=*/device_handle,
+          /*iteration_config=*/FFIterationConfig{1_p});
+
+      // begin training loop
+      int num_epochs = 5;
+      std::vector<GenericTensorAccessorR> loss_values;
+
+      for (int i = 0; i < num_epochs; i++) {
+        perform_all_passes_for_pcg_instance(
+            /*instance=*/pcg_instance,
+            /*profiling_settings=*/ProfilingSettings{0, 0},
+            /*device_handle=*/device_handle,
+            /*iteration_config=*/FFIterationConfig{1_p});
+        // loss_values.push_back(copy_tensor_accessor_r(
+        //     pcg_instance.get_loss_tensor_accessor().value(),
+        //     allocator));
+      }
+
+      // // Assert that each sample in the batch has a lower loss in last epoch
+      // // than the first epoch
+      // GenericTensorAccessorR first_epoch_loss = loss_values.at(0);
+      // GenericTensorAccessorR last_epoch_loss = loss_values.back();
+      // CHECK_MESSAGE(did_loss_decrease(first_epoch_loss, last_epoch_loss),
+      //               check_kv("first_epoch_loss",
+      //                        format_accessor_r_contents(first_epoch_loss)),
+      //               check_kv("last_epoch_loss",
+      //                        format_accessor_r_contents(last_epoch_loss)));
+    });
+  }
+}
+
+} // namespace test
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml
index c6e6673f33..bd64f52567 100644
--- a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml
+++ b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.toml
@@ -5,6 +5,7 @@ features = [
   "eq",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml
index 75e9099104..c9171b928b 100644
--- a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml
+++ b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.toml
@@ -5,6 +5,7 @@ features = [
   "eq",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h b/lib/task-spec/include/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h
new file mode 100644
index 0000000000..758a0c2813
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h
@@ -0,0 +1,14 @@
+#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_DYNAMIC_OPEN_DATAFLOW_GRAPH_FROM_MPCG_H
+#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_DYNAMIC_OPEN_DATAFLOW_GRAPH_FROM_MPCG_H
+
+#include "pcg/mapped_parallel_computation_graph/mapped_parallel_computation_graph.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.dtg.h"
+
+namespace FlexFlow {
+
+DynamicOpenDataflowGraph make_dynamic_open_dataflow_graph_from_mpcg(
+    MappedParallelComputationGraph const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.toml
new file mode 100644
index 0000000000..3c43e1d637
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.toml
@@ -0,0 +1,43 @@
+namespace = "FlexFlow"
+name = "SerializableDynamicNodeAttrs"
+type = "struct"
+features = [
+  "eq",
+  "hash",
+  "fmt",
+  "json",
+]
+
+includes = [
+  "<optional>",
+  "task-spec/dynamic_graph/dynamic_task_type.dtg.h",
+  "pcg/machine_space_coordinate.dtg.h",
+  "pcg/mapped_parallel_computation_graph/mapped_operator_task_group.h",
+  "task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.h",
+  "task-spec/dynamic_graph/training_operation_attrs.dtg.h",
+]
+
+src_includes = [
+  "utils/fmt/optional.h",
+  "utils/json/optional.h",
+]
+
+[[fields]]
+name = "task_type"
+type = "std::optional<::FlexFlow::DynamicTaskType>"
+
+[[fields]]
+name = "device_coord"
+type = "std::optional<::FlexFlow::MachineSpaceCoordinate>"
+
+[[fields]]
+name = "mapping"
+type = "std::optional<::FlexFlow::MappedOperatorTaskGroup>"
+
+[[fields]]
+name = "op_attrs"
+type = "std::optional<::FlexFlow::TrainingOperationAttrs>"
+
+[[fields]]
+name = "layer_guid"
+type = "::FlexFlow::dynamic_layer_guid_t"
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.h b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.h
new file mode 100644
index 0000000000..7a274a1e7b
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_attrs.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_ATTRS_H
+#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_ATTRS_H
+
+#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.h"
+
+namespace FlexFlow {
+
+SerializableDynamicNodeAttrs
+    dynamic_node_attrs_to_serializable(DynamicNodeAttrs const &);
+DynamicNodeAttrs
+    dynamic_node_attrs_from_serializable(SerializableDynamicNodeAttrs const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.toml
new file mode 100644
index 0000000000..01f4cc8876
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.toml
@@ -0,0 +1,33 @@
+namespace = "FlexFlow"
+name = "SerializableDynamicNodeInvocation"
+type = "struct"
+features = [
+  "eq",
+  "fmt",
+  "hash",
+  "json",
+]
+
+includes = [
+  "<unordered_map>",
+  "task-spec/dynamic_graph/serializable_dynamic_node_attrs.dtg.h",
+  "task-spec/dynamic_graph/dynamic_tensor_slot.dtg.h",
+  "task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.h",
+]
+
+src_includes = [
+  "utils/hash/unordered_map.h",
+  "utils/fmt/unordered_map.h",
+]
+
+[[fields]]
+name = "inputs"
+type = "std::unordered_map<::FlexFlow::DynamicTensorSlot, ::FlexFlow::SerializableDynamicValueAttrs>"
+
+[[fields]]
+name = "node_attrs"
+type = "::FlexFlow::SerializableDynamicNodeAttrs"
+
+[[fields]]
+name = "outputs"
+type = "std::unordered_map<::FlexFlow::DynamicTensorSlot, ::FlexFlow::SerializableDynamicValueAttrs>"
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.h b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.h
new file mode 100644
index 0000000000..2bcdb9a898
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_node_invocation.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_INVOCATION_H
+#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_NODE_INVOCATION_H
+
+#include "task-spec/dynamic_graph/dynamic_node_invocation.dtg.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.dtg.h"
+
+namespace FlexFlow {
+
+SerializableDynamicNodeInvocation
+    dynamic_node_invocation_to_serializable(DynamicNodeInvocation const &);
+DynamicNodeInvocation dynamic_node_invocation_from_serializable(
+    SerializableDynamicNodeInvocation const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.toml
new file mode 100644
index 0000000000..6209bfa247
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.toml
@@ -0,0 +1,38 @@
+namespace = "FlexFlow"
+name = "SerializableDynamicValueAttrs"
+type = "struct"
+features = [
+  "eq",
+  "hash",
+  "fmt",
+  "json",
+]
+
+includes = [
+  "<optional>",
+  "task-spec/dynamic_graph/dynamic_tensor_guid_t.dtg.h",
+  "op-attrs/parallel_tensor_shape.dtg.h",
+  "op-attrs/parallel_tensor_space_coordinate.dtg.h",
+  "task-spec/dynamic_graph/dynamic_tensor_role.dtg.h",
+]
+
+src_includes = [
+  "utils/fmt/optional.h",
+  "utils/json/optional.h",
+]
+
+[[fields]]
+name = "tensor_guid"
+type = "::FlexFlow::dynamic_tensor_guid_t"
+
+[[fields]]
+name = "parallel_tensor_shape"
+type = "std::optional<::FlexFlow::ParallelTensorShape>"
+
+[[fields]]
+name = "shard_coord"
+type = "std::optional<::FlexFlow::ParallelTensorSpaceCoordinate>"
+
+[[fields]]
+name = "role"
+type = "std::optional<::FlexFlow::DynamicTensorRole>"
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.h b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.h
new file mode 100644
index 0000000000..6272265b7e
--- /dev/null
+++ b/lib/task-spec/include/task-spec/dynamic_graph/serializable_dynamic_value_attrs.h
@@ -0,0 +1,16 @@
+#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_VALUE_ATTRS_H
+#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_SERIALIZABLE_DYNAMIC_VALUE_ATTRS_H
+
+#include "task-spec/dynamic_graph/dynamic_value_attrs.dtg.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_value_attrs.dtg.h"
+
+namespace FlexFlow {
+
+SerializableDynamicValueAttrs
+    dynamic_value_attrs_to_serializable(DynamicValueAttrs const &);
+DynamicValueAttrs dynamic_value_attrs_from_serializable(
+    SerializableDynamicValueAttrs const &);
+
+} // namespace FlexFlow
+
+#endif
diff --git a/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml b/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml
index 66c475b3a9..1051d8ac13 100644
--- a/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml
+++ b/lib/task-spec/include/task-spec/dynamic_graph/training_operation_attrs.dtg.toml
@@ -5,6 +5,7 @@ features = [
   "eq",
   "hash",
   "fmt",
+  "json",
 ]
 
 includes = [
diff --git a/lib/task-spec/include/task-spec/ops/impl/dropout.h b/lib/task-spec/include/task-spec/ops/impl/dropout.h
index a7b382ce62..192f2f8244 100644
--- a/lib/task-spec/include/task-spec/ops/impl/dropout.h
+++ b/lib/task-spec/include/task-spec/ops/impl/dropout.h
@@ -2,7 +2,6 @@
 #define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OPS_IMPL_DROPOUT_H
 
 #include "op-attrs/ops/dropout_attrs.dtg.h"
-#include "task-spec/task_id_t.dtg.h"
 #include "task-spec/task_impl_function.dtg.h"
 
 namespace FlexFlow {
diff --git a/lib/task-spec/include/task-spec/ops/op_task_id_t.dtg.toml b/lib/task-spec/include/task-spec/ops/op_task_id_t.dtg.toml
deleted file mode 100644
index 557da6cf4c..0000000000
--- a/lib/task-spec/include/task-spec/ops/op_task_id_t.dtg.toml
+++ /dev/null
@@ -1,18 +0,0 @@
-namespace = "FlexFlow"
-name = "op_task_id_t"
-type = "enum"
-features = [
-  "hash",
-  "json",
-  "rapidcheck",
-  "fmt",
-]
-
-[[values]]
-name = "INIT"
-
-[[values]]
-name = "FWD"
-
-[[values]]
-name = "BWD"
diff --git a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.dtg.toml b/lib/task-spec/include/task-spec/task_id_with_noop_default_t.dtg.toml
deleted file mode 100644
index 50349d5773..0000000000
--- a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.dtg.toml
+++ /dev/null
@@ -1,28 +0,0 @@
-namespace = "FlexFlow"
-name = "task_id_with_noop_default_t"
-type = "variant"
-features = [
-  "eq",
-  "ord",
-  "hash",
-  "fmt",
-  "rapidcheck",
-]
-
-includes = [
-  "task-spec/task_id_t.dtg.h",
-  "<utility>",
-]
-
-src_includes = [
-  "utils/rapidcheck/monostate.h",
-  "utils/fmt/monostate.h",
-]
-
-[[values]]
-type = "::FlexFlow::task_id_t"
-key = "real_task"
-
-[[values]]
-type = "std::monostate"
-key = "noop_task"
diff --git a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.h b/lib/task-spec/include/task-spec/task_id_with_noop_default_t.h
deleted file mode 100644
index 054b73844e..0000000000
--- a/lib/task-spec/include/task-spec/task_id_with_noop_default_t.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TASK_ID_WITH_NOOP_DEFAULT_T_H
-#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_TASK_ID_WITH_NOOP_DEFAULT_T_H
-
-#include "op-attrs/computation_graph_op_attrs.dtg.h"
-#include "op-attrs/operator_type.dtg.h"
-#include "task-spec/ops/op_task_id_t.dtg.h"
-#include "task-spec/task_id_with_noop_default_t.dtg.h"
-
-namespace FlexFlow {
-
-task_id_with_noop_default_t lift_task_id_t(task_id_t);
-task_id_with_noop_default_t default_noop_task();
-
-task_id_with_noop_default_t lower_op_task_id_to_task_id_with_noop_default_t(
-    op_task_id_t, ComputationGraphOpAttrs const &);
-
-task_id_with_noop_default_t
-    get_init_task_id_for_op_attrs(ComputationGraphOpAttrs const &);
-
-task_id_with_noop_default_t
-    get_fwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &);
-
-task_id_with_noop_default_t
-    get_bwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &);
-
-} // namespace FlexFlow
-
-#endif
diff --git a/lib/task-spec/src/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.cc b/lib/task-spec/src/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.cc
new file mode 100644
index 0000000000..ced98dfd44
--- /dev/null
+++ b/lib/task-spec/src/task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.cc
@@ -0,0 +1,78 @@
+#include "task-spec/dynamic_graph/make_dynamic_open_dataflow_graph_from_mpcg.h"
+#include "op-attrs/parallel_tensor_shape.h"
+#include "op-attrs/pcg_operator_attrs.h"
+#include "pcg/parallel_computation_graph/parallel_computation_graph.h"
+#include "pcg/parallel_computation_graph/parallel_tensor_attrs.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_layer_guid_t.dtg.h"
+#include "task-spec/dynamic_graph/dynamic_open_dataflow_graph.h"
+#include "task-spec/dynamic_graph/dynamic_tensor_role.h"
+#include "utils/containers/generate_map.h"
+#include <optional>
+#include <unordered_map>
+#include <utility>
+
+namespace FlexFlow {
+
+DynamicOpenDataflowGraph make_dynamic_open_dataflow_graph_from_mpcg(
+    MappedParallelComputationGraph const &mpcg) {
+  DynamicOpenDataflowGraph result = make_empty_dynamic_open_dataflow_graph();
+
+  for (auto const &[layer, attrs] :
+       get_parallel_layer_attrs_mapping(mpcg.pcg)) {
+    DynamicNodeAttrs result_attrs{
+        /*task_type=*/std::nullopt,
+        /*device_coord=*/std::nullopt,
+        /*mapping=*/mpcg.mapped_tasks.at(layer),
+        /*op_attrs=*/TrainingOperationAttrs{attrs.op_attrs},
+        /*pcg_layer_guid=*/dynamic_layer_guid_t{layer},
+        /*per_device_op_state=*/std::nullopt,
+    };
+
+    std::unordered_map<DynamicTensorSlot, DynamicValueAttrs> result_inputs =
+        transform(get_incoming_tensors(mpcg.pcg, layer),
+                  [&](TensorSlotName const &slot_name,
+                      parallel_tensor_guid_t const &tensor) {
+                    ParallelTensorAttrs attrs =
+                        get_parallel_tensor_attrs(mpcg.pcg, tensor);
+                    return std::pair<DynamicTensorSlot, DynamicValueAttrs>{
+                        DynamicTensorSlot{
+                            /*slot_name=*/slot_name,
+                            /*slot_tensor_role=*/std::nullopt,
+                        },
+                        DynamicValueAttrs{
+                            /*tensor_guid=*/dynamic_tensor_guid_t{tensor},
+                            /*parallel_tensor_shape=*/attrs.shape,
+                            /*shard_coord=*/std::nullopt,
+                            /*accessor=*/std::nullopt,
+                            /*role=*/std::nullopt,
+                        },
+                    };
+                  });
+    std::unordered_map<DynamicTensorSlot, DynamicValueAttrs> result_outputs =
+        transform(get_outgoing_tensors(mpcg.pcg, layer),
+                  [&](TensorSlotName const &slot_name,
+                      parallel_tensor_guid_t const &tensor) {
+                    ParallelTensorAttrs attrs =
+                        get_parallel_tensor_attrs(mpcg.pcg, tensor);
+                    return std::pair<DynamicTensorSlot, DynamicValueAttrs>{
+                        DynamicTensorSlot{
+                            /*slot_name=*/slot_name,
+                            /*slot_tensor_role=*/std::nullopt,
+                        },
+                        DynamicValueAttrs{
+                            /*tensor_guid=*/dynamic_tensor_guid_t{tensor},
+                            /*parallel_tensor_shape=*/attrs.shape,
+                            /*shard_coord=*/std::nullopt,
+                            /*accessor=*/std::nullopt,
+                            /*role=*/std::nullopt,
+                        },
+                    };
+                  });
+
+    result.invocations.emplace(result_inputs, result_attrs, result_outputs);
+  }
+
+  return result;
+}
+
+} // namespace FlexFlow
diff --git a/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_attrs.cc b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_attrs.cc
new file mode 100644
index 0000000000..d613194d14
--- /dev/null
+++ b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_attrs.cc
@@ -0,0 +1,29 @@
+#include "task-spec/dynamic_graph/serializable_dynamic_node_attrs.h"
+#include <optional>
+
+namespace FlexFlow {
+
+SerializableDynamicNodeAttrs
+    dynamic_node_attrs_to_serializable(DynamicNodeAttrs const &attrs) {
+  return SerializableDynamicNodeAttrs{
+      /*task_type=*/attrs.task_type,
+      /*device_coord=*/attrs.device_coord,
+      /*mapping=*/attrs.mapping,
+      /*op_attrs=*/attrs.op_attrs,
+      /*layer_guid=*/attrs.layer_guid,
+  };
+}
+
+DynamicNodeAttrs dynamic_node_attrs_from_serializable(
+    SerializableDynamicNodeAttrs const &attrs) {
+  return DynamicNodeAttrs{
+      /*task_type=*/attrs.task_type,
+      /*device_coord=*/attrs.device_coord,
+      /*mapping=*/attrs.mapping,
+      /*op_attrs=*/attrs.op_attrs,
+      /*layer_guid=*/attrs.layer_guid,
+      /*per_device_op_state=*/std::nullopt,
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_invocation.cc b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_invocation.cc
new file mode 100644
index 0000000000..334623ee67
--- /dev/null
+++ b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_node_invocation.cc
@@ -0,0 +1,31 @@
+#include "task-spec/dynamic_graph/serializable_dynamic_node_invocation.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_node_attrs.h"
+#include "task-spec/dynamic_graph/serializable_dynamic_value_attrs.h"
+#include "utils/containers/map_values.h"
+
+namespace FlexFlow {
+
+SerializableDynamicNodeInvocation dynamic_node_invocation_to_serializable(
+    DynamicNodeInvocation const &invocation) {
+  return SerializableDynamicNodeInvocation{
+      /*inputs=*/map_values(invocation.inputs,
+                            dynamic_value_attrs_to_serializable),
+      /*node_attrs=*/dynamic_node_attrs_to_serializable(invocation.node_attrs),
+      /*outputs=*/
+      map_values(invocation.outputs, dynamic_value_attrs_to_serializable),
+  };
+}
+
+DynamicNodeInvocation dynamic_node_invocation_from_serializable(
+    SerializableDynamicNodeInvocation const &invocation) {
+  return DynamicNodeInvocation{
+      /*inputs=*/map_values(invocation.inputs,
+                            dynamic_value_attrs_from_serializable),
+      /*node_attrs=*/
+      dynamic_node_attrs_from_serializable(invocation.node_attrs),
+      /*outputs=*/
+      map_values(invocation.outputs, dynamic_value_attrs_from_serializable),
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_value_attrs.cc b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_value_attrs.cc
new file mode 100644
index 0000000000..2dc0b509ab
--- /dev/null
+++ b/lib/task-spec/src/task-spec/dynamic_graph/serializable_dynamic_value_attrs.cc
@@ -0,0 +1,27 @@
+#include "task-spec/dynamic_graph/serializable_dynamic_value_attrs.h"
+#include <optional>
+
+namespace FlexFlow {
+
+SerializableDynamicValueAttrs
+    dynamic_value_attrs_to_serializable(DynamicValueAttrs const &attrs) {
+  return SerializableDynamicValueAttrs{
+      /*tensor_guid=*/attrs.tensor_guid,
+      /*parallel_tensor_shape=*/attrs.parallel_tensor_shape,
+      /*shard_coord=*/attrs.shard_coord,
+      /*role=*/attrs.role,
+  };
+}
+
+DynamicValueAttrs dynamic_value_attrs_from_serializable(
+    SerializableDynamicValueAttrs const &attrs) {
+  return DynamicValueAttrs{
+      /*tensor_guid=*/attrs.tensor_guid,
+      /*parallel_tensor_shape=*/attrs.parallel_tensor_shape,
+      /*shard_coord=*/attrs.shard_coord,
+      /*accessor=*/std::nullopt,
+      /*role=*/attrs.role,
+  };
+}
+
+} // namespace FlexFlow
diff --git a/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc b/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc
index ea253b63f8..402e0ef055 100644
--- a/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc
+++ b/lib/task-spec/src/task-spec/dynamic_graph/shard_expansion.cc
@@ -15,7 +15,7 @@ bool value_is_shard_expanded(DynamicValueAttrs const &n) {
 
 bool no_part_of_graph_is_shard_expanded(DynamicOpenDataflowGraph const &g) {
   auto slot_is_shard_expanded = [](DynamicTensorSlot const &) -> bool {
-    return true;
+    return false;
   };
 
   return no_part_of_dynamic_graph_satisfies(g,
@@ -81,4 +81,19 @@ std::unordered_set<DynamicNodeInvocation>
       });
 }
 
+DynamicOpenDataflowGraph
+    perform_shard_expansion(DynamicOpenDataflowGraph const &g) {
+
+  ASSERT(no_part_of_graph_is_shard_expanded(g));
+
+  DynamicOpenDataflowGraph result =
+      flatmap_dynamic_invocation_set(g, [&](DynamicNodeInvocation const &i) {
+        return perform_shard_expansion_for_invocation(i);
+      });
+
+  ASSERT(graph_is_fully_shard_expanded(result));
+
+  return result;
+}
+
 } // namespace FlexFlow
diff --git a/lib/task-spec/src/task-spec/task_id_with_noop_default_t.cc b/lib/task-spec/src/task-spec/task_id_with_noop_default_t.cc
deleted file mode 100644
index 20e0d00c57..0000000000
--- a/lib/task-spec/src/task-spec/task_id_with_noop_default_t.cc
+++ /dev/null
@@ -1,243 +0,0 @@
-#include "task-spec/task_id_with_noop_default_t.h"
-#include "utils/overload.h"
-
-namespace FlexFlow {
-
-task_id_with_noop_default_t lift_task_id_t(task_id_t task_id) {
-  return task_id_with_noop_default_t{task_id};
-}
-
-task_id_with_noop_default_t default_noop_task() {
-  return task_id_with_noop_default_t{std::monostate{}};
-}
-
-task_id_with_noop_default_t lower_op_task_id_to_task_id_with_noop_default_t(
-    op_task_id_t op_task_id, ComputationGraphOpAttrs const &op_attrs) {
-  switch (op_task_id) {
-    case op_task_id_t::INIT:
-      return get_init_task_id_for_op_attrs(op_attrs);
-    case op_task_id_t::FWD:
-      return get_fwd_task_id_for_op_attrs(op_attrs);
-    case op_task_id_t::BWD:
-      return get_bwd_task_id_for_op_attrs(op_attrs);
-    default:
-      PANIC("Unhandled op_task_id_t", op_task_id);
-  }
-}
-
-task_id_with_noop_default_t
-    get_init_task_id_for_op_attrs(ComputationGraphOpAttrs const &op_attrs) {
-
-  return op_attrs.visit<task_id_with_noop_default_t>(overload{
-      [](BatchMatmulAttrs const &) { return default_noop_task(); },
-      [](BatchNormAttrs const &) {
-        return lift_task_id_t(task_id_t::BATCHNORM_INIT_TASK_ID);
-      },
-      [](BroadcastAttrs const &) { return default_noop_task(); },
-      [](CastAttrs const &) { return default_noop_task(); },
-      [](ConcatAttrs const &) { return default_noop_task(); },
-      [](Conv2DAttrs const &) {
-        return lift_task_id_t(task_id_t::CONV2D_INIT_TASK_ID);
-      },
-      [](DropoutAttrs const &) {
-        return lift_task_id_t(task_id_t::DROPOUT_INIT_TASK_ID);
-      },
-      [](ElementBinaryAttrs const &) {
-        return lift_task_id_t(task_id_t::ELEMENTBINARY_INIT_TASK_ID);
-      },
-      [](ElementUnaryAttrs const &) {
-        return lift_task_id_t(task_id_t::ELEMENTUNARY_INIT_TASK_ID);
-      },
-      [](EmbeddingAttrs const &) { return default_noop_task(); },
-      [](FlatAttrs const &) { return default_noop_task(); },
-      [](GatherAttrs const &) {
-        return lift_task_id_t(task_id_t::GATHER_INIT_TASK_ID);
-      },
-      [](InputAttrs const &) { return default_noop_task(); },
-      [](LayerNormAttrs const &) {
-        return lift_task_id_t(task_id_t::LAYERNORM_INIT_TASK_ID);
-      },
-      [](LinearAttrs const &) {
-        return lift_task_id_t(task_id_t::LINEAR_INIT_TASK_ID);
-      },
-      [](MultiHeadAttentionAttrs const &) {
-        return lift_task_id_t(task_id_t::ATTENTION_INIT_TASK_ID);
-      },
-      [](NoopAttrs const &) { return default_noop_task(); },
-      [](Pool2DAttrs const &) {
-        return lift_task_id_t(task_id_t::POOL2D_INIT_TASK_ID);
-      },
-      [](ReduceAttrs const &) {
-        return lift_task_id_t(task_id_t::REDUCE_INIT_TASK_ID);
-      },
-      [](ReshapeAttrs const &) { return default_noop_task(); },
-      [](ReverseAttrs const &) { return default_noop_task(); },
-      [](SoftmaxAttrs const &) {
-        return lift_task_id_t(task_id_t::SOFTMAX_INIT_TASK_ID);
-      },
-      [](SplitAttrs const &) { return default_noop_task(); },
-      [](TopKAttrs const &) { return default_noop_task(); },
-      [](TransposeAttrs const &) { return default_noop_task(); },
-      [](WeightAttrs const &) { return default_noop_task(); },
-  });
-}
-
-task_id_with_noop_default_t
-    get_fwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &op_attrs) {
-
-  return op_attrs.visit<task_id_with_noop_default_t>(overload{
-      [](BatchMatmulAttrs const &) {
-        return lift_task_id_t(task_id_t::BATCHMATMUL_FWD_TASK_ID);
-      },
-      [](BatchNormAttrs const &) {
-        return lift_task_id_t(task_id_t::BATCHNORM_FWD_TASK_ID);
-      },
-      [](BroadcastAttrs const &) {
-        return lift_task_id_t(task_id_t::BROADCAST_FWD_TASK_ID);
-      },
-      [](CastAttrs const &) {
-        return lift_task_id_t(task_id_t::CAST_FWD_TASK_ID);
-      },
-      [](ConcatAttrs const &) {
-        return lift_task_id_t(task_id_t::CONCAT_FWD_TASK_ID);
-      },
-      [](Conv2DAttrs const &) {
-        return lift_task_id_t(task_id_t::CONV2D_FWD_TASK_ID);
-      },
-      [](DropoutAttrs const &) {
-        return lift_task_id_t(task_id_t::DROPOUT_FWD_TASK_ID);
-      },
-      [](ElementBinaryAttrs const &) {
-        return lift_task_id_t(task_id_t::ELEMENTBINARY_FWD_TASK_ID);
-      },
-      [](ElementUnaryAttrs const &) {
-        return lift_task_id_t(task_id_t::ELEMENTUNARY_FWD_TASK_ID);
-      },
-      [](EmbeddingAttrs const &) {
-        return lift_task_id_t(task_id_t::EMBED_FWD_TASK_ID);
-      },
-      [](FlatAttrs const &) {
-        return lift_task_id_t(task_id_t::FLAT_FWD_TASK_ID);
-      },
-      [](GatherAttrs const &) {
-        return lift_task_id_t(task_id_t::GATHER_FWD_TASK_ID);
-      },
-      [](InputAttrs const &) { return default_noop_task(); },
-      [](LayerNormAttrs const &) {
-        return lift_task_id_t(task_id_t::LAYERNORM_FWD_TASK_ID);
-      },
-      [](LinearAttrs const &) {
-        return lift_task_id_t(task_id_t::LINEAR_FWD_TASK_ID);
-      },
-      [](MultiHeadAttentionAttrs const &) {
-        return lift_task_id_t(task_id_t::ATTENTION_FWD_TASK_ID);
-      },
-      [](NoopAttrs const &) { return default_noop_task(); },
-      [](Pool2DAttrs const &) {
-        return lift_task_id_t(task_id_t::POOL2D_FWD_TASK_ID);
-      },
-      [](ReduceAttrs const &) {
-        return lift_task_id_t(task_id_t::REDUCE_FWD_TASK_ID);
-      },
-      [](ReshapeAttrs const &) {
-        return lift_task_id_t(task_id_t::RESHAPE_FWD_TASK_ID);
-      },
-      [](ReverseAttrs const &) {
-        return lift_task_id_t(task_id_t::REVERSE_FWD_TASK_ID);
-      },
-      [](SoftmaxAttrs const &) {
-        return lift_task_id_t(task_id_t::SOFTMAX_FWD_TASK_ID);
-      },
-      [](SplitAttrs const &) {
-        return lift_task_id_t(task_id_t::SPLIT_FWD_TASK_ID);
-      },
-      [](TopKAttrs const &) {
-        return lift_task_id_t(task_id_t::TOPK_FWD_TASK_ID);
-      },
-      [](TransposeAttrs const &) {
-        return lift_task_id_t(task_id_t::TRANSPOSE_FWD_TASK_ID);
-      },
-      [](WeightAttrs const &) { return default_noop_task(); },
-  });
-}
-
-task_id_with_noop_default_t
-    get_bwd_task_id_for_op_attrs(ComputationGraphOpAttrs const &op_attrs) {
-
-  return op_attrs.visit<task_id_with_noop_default_t>(overload{
-      [](BatchMatmulAttrs const &) {
-        return lift_task_id_t(task_id_t::BATCHMATMUL_BWD_TASK_ID);
-      },
-      [](BatchNormAttrs const &) {
-        return lift_task_id_t(task_id_t::BATCHNORM_BWD_TASK_ID);
-      },
-      [](BroadcastAttrs const &) {
-        return lift_task_id_t(task_id_t::BROADCAST_BWD_TASK_ID);
-      },
-      [](CastAttrs const &) {
-        return lift_task_id_t(task_id_t::CAST_BWD_TASK_ID);
-      },
-      [](ConcatAttrs const &) {
-        return lift_task_id_t(task_id_t::CONCAT_BWD_TASK_ID);
-      },
-      [](Conv2DAttrs const &) {
-        return lift_task_id_t(task_id_t::CONV2D_BWD_TASK_ID);
-      },
-      [](DropoutAttrs const &) {
-        return lift_task_id_t(task_id_t::DROPOUT_BWD_TASK_ID);
-      },
-      [](ElementBinaryAttrs const &) {
-        return lift_task_id_t(task_id_t::ELEMENTBINARY_BWD_TASK_ID);
-      },
-      [](ElementUnaryAttrs const &) {
-        return lift_task_id_t(task_id_t::ELEMENTUNARY_BWD_TASK_ID);
-      },
-      [](EmbeddingAttrs const &) {
-        return lift_task_id_t(task_id_t::EMBED_BWD_TASK_ID);
-      },
-      [](FlatAttrs const &) {
-        return lift_task_id_t(task_id_t::FLAT_BWD_TASK_ID);
-      },
-      [](GatherAttrs const &) {
-        return lift_task_id_t(task_id_t::GATHER_BWD_TASK_ID);
-      },
-      [](InputAttrs const &) { return default_noop_task(); },
-      [](LayerNormAttrs const &) {
-        return lift_task_id_t(task_id_t::LAYERNORM_BWD_TASK_ID);
-      },
-      [](LinearAttrs const &) {
-        return lift_task_id_t(task_id_t::LINEAR_BWD_TASK_ID);
-      },
-      [](MultiHeadAttentionAttrs const &) {
-        return lift_task_id_t(task_id_t::ATTENTION_BWD_TASK_ID);
-      },
-      [](NoopAttrs const &) { return default_noop_task(); },
-      [](Pool2DAttrs const &) {
-        return lift_task_id_t(task_id_t::POOL2D_BWD_TASK_ID);
-      },
-      [](ReduceAttrs const &) {
-        return lift_task_id_t(task_id_t::REDUCE_BWD_TASK_ID);
-      },
-      [](ReshapeAttrs const &) {
-        return lift_task_id_t(task_id_t::RESHAPE_BWD_TASK_ID);
-      },
-      [](ReverseAttrs const &) {
-        return lift_task_id_t(task_id_t::REVERSE_BWD_TASK_ID);
-      },
-      [](SoftmaxAttrs const &) {
-        return lift_task_id_t(task_id_t::SOFTMAX_BWD_TASK_ID);
-      },
-      [](SplitAttrs const &) {
-        return lift_task_id_t(task_id_t::SPLIT_BWD_TASK_ID);
-      },
-      [](TopKAttrs const &) {
-        return lift_task_id_t(task_id_t::TOPK_BWD_TASK_ID);
-      },
-      [](TransposeAttrs const &) {
-        return lift_task_id_t(task_id_t::TRANSPOSE_BWD_TASK_ID);
-      },
-      [](WeightAttrs const &) { return default_noop_task(); },
-  });
-}
-
-} // namespace FlexFlow
diff --git a/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml b/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml
index f286fb90a7..5b537eac88 100644
--- a/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml
+++ b/lib/utils/include/utils/graph/kwarg_dataflow_graph/kwarg_dataflow_output.dtg.toml
@@ -6,6 +6,7 @@ features = [
   "ord",
   "hash",
   "fmt",
+  "json",
 ]
 
 template_params = [