diff --git a/aztk/client.py b/aztk/client.py index 1253db54..cdc12ac2 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -67,14 +67,14 @@ def __delete_pool_and_job(self, pool_id: str, keep_logs: bool = False): return job_exists or pool_exists - def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, VmImageModel): + def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, vm_image_model): """ Create a pool and job :param cluster_conf: the configuration object used to create the cluster :type cluster_conf: aztk.models.ClusterConfiguration :parm software_metadata_key: the id of the software being used on the cluster :param start_task: the start task for the cluster - :param VmImageModel: the type of image to provision for the cluster + :param vm_image_model: the type of image to provision for the cluster :param wait: wait until the cluster is ready """ self._get_cluster_data(cluster_conf.cluster_id).save_cluster_config(cluster_conf) @@ -85,7 +85,7 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw # Get a verified node agent sku sku_to_use, image_ref_to_use = \ helpers.select_latest_verified_vm_image_with_node_agent_sku( - VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, self.batch_client) + vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, self.batch_client) network_conf = None if cluster_conf.subnet_id is not None: @@ -99,7 +99,11 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw id=pool_id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + node_agent_sku_id=sku_to_use, + data_disks=[batch_models.DataDisk( + lun=i, + disk_size_gb=data_disk.disk_size_gb + ) for i, data_disk in enumerate(vm_image_model.data_disks)]), vm_size=cluster_conf.vm_size, 
enable_auto_scale=True, auto_scale_formula=auto_scale_formula, @@ -382,7 +386,11 @@ def __submit_job(self, display_name=job_configuration.id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + node_agent_sku_id=sku_to_use, + data_disks=[batch_models.DataDisk( + lun=i, + disk_size_gb=data_disk.disk_size_gb + ) for i, data_disk in enumerate(vm_image_model.data_disks)]), vm_size=job_configuration.vm_size, enable_auto_scale=True, auto_scale_formula=autoscale_formula, diff --git a/aztk/core/models/fields.py b/aztk/core/models/fields.py index 12d1719e..50e33444 100644 --- a/aztk/core/models/fields.py +++ b/aztk/core/models/fields.py @@ -142,6 +142,15 @@ def __set__(self, instance, value): value = [] super().__set__(instance, value) + def __get__(self, instance, _): + if instance is not None: + value = instance._data.get(self) + if value is None: + return instance._data.setdefault(self, self._default(instance)) + return value + + return self + def _resolve(self, value): result = [] for item in value: @@ -158,7 +167,7 @@ def merge(self, instance, value): value = [] if self.merge_strategy == ListMergeStrategy.Append: - current = instance._data.get(self) + current = instance._data.get(self) if current is None: current = [] value = current + value diff --git a/aztk/core/models/model.py b/aztk/core/models/model.py index 6f016f49..8ef194e5 100644 --- a/aztk/core/models/model.py +++ b/aztk/core/models/model.py @@ -90,7 +90,7 @@ def validate(self): def merge(self, other): if not isinstance(other, self.__class__): raise AztkError("Cannot merge {0} as is it not an instance of {1}".format(other, self.__class__.__name__)) - + for field in other._fields.values(): if field in other._data: field.merge(self, other._data[field]) diff --git a/aztk/internal/docker_cmd.py b/aztk/internal/docker_cmd.py index 7dc75e1e..73a6e517 100644 --- a/aztk/internal/docker_cmd.py +++ b/aztk/internal/docker_cmd.py @@ 
-28,7 +28,7 @@ def pass_env(self, env: str): self.cmd.add_option('-e', '{0}'.format(env)) def share_folder(self, folder: str): - self.cmd.add_option('-v', '{0}:{0}'.format(folder)) + self.cmd.add_option('--mount', 'type=bind,src={0},dst={0}'.format(folder)) def open_port(self, port: int): self.cmd.add_option('-p', '{0}:{0}'.format(port)) # Spark Master UI diff --git a/aztk/models/__init__.py b/aztk/models/__init__.py index cf5c2e01..7e30de6a 100644 --- a/aztk/models/__init__.py +++ b/aztk/models/__init__.py @@ -13,6 +13,8 @@ from .remote_login import RemoteLogin from .ssh_log import SSHLog from .vm_image import VmImage +from .data_disk import DataDisk +from .data_disk_format_type import DataDiskFormatType from .software import Software from .cluster import Cluster from .scheduling_target import SchedulingTarget diff --git a/aztk/models/cluster_configuration.py b/aztk/models/cluster_configuration.py index 301deb56..3b456611 100644 --- a/aztk/models/cluster_configuration.py +++ b/aztk/models/cluster_configuration.py @@ -3,12 +3,14 @@ from aztk.utils import deprecated,deprecate, helpers from .custom_script import CustomScript +from .data_disk import DataDisk from .file_share import FileShare from .plugins import PluginConfiguration from .toolkit import Toolkit from .user_configuration import UserConfiguration from .scheduling_target import SchedulingTarget + class ClusterConfiguration(Model): """ Cluster configuration model @@ -36,6 +38,7 @@ class ClusterConfiguration(Model): plugins = fields.List(PluginConfiguration) custom_scripts = fields.List(CustomScript) file_shares = fields.List(FileShare) + data_disks = fields.List(DataDisk) user_configuration = fields.Model(UserConfiguration, default=None) scheduling_target = fields.Enum(SchedulingTarget, default=None) diff --git a/aztk/models/data_disk.py b/aztk/models/data_disk.py new file mode 100644 index 00000000..0cabff0b --- /dev/null +++ b/aztk/models/data_disk.py @@ -0,0 +1,17 @@ +from aztk.core.models import Model, 
fields + +from .data_disk_format_type import DataDiskFormatType + +class DataDisk(Model): + """ + Configuration for an additional local storage disk that is attached to the virtual machine, + formatted and mounted into the Spark Docker container + + Args: + disk_size_gb (int): Which docker endpoint to use. Default to docker hub. + mount_path (:obj:`str`, optional): the path where the disk should be mounted + format_type (:obj:`aztk.models.DataDiskFormatType`, optional): the type of file system format + """ + disk_size_gb = fields.Integer() + mount_path = fields.String() + format_type = fields.String(default=DataDiskFormatType.ext4) diff --git a/aztk/models/data_disk_format_type.py b/aztk/models/data_disk_format_type.py new file mode 100644 index 00000000..e1455a3e --- /dev/null +++ b/aztk/models/data_disk_format_type.py @@ -0,0 +1,24 @@ +class DataDiskFormatType: + """ + The valid file system formats for a Data Disk + + Attributes: + bfs (:obj:`str`) + btrfs (:obj:`str`) + cramfs (:obj:`str`) + ext2 (:obj:`str`) + ext3 (:obj:`str`) + ext4 (:obj:`str`) + fat (:obj:`str`) + minix (:obj:`str`) + xfs (:obj:`str`) + """ + bfs = "bfs" + btrfs = "btrfs" + cramfs = "cramfs" + ext2 = "ext2" + ext3 = "ext3" + ext4 = "ext4" + fat = "fat" + minix = "minix" + xfs = "xfs" diff --git a/aztk/models/file_share.py b/aztk/models/file_share.py index c94bdcbb..6c02f93b 100644 --- a/aztk/models/file_share.py +++ b/aztk/models/file_share.py @@ -1,6 +1,15 @@ from aztk.core.models import Model, fields class FileShare(Model): + """ + Azure Files file share to mount to each node in the cluster + + Args: + storage_account_name (int): the name of the Azure Storage Account + storage_account_key (:obj:`str`, optional): the shared key to the Azure Storage Account + file_share_path (:obj:`str`, optional): the path of the file share in Azure Files + mount_path (:obj:`str`, optional): the path on the node to mount the file share + """ storage_account_name = fields.String() storage_account_key = 
fields.String() file_share_path = fields.String() diff --git a/aztk/models/vm_image.py b/aztk/models/vm_image.py index baa3483c..f1c76022 100644 --- a/aztk/models/vm_image.py +++ b/aztk/models/vm_image.py @@ -1,5 +1,6 @@ class VmImage: - def __init__(self, publisher, offer, sku): + def __init__(self, publisher, offer, sku, data_disks): self.publisher = publisher self.offer = offer self.sku = sku + self.data_disks = data_disks diff --git a/aztk/node_scripts/install/install.py b/aztk/node_scripts/install/install.py index 57957797..8a55a4a0 100644 --- a/aztk/node_scripts/install/install.py +++ b/aztk/node_scripts/install/install.py @@ -1,10 +1,13 @@ import os -from core import config -from install import pick_master, spark, scripts, create_user, plugins, spark_container + import wait_until_master_selected -from aztk.models.plugins import PluginTarget from aztk.internal import cluster_data +from aztk.models.plugins import PluginTarget +from core import config +from install import (create_user, pick_master, plugins, scripts, spark, spark_container) + from .node_scheduling import setup_node_scheduling +from .setup_data_disks import setup_data_disks def read_cluster_config(): @@ -13,10 +16,14 @@ def read_cluster_config(): print("Got cluster config", cluster_config) return cluster_config + + + def setup_host(docker_repo: str): """ Code to be run on the node(NOT in a container) """ + client = config.batch_client create_user.create_user(batch_client=client) @@ -43,6 +50,8 @@ def setup_host(docker_repo: str): cluster_conf = read_cluster_config() + setup_data_disks(cluster_conf) + setup_node_scheduling(client, cluster_conf, is_master) #TODO pass azure file shares @@ -50,6 +59,7 @@ def setup_host(docker_repo: str): docker_repo=docker_repo, gpu_enabled=os.environ.get("AZTK_GPU_ENABLED") == "true", plugins=cluster_conf.plugins, + data_disks=cluster_conf.data_disks, ) plugins.setup_plugins(target=PluginTarget.Host, is_master=is_master, is_worker=is_worker) diff --git 
a/aztk/node_scripts/install/setup_data_disk.sh b/aztk/node_scripts/install/setup_data_disk.sh new file mode 100644 index 00000000..2c18ffcb --- /dev/null +++ b/aztk/node_scripts/install/setup_data_disk.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +devicename="/dev/"$1 +format_type=$2 +mount_path=$3 +device_partition_name="${devicename}1" + +# make parition +parted --script --align optimal ${devicename} mklabel gpt +parted --script --align optimal ${devicename} mkpart primary ext4 0% 100% + +# format partition +sleep 1 +mkfs.${format_type} ${device_partition_name} + +# make partition directory +mkdir -p ${mount_path} + +# auto mount parition on reboot +echo "${device_partition_name} ${mount_path} auto defaults,nofail 0 0" >> /etc/fstab + +# mount partition +mount ${device_partition_name} ${mount_path} diff --git a/aztk/node_scripts/install/setup_data_disks.py b/aztk/node_scripts/install/setup_data_disks.py new file mode 100644 index 00000000..2b33b902 --- /dev/null +++ b/aztk/node_scripts/install/setup_data_disks.py @@ -0,0 +1,49 @@ +import subprocess +import sys +import os + + +def mount_data_disk(data_disk, device_name, number): + cmd = os.environ["AZTK_WORKING_DIR"] + "/aztk/node_scripts/install/setup_data_disk.sh " + data_disk.mount_path = "/data-disk" + str(number) if not data_disk.mount_path else data_disk.mount_path + args = device_name + " " + data_disk.format_type + " " + data_disk.mount_path + cmd = cmd + args + print("mount disk cmd:", cmd) + p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + if p.returncode != 0: + print("ERROR: failed to mount data_disk device {}", device_name) + sys.exit(p.returncode) + + return data_disk + + +def setup_data_disks(cluster_configuration): + cmd = 'lsblk -lnS --sort name | wc -l' + p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, _ = p.communicate() + if int(output) <= 3: + return + + # by default, 
there are 3 devices on each host: sda, sdb, sr0 + cmd = 'lsblk -lnbS --sort=name --output NAME,SIZE | grep -v "sr0\|sd[ab]"' + p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + disks = stdout.decode('UTF-8').split('\n')[:-1] + + disk_size_mapping = {} + for disk in disks: + assert len(disk.split()) == 2 + name, size = disk.split() + # convert size from bytes to gb + size = int(size) / 1024 / 1024 / 1024 + if not disk_size_mapping.get(size): + disk_size_mapping[size] = [name] + else: + disk_size_mapping[size].append(name) + + for i, defined_data_disk in enumerate(cluster_configuration.data_disks): + device_name = disk_size_mapping[defined_data_disk.disk_size_gb].pop() + mounted_data_disk = mount_data_disk(data_disk=defined_data_disk, device_name=device_name, number=i) + # update cluster_configuration in case mount_path changed + cluster_configuration.data_disks[i] = mounted_data_disk diff --git a/aztk/node_scripts/install/spark_container.py b/aztk/node_scripts/install/spark_container.py index 405498ee..b29a3f21 100644 --- a/aztk/node_scripts/install/spark_container.py +++ b/aztk/node_scripts/install/spark_container.py @@ -7,7 +7,8 @@ def start_spark_container( docker_repo: str=None, gpu_enabled: bool=False, file_mounts=None, - plugins=None): + plugins=None, + data_disks=None): cmd = DockerCmd( name=constants.DOCKER_SPARK_CONTAINER_NAME, @@ -18,7 +19,8 @@ def start_spark_container( if file_mounts: for mount in file_mounts: cmd.share_folder(mount.mount_path) - cmd.share_folder('/mnt') + cmd.share_folder('/mnt/batch') + [cmd.share_folder(data_disk.mount_path) for data_disk in data_disks] cmd.pass_env('AZTK_WORKING_DIR') cmd.pass_env('AZ_BATCH_ACCOUNT_NAME') diff --git a/aztk/node_scripts/setup_host.sh b/aztk/node_scripts/setup_host.sh index c23ff0dd..60053f5e 100644 --- a/aztk/node_scripts/setup_host.sh +++ b/aztk/node_scripts/setup_host.sh @@ -125,6 +125,7 @@ main () { # Unzip resource files and set
permissions chmod 777 $AZTK_WORKING_DIR/aztk/node_scripts/docker_main.sh + chmod +x $AZTK_WORKING_DIR/aztk/node_scripts/install/setup_data_disk.sh # Check docker is running docker info > /dev/null 2>&1 diff --git a/aztk/spark/client.py b/aztk/spark/client.py index 685b8909..da0f39ef 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -37,6 +37,7 @@ def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = aztk.spark.models.Cluster """ cluster_conf = _apply_default_for_cluster_config(cluster_conf) + cluster_conf.validate() cluster_data = self._get_cluster_data(cluster_conf.cluster_id) @@ -56,11 +57,11 @@ def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = cluster_conf.worker_on_master) software_metadata_key = "spark" - vm_image = models.VmImage( publisher='Canonical', offer='UbuntuServer', - sku='16.04') + sku='16.04', + data_disks=cluster_conf.data_disks) cluster = self.__create_pool_and_job( cluster_conf, software_metadata_key, start_task, vm_image) @@ -249,7 +250,8 @@ def submit_job(self, job_configuration: models.JobConfiguration): vm_image = models.VmImage( publisher='Canonical', offer='UbuntuServer', - sku='16.04') + sku='16.04', + data_disks=job_configuration.data_disks) autoscale_formula = "$TargetDedicatedNodes = {0}; " \ "$TargetLowPriorityNodes = {1}".format( @@ -352,6 +354,7 @@ def _apply_default_for_cluster_config(configuration: models.ClusterConfiguration cluster_conf.merge(configuration) if cluster_conf.scheduling_target is None: cluster_conf.scheduling_target = _default_scheduling_target(cluster_conf.size) + return cluster_conf def _apply_default_for_job_config(job_conf: models.JobConfiguration): diff --git a/aztk/spark/models/models.py b/aztk/spark/models/models.py index a7e18233..7fb06dcf 100644 --- a/aztk/spark/models/models.py +++ b/aztk/spark/models/models.py @@ -111,6 +111,10 @@ class SecretsConfiguration(aztk.models.SecretsConfiguration): pass +class 
DataDisk(aztk.models.DataDisk): + pass + + + class VmImage(aztk.models.VmImage): + pass @@ -195,7 +199,8 @@ def __init__( max_low_pri_nodes=0, subnet_id=None, scheduling_target: SchedulingTarget = None, - worker_on_master=None): + worker_on_master: bool = None, + data_disks: List[DataDisk] = None): self.id = id self.applications = applications @@ -211,6 +216,7 @@ def __init__( self.subnet_id = subnet_id self.worker_on_master = worker_on_master self.scheduling_target = scheduling_target + self.data_disks = data_disks def to_cluster_config(self): return ClusterConfiguration( @@ -224,6 +230,7 @@ def to_cluster_config(self): worker_on_master=self.worker_on_master, spark_configuration=self.spark_configuration, scheduling_target=self.scheduling_target, + data_disks=self.data_disks, ) def mixed_mode(self) -> bool: diff --git a/aztk_cli/config.py b/aztk_cli/config.py index 9f521057..e8a34d39 100644 --- a/aztk_cli/config.py +++ b/aztk_cli/config.py @@ -5,6 +5,7 @@ SecretsConfiguration, ClusterConfiguration, SchedulingTarget, + DataDisk, ) from aztk.utils import deprecate from aztk.models import Toolkit @@ -184,6 +185,7 @@ def __init__(self): self.subnet_id = None self.worker_on_master = None self.scheduling_target = None + self.data_disks = [] def _merge_dict(self, config): config = config.get('job') @@ -205,7 +207,16 @@ def _merge_dict(self, config): scheduling_target = cluster_configuration.get("scheduling_target") if scheduling_target: self.scheduling_target = SchedulingTarget(scheduling_target) - + if cluster_configuration.get("data_disks"): + for item in cluster_configuration.get("data_disks"): + data_disk = DataDisk() + if item.get("disk_size_gb"): + data_disk.disk_size_gb = item.get("disk_size_gb") + if item.get("mount_path"): + data_disk.mount_path = item.get("mount_path") + if item.get("format_type"): + data_disk.format_type = item.get("format_type") + self.data_disks.append(data_disk) applications =
config.get('applications') if applications: diff --git a/aztk_cli/spark/endpoints/cluster/cluster_create.py b/aztk_cli/spark/endpoints/cluster/cluster_create.py index 63df76a2..e6bd89ed 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_create.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_create.py @@ -2,7 +2,7 @@ import typing import aztk.spark -from aztk.spark.models import ClusterConfiguration, UserConfiguration +from aztk.spark.models import ClusterConfiguration, UserConfiguration, DataDisk from aztk.utils import deprecate from aztk_cli import config, log, utils from aztk_cli.config import load_aztk_spark_config @@ -29,6 +29,8 @@ def setup_parser(parser: argparse.ArgumentParser): (/:)') parser.add_argument('--subnet-id', help='The subnet in which to create the cluster.') + parser.add_argument('--data-disk-size', type=int, + help="Size in GB of additional local disk storage on each node.") parser.add_argument('--no-wait', dest='wait', action='store_false') parser.add_argument('--wait', dest='wait', action='store_true') @@ -43,20 +45,28 @@ def execute(args: typing.NamedTuple): # read cluster.yaml configuration file, overwrite values with args file_config, wait = config.read_cluster_config() cluster_conf.merge(file_config) + if args.size_low_pri is not None: deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.") args.size_low_priority = args.size_low_pri - cluster_conf.merge(ClusterConfiguration( - cluster_id=args.cluster_id, - size=args.size, - size_low_priority=args.size_low_priority, - vm_size=args.vm_size, - subnet_id=args.subnet_id, - user_configuration=UserConfiguration( - username=args.username, - password=args.password, - ))) + cluster_conf.merge( + ClusterConfiguration( + cluster_id=args.cluster_id, + size=args.size, + size_low_priority=args.size_low_priority, + vm_size=args.vm_size, + subnet_id=args.subnet_id, + user_configuration=UserConfiguration( + username=args.username, + password=args.password, + ), + ) 
+ ) + + if args.data_disk_size: + cluster_conf.data_disks.append(DataDisk(disk_size_gb=args.data_disk_size)) + if args.docker_repo and cluster_conf.toolkit: cluster_conf.toolkit.docker_repo = args.docker_repo diff --git a/aztk_cli/spark/endpoints/job/submit.py b/aztk_cli/spark/endpoints/job/submit.py index 91c5b768..a3347773 100644 --- a/aztk_cli/spark/endpoints/job/submit.py +++ b/aztk_cli/spark/endpoints/job/submit.py @@ -45,6 +45,7 @@ def execute(args: typing.NamedTuple): subnet_id=job_conf.subnet_id, worker_on_master=job_conf.worker_on_master, scheduling_target=job_conf.scheduling_target, + data_disks=job_conf.data_disks, ) #TODO: utils.print_job_conf(job_configuration) diff --git a/docs/dev/writing-models.md b/docs/dev/writing-models.md index e4c0cb45..53266635 100644 --- a/docs/dev/writing-models.md +++ b/docs/dev/writing-models.md @@ -6,7 +6,7 @@ In `aztk/models` create a new file with the name of your model `my_model.py` In `aztk/models/__init__.py` add `from .my_model import MyModel` -Create a new class `MyModel` that inherit `Modle` +Create a new class `MyModel` that inherits `Model` ```python from aztk.core.models import Model, fields diff --git a/tests/core/test_models.py b/tests/core/test_models.py index 3ec9682d..d0fb591a 100644 --- a/tests/core/test_models.py +++ b/tests/core/test_models.py @@ -294,6 +294,39 @@ class UserList(Model): assert obj1.infos[1].age == 38 +def test_merge_nested_model_append_strategy_initial_not_set(): + class UserList(Model): + infos = fields.List(UserInfo, merge_strategy=ListMergeStrategy.Append) + + obj1 = UserList() + obj1.infos.append(UserInfo( + name="John", + age=29, + )) + + obj2 = UserList( + infos=[ + dict( + name="Frank", + age=38, + ), + ], + ) + + assert len(obj1.infos) == 1 + assert len(obj2.infos) == 1 + assert obj1.infos[0].name == "John" + assert obj1.infos[0].age == 29 + assert obj2.infos[0].name == "Frank" + assert obj2.infos[0].age == 38 + + obj1.merge(obj2) + assert len(obj1.infos) == 2 + assert
obj1.infos[0].name == "John" + assert obj1.infos[0].age == 29 + assert obj1.infos[1].name == "Frank" + assert obj1.infos[1].age == 38 + def test_serialize_simple_model_to_yaml(): info = UserInfo(name="John", age=29) output = yaml.dump(info) diff --git a/tests/models/test_data_disk.py b/tests/models/test_data_disk.py new file mode 100644 index 00000000..112d0d84 --- /dev/null +++ b/tests/models/test_data_disk.py @@ -0,0 +1,40 @@ +import pytest + +from aztk.models import DataDisk, DataDiskFormatType +from aztk.error import InvalidModelError + + +def test_valid_data_disk(): + data_disk = DataDisk(disk_size_gb=10, mount_path='/test/path', format_type=DataDiskFormatType.ext2) + data_disk.validate() + + assert data_disk.disk_size_gb == 10 + assert data_disk.mount_path == '/test/path' + assert data_disk.format_type == DataDiskFormatType.ext2 + + +def test_default_data_disk(): + data_disk = DataDisk() + with pytest.raises(InvalidModelError): + data_disk.validate() + + assert data_disk.disk_size_gb is None + assert data_disk.mount_path is None + assert data_disk.format_type == DataDiskFormatType.ext4 + + +def test_data_disk_minimum_required_fields(): + data_disk = DataDisk(disk_size_gb=1) + assert data_disk.disk_size_gb == 1 + assert data_disk.mount_path == None + assert data_disk.format_type == DataDiskFormatType.ext4 + + +def test_data_disk_format_type(): + data_disk = DataDisk(disk_size_gb=1, format_type=DataDiskFormatType.ext2) + assert data_disk.format_type == "ext2" + assert data_disk.format_type == DataDiskFormatType.ext2 + + data_disk = DataDisk(disk_size_gb=1, format_type="ext2") + assert data_disk.format_type == "ext2" + assert data_disk.format_type == DataDiskFormatType.ext2