diff --git a/aztk/client.py b/aztk/client.py index 1253db54..cdc12ac2 100644 --- a/aztk/client.py +++ b/aztk/client.py @@ -67,14 +67,14 @@ def __delete_pool_and_job(self, pool_id: str, keep_logs: bool = False): return job_exists or pool_exists - def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, VmImageModel): + def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, software_metadata_key: str, start_task, vm_image_model): """ Create a pool and job :param cluster_conf: the configuration object used to create the cluster :type cluster_conf: aztk.models.ClusterConfiguration :parm software_metadata_key: the id of the software being used on the cluster :param start_task: the start task for the cluster - :param VmImageModel: the type of image to provision for the cluster + :param vm_image_model: the type of image to provision for the cluster :param wait: wait until the cluster is ready """ self._get_cluster_data(cluster_conf.cluster_id).save_cluster_config(cluster_conf) @@ -85,7 +85,7 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw # Get a verified node agent sku sku_to_use, image_ref_to_use = \ helpers.select_latest_verified_vm_image_with_node_agent_sku( - VmImageModel.publisher, VmImageModel.offer, VmImageModel.sku, self.batch_client) + vm_image_model.publisher, vm_image_model.offer, vm_image_model.sku, self.batch_client) network_conf = None if cluster_conf.subnet_id is not None: @@ -99,7 +99,11 @@ def __create_pool_and_job(self, cluster_conf: models.ClusterConfiguration, softw id=pool_id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + node_agent_sku_id=sku_to_use, + data_disks=[batch_models.DataDisk( + lun=i, + disk_size_gb=data_disk.disk_size_gb + ) for i, data_disk in enumerate(vm_image_model.data_disks)]), vm_size=cluster_conf.vm_size, 
enable_auto_scale=True, auto_scale_formula=auto_scale_formula, @@ -382,7 +386,11 @@ def __submit_job(self, display_name=job_configuration.id, virtual_machine_configuration=batch_models.VirtualMachineConfiguration( image_reference=image_ref_to_use, - node_agent_sku_id=sku_to_use), + node_agent_sku_id=sku_to_use, + data_disks=[batch_models.DataDisk( + lun=i, + disk_size_gb=data_disk.disk_size_gb + ) for i, data_disk in enumerate(vm_image_model.data_disks)]), vm_size=job_configuration.vm_size, enable_auto_scale=True, auto_scale_formula=autoscale_formula, diff --git a/aztk/core/models/fields.py b/aztk/core/models/fields.py index 12d1719e..50e33444 100644 --- a/aztk/core/models/fields.py +++ b/aztk/core/models/fields.py @@ -142,6 +142,15 @@ def __set__(self, instance, value): value = [] super().__set__(instance, value) + def __get__(self, instance, _): + if instance is not None: + value = instance._data.get(self) + if value is None: + return instance._data.setdefault(self, self._default(instance)) + return value + + return self + def _resolve(self, value): result = [] for item in value: @@ -158,7 +167,7 @@ def merge(self, instance, value): value = [] if self.merge_strategy == ListMergeStrategy.Append: - current = instance._data.get(self) + current = instance._data.get(self) if current is None: current = [] value = current + value diff --git a/aztk/core/models/model.py b/aztk/core/models/model.py index 6f016f49..8ef194e5 100644 --- a/aztk/core/models/model.py +++ b/aztk/core/models/model.py @@ -90,7 +90,7 @@ def validate(self): def merge(self, other): if not isinstance(other, self.__class__): raise AztkError("Cannot merge {0} as is it not an instance of {1}".format(other, self.__class__.__name__)) - + for field in other._fields.values(): if field in other._data: field.merge(self, other._data[field]) diff --git a/aztk/internal/docker_cmd.py b/aztk/internal/docker_cmd.py index 7dc75e1e..73a6e517 100644 --- a/aztk/internal/docker_cmd.py +++ b/aztk/internal/docker_cmd.py @@ 
-28,7 +28,7 @@ def pass_env(self, env: str): self.cmd.add_option('-e', '{0}'.format(env)) def share_folder(self, folder: str): - self.cmd.add_option('-v', '{0}:{0}'.format(folder)) + self.cmd.add_option('--mount', 'type=bind,src={0},dst={0}'.format(folder)) def open_port(self, port: int): self.cmd.add_option('-p', '{0}:{0}'.format(port)) # Spark Master UI diff --git a/aztk/models/__init__.py b/aztk/models/__init__.py index cf5c2e01..7e30de6a 100644 --- a/aztk/models/__init__.py +++ b/aztk/models/__init__.py @@ -13,6 +13,8 @@ from .remote_login import RemoteLogin from .ssh_log import SSHLog from .vm_image import VmImage +from .data_disk import DataDisk +from .data_disk_format_type import DataDiskFormatType from .software import Software from .cluster import Cluster from .scheduling_target import SchedulingTarget diff --git a/aztk/models/cluster_configuration.py b/aztk/models/cluster_configuration.py index 301deb56..3b456611 100644 --- a/aztk/models/cluster_configuration.py +++ b/aztk/models/cluster_configuration.py @@ -3,12 +3,14 @@ from aztk.utils import deprecated,deprecate, helpers from .custom_script import CustomScript +from .data_disk import DataDisk from .file_share import FileShare from .plugins import PluginConfiguration from .toolkit import Toolkit from .user_configuration import UserConfiguration from .scheduling_target import SchedulingTarget + class ClusterConfiguration(Model): """ Cluster configuration model @@ -36,6 +38,7 @@ class ClusterConfiguration(Model): plugins = fields.List(PluginConfiguration) custom_scripts = fields.List(CustomScript) file_shares = fields.List(FileShare) + data_disks = fields.List(DataDisk) user_configuration = fields.Model(UserConfiguration, default=None) scheduling_target = fields.Enum(SchedulingTarget, default=None) diff --git a/aztk/models/data_disk.py b/aztk/models/data_disk.py new file mode 100644 index 00000000..0cabff0b --- /dev/null +++ b/aztk/models/data_disk.py @@ -0,0 +1,17 @@ +from aztk.core.models import Model, 
fields + +from .data_disk_format_type import DataDiskFormatType + +class DataDisk(Model): + """ + Configuration for an additional local storage disk that is attached to the virtual machine, + formatted and mounted into the Spark Docker container + + Args: + disk_size_gb (int): Which docker endpoint to use. Default to docker hub. + mount_path (:obj:`str`, optional): the path where the disk should be mounted + format_type (:obj:`aztk.models.DataDiskFormatType`, optional): the type of file system format + """ + disk_size_gb = fields.Integer() + mount_path = fields.String() + format_type = fields.String(default=DataDiskFormatType.ext4) diff --git a/aztk/models/data_disk_format_type.py b/aztk/models/data_disk_format_type.py new file mode 100644 index 00000000..e1455a3e --- /dev/null +++ b/aztk/models/data_disk_format_type.py @@ -0,0 +1,24 @@ +class DataDiskFormatType: + """ + The valid file system formats for a Data Disk + + Attributes: + bfs (:obj:`str`) + btrfs (:obj:`str`) + cramfs (:obj:`str`) + ext2 (:obj:`str`) + ext3 (:obj:`str`) + ext4 (:obj:`str`) + fat (:obj:`str`) + minix (:obj:`str`) + xfs (:obj:`str`) + """ + bfs = "bfs" + btrfs = "btrfs" + cramfs = "cramfs" + ext2 = "ext2" + ext3 = "ext3" + ext4 = "ext4" + fat = "fat" + minix = "minix" + xfs = "xfs" diff --git a/aztk/models/file_share.py b/aztk/models/file_share.py index c94bdcbb..6c02f93b 100644 --- a/aztk/models/file_share.py +++ b/aztk/models/file_share.py @@ -1,6 +1,15 @@ from aztk.core.models import Model, fields class FileShare(Model): + """ + Azure Files file share to mount to each node in the cluster + + Args: + storage_account_name (int): the name of the Azure Storage Account + storage_account_key (:obj:`str`, optional): the shared key to the Azure Storage Account + file_share_path (:obj:`str`, optional): the path of the file share in Azure Files + mount_path (:obj:`str`, optional): the path on the node to mount the file share + """ storage_account_name = fields.String() storage_account_key = 
fields.String() file_share_path = fields.String() diff --git a/aztk/models/vm_image.py b/aztk/models/vm_image.py index baa3483c..f1c76022 100644 --- a/aztk/models/vm_image.py +++ b/aztk/models/vm_image.py @@ -1,5 +1,6 @@ class VmImage: - def __init__(self, publisher, offer, sku): + def __init__(self, publisher, offer, sku, data_disks): self.publisher = publisher self.offer = offer self.sku = sku + self.data_disks = data_disks diff --git a/aztk/node_scripts/install/install.py b/aztk/node_scripts/install/install.py index 57957797..8a55a4a0 100644 --- a/aztk/node_scripts/install/install.py +++ b/aztk/node_scripts/install/install.py @@ -1,10 +1,13 @@ import os -from core import config -from install import pick_master, spark, scripts, create_user, plugins, spark_container + import wait_until_master_selected -from aztk.models.plugins import PluginTarget from aztk.internal import cluster_data +from aztk.models.plugins import PluginTarget +from core import config +from install import (create_user, pick_master, plugins, scripts, spark, spark_container) + from .node_scheduling import setup_node_scheduling +from .setup_data_disks import setup_data_disks def read_cluster_config(): @@ -13,10 +16,14 @@ def read_cluster_config(): print("Got cluster config", cluster_config) return cluster_config + + + def setup_host(docker_repo: str): """ Code to be run on the node(NOT in a container) """ + client = config.batch_client create_user.create_user(batch_client=client) @@ -43,6 +50,8 @@ def setup_host(docker_repo: str): cluster_conf = read_cluster_config() + setup_data_disks(cluster_conf) + setup_node_scheduling(client, cluster_conf, is_master) #TODO pass azure file shares @@ -50,6 +59,7 @@ def setup_host(docker_repo: str): docker_repo=docker_repo, gpu_enabled=os.environ.get("AZTK_GPU_ENABLED") == "true", plugins=cluster_conf.plugins, + data_disks=cluster_conf.data_disks, ) plugins.setup_plugins(target=PluginTarget.Host, is_master=is_master, is_worker=is_worker) diff --git 
a/aztk/node_scripts/install/setup_data_disk.sh b/aztk/node_scripts/install/setup_data_disk.sh new file mode 100644 index 00000000..2c18ffcb --- /dev/null +++ b/aztk/node_scripts/install/setup_data_disk.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +set -e + +devicename="/dev/"$1 +format_type=$2 +mount_path=$3 +device_partition_name="${devicename}1" + +# make parition +parted --script --align optimal ${devicename} mklabel gpt +parted --script --align optimal ${devicename} mkpart primary ext4 0% 100% + +# format partition +sleep 1 +mkfs.${format_type} ${device_partition_name} + +# make partition directory +mkdir -p ${mount_path} + +# auto mount parition on reboot +echo "${device_partition_name} ${mount_path} auto defaults,nofail 0 0" >> /etc/fstab + +# mount partition +mount ${device_partition_name} ${mount_path} diff --git a/aztk/node_scripts/install/setup_data_disks.py b/aztk/node_scripts/install/setup_data_disks.py new file mode 100644 index 00000000..2b33b902 --- /dev/null +++ b/aztk/node_scripts/install/setup_data_disks.py @@ -0,0 +1,49 @@ +import subprocess +import sys +import os + + +def mount_data_disk(data_disk, device_name, number): + cmd = os.environ["AZTK_WORKING_DIR"] + "/aztk/node_scripts/install/setup_data_disk.sh " + data_disk.mount_path = "/data-disk" + str(number) if not data_disk.mount_path else data_disk.mount_path + args = device_name + " " + data_disk.format_type + " " + data_disk.mount_path + cmd = cmd + args + print("mount disk cmd:", cmd) + p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + if p.returncode != 0: + print("ERROR: failed to mount data_disk device {}", device_name) + sys.exit(p.returncode) + + return data_disk + + +def setup_data_disks(cluster_configuration): + cmd = 'lsblk -lnS --sort name | wc -l' + p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output, _ = p.communicate() + if int(output) <= 3: + return + + # by default, 
there are 3 devices on each host: sda, sdb, sr0 + cmd = 'lsblk -lnbS --sort=name --output NAME,SIZE | grep -v "sr0\|sd[ab]"' + p = subprocess.Popen([cmd], shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = p.communicate() + disks = stdout.decode('UTF-8').split('\n')[:-1] + + disk_size_mapping = {} + for disk in disks: + assert len(disk.split()) == 2 + name, size = disk.split() + # convert size from bytes to gb + size = int(size) / 1024 / 1024 / 1024 + if not disk_size_mapping.get(size): + disk_size_mapping[size] = [name] + else: + disk_size_mapping[size].append(name) + + for i, defined_data_disk in enumerate(cluster_configuration.data_disks): + device_name = disk_size_mapping[defined_data_disk.disk_size_gb].pop() + mounted_data_disk = mount_data_disk(data_disk=defined_data_disk, device_name=device_name, number=i) + # update cluster_configuration in case mount_path changed + cluster_configuration.data_disks[i] = mounted_data_disk diff --git a/aztk/node_scripts/install/spark_container.py b/aztk/node_scripts/install/spark_container.py index 405498ee..b29a3f21 100644 --- a/aztk/node_scripts/install/spark_container.py +++ b/aztk/node_scripts/install/spark_container.py @@ -7,7 +7,8 @@ def start_spark_container( docker_repo: str=None, gpu_enabled: bool=False, file_mounts=None, - plugins=None): + plugins=None, + data_disks=None): cmd = DockerCmd( name=constants.DOCKER_SPARK_CONTAINER_NAME, @@ -18,7 +19,8 @@ def start_spark_container( if file_mounts: for mount in file_mounts: cmd.share_folder(mount.mount_path) - cmd.share_folder('/mnt') + cmd.share_folder('/mnt/batch') + [cmd.share_folder(data_disk.mount_path) for data_disk in data_disks] cmd.pass_env('AZTK_WORKING_DIR') cmd.pass_env('AZ_BATCH_ACCOUNT_NAME') diff --git a/aztk/node_scripts/setup_host.sh b/aztk/node_scripts/setup_host.sh index c23ff0dd..60053f5e 100644 --- a/aztk/node_scripts/setup_host.sh +++ b/aztk/node_scripts/setup_host.sh @@ -125,6 +125,7 @@ main () { # Unzip resource files and set
permissions chmod 777 $AZTK_WORKING_DIR/aztk/node_scripts/docker_main.sh + chmod +x $AZTK_WORKING_DIR/aztk/node_scripts/install/setup_data_disk.sh # Check docker is running docker info > /dev/null 2>&1 diff --git a/aztk/spark/client.py b/aztk/spark/client.py index 685b8909..da0f39ef 100644 --- a/aztk/spark/client.py +++ b/aztk/spark/client.py @@ -37,6 +37,7 @@ def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = aztk.spark.models.Cluster """ cluster_conf = _apply_default_for_cluster_config(cluster_conf) + cluster_conf.validate() cluster_data = self._get_cluster_data(cluster_conf.cluster_id) @@ -56,11 +57,11 @@ def create_cluster(self, cluster_conf: models.ClusterConfiguration, wait: bool = cluster_conf.worker_on_master) software_metadata_key = "spark" - vm_image = models.VmImage( publisher='Canonical', offer='UbuntuServer', - sku='16.04') + sku='16.04', + data_disks=cluster_conf.data_disks) cluster = self.__create_pool_and_job( cluster_conf, software_metadata_key, start_task, vm_image) @@ -249,7 +250,8 @@ def submit_job(self, job_configuration: models.JobConfiguration): vm_image = models.VmImage( publisher='Canonical', offer='UbuntuServer', - sku='16.04') + sku='16.04', + data_disks=job_configuration.data_disks) autoscale_formula = "$TargetDedicatedNodes = {0}; " \ "$TargetLowPriorityNodes = {1}".format( @@ -352,6 +354,7 @@ def _apply_default_for_cluster_config(configuration: models.ClusterConfiguration cluster_conf.merge(configuration) if cluster_conf.scheduling_target is None: cluster_conf.scheduling_target = _default_scheduling_target(cluster_conf.size) + return cluster_conf def _apply_default_for_job_config(job_conf: models.JobConfiguration): diff --git a/aztk/spark/models/models.py b/aztk/spark/models/models.py index a7e18233..7fb06dcf 100644 --- a/aztk/spark/models/models.py +++ b/aztk/spark/models/models.py @@ -111,6 +111,10 @@ class SecretsConfiguration(aztk.models.SecretsConfiguration): pass +class 
DataDisk(aztk.models.DataDisk): + pass + + + class VmImage(aztk.models.VmImage): + pass @@ -195,7 +199,8 @@ def __init__( max_low_pri_nodes=0, subnet_id=None, scheduling_target: SchedulingTarget = None, - worker_on_master=None): + worker_on_master: bool = None, + data_disks: List[DataDisk] = None): self.id = id self.applications = applications @@ -211,6 +216,7 @@ def __init__( self.subnet_id = subnet_id self.worker_on_master = worker_on_master self.scheduling_target = scheduling_target + self.data_disks = data_disks def to_cluster_config(self): return ClusterConfiguration( @@ -224,6 +230,7 @@ def to_cluster_config(self): worker_on_master=self.worker_on_master, spark_configuration=self.spark_configuration, scheduling_target=self.scheduling_target, + data_disks=self.data_disks, ) def mixed_mode(self) -> bool: diff --git a/aztk_cli/config.py b/aztk_cli/config.py index 9f521057..e8a34d39 100644 --- a/aztk_cli/config.py +++ b/aztk_cli/config.py @@ -5,6 +5,7 @@ SecretsConfiguration, ClusterConfiguration, SchedulingTarget, + DataDisk, ) from aztk.utils import deprecate from aztk.models import Toolkit @@ -184,6 +185,7 @@ def __init__(self): self.subnet_id = None self.worker_on_master = None self.scheduling_target = None + self.data_disks = [] def _merge_dict(self, config): config = config.get('job') @@ -205,7 +207,16 @@ def _merge_dict(self, config): scheduling_target = cluster_configuration.get("scheduling_target") if scheduling_target: self.scheduling_target = SchedulingTarget(scheduling_target) - + if cluster_configuration.get("data_disks"): + for item in cluster_configuration.get("data_disks"): + data_disk = DataDisk() + if item.get("disk_size_gb"): + data_disk.disk_size_gb = item.get("disk_size_gb") + if item.get("mount_path"): + data_disk.mount_path = item.get("mount_path") + if item.get("format_type"): + data_disk.format_type = item.get("format_type") + self.data_disks.append(data_disk) applications =
config.get('applications') if applications: diff --git a/aztk_cli/spark/endpoints/cluster/cluster_create.py b/aztk_cli/spark/endpoints/cluster/cluster_create.py index 63df76a2..e6bd89ed 100644 --- a/aztk_cli/spark/endpoints/cluster/cluster_create.py +++ b/aztk_cli/spark/endpoints/cluster/cluster_create.py @@ -2,7 +2,7 @@ import typing import aztk.spark -from aztk.spark.models import ClusterConfiguration, UserConfiguration +from aztk.spark.models import ClusterConfiguration, UserConfiguration, DataDisk from aztk.utils import deprecate from aztk_cli import config, log, utils from aztk_cli.config import load_aztk_spark_config @@ -29,6 +29,8 @@ def setup_parser(parser: argparse.ArgumentParser): (/:)') parser.add_argument('--subnet-id', help='The subnet in which to create the cluster.') + parser.add_argument('--data-disk-size', type=int, + help="Size in GB of additional local disk storage on each node.") parser.add_argument('--no-wait', dest='wait', action='store_false') parser.add_argument('--wait', dest='wait', action='store_true') @@ -43,20 +45,28 @@ def execute(args: typing.NamedTuple): # read cluster.yaml configuration file, overwrite values with args file_config, wait = config.read_cluster_config() cluster_conf.merge(file_config) + if args.size_low_pri is not None: deprecate("0.9.0", "--size-low-pri has been deprecated.", "Please use --size-low-priority.") args.size_low_priority = args.size_low_pri - cluster_conf.merge(ClusterConfiguration( - cluster_id=args.cluster_id, - size=args.size, - size_low_priority=args.size_low_priority, - vm_size=args.vm_size, - subnet_id=args.subnet_id, - user_configuration=UserConfiguration( - username=args.username, - password=args.password, - ))) + cluster_conf.merge( + ClusterConfiguration( + cluster_id=args.cluster_id, + size=args.size, + size_low_priority=args.size_low_priority, + vm_size=args.vm_size, + subnet_id=args.subnet_id, + user_configuration=UserConfiguration( + username=args.username, + password=args.password, + ), + ) 
+ ) + + if args.data_disk_size: + cluster_conf.data_disks.append(DataDisk(disk_size_gb=args.data_disk_size)) + if args.docker_repo and cluster_conf.toolkit: cluster_conf.toolkit.docker_repo = args.docker_repo diff --git a/aztk_cli/spark/endpoints/job/submit.py b/aztk_cli/spark/endpoints/job/submit.py index 91c5b768..a3347773 100644 --- a/aztk_cli/spark/endpoints/job/submit.py +++ b/aztk_cli/spark/endpoints/job/submit.py @@ -45,6 +45,7 @@ def execute(args: typing.NamedTuple): subnet_id=job_conf.subnet_id, worker_on_master=job_conf.worker_on_master, scheduling_target=job_conf.scheduling_target, + data_disks=job_conf.data_disks, ) #TODO: utils.print_job_conf(job_configuration) diff --git a/docs/dev/writing-models.md b/docs/dev/writing-models.md index e4c0cb45..53266635 100644 --- a/docs/dev/writing-models.md +++ b/docs/dev/writing-models.md @@ -6,7 +6,7 @@ In `aztk/models` create a new file with the name of your model `my_model.py` In `aztk/models/__init__.py` add `from .my_model import MyModel` -Create a new class `MyModel` that inherit `Modle` +Create a new class `MyModel` that inherits `Model` ```python from aztk.core.models import Model, fields diff --git a/tests/core/test_models.py b/tests/core/test_models.py index 3ec9682d..d0fb591a 100644 --- a/tests/core/test_models.py +++ b/tests/core/test_models.py @@ -294,6 +294,39 @@ class UserList(Model): assert obj1.infos[1].age == 38 +def test_merge_nested_model_append_strategy_initial_not_set(): + class UserList(Model): + infos = fields.List(UserInfo, merge_strategy=ListMergeStrategy.Append) + + obj1 = UserList() + obj1.infos.append(UserInfo( + name="John", + age=29, + )) + + obj2 = UserList( + infos=[ + dict( + name="Frank", + age=38, + ), + ], + ) + + assert len(obj1.infos) == 1 + assert len(obj2.infos) == 1 + assert obj1.infos[0].name == "John" + assert obj1.infos[0].age == 29 + assert obj2.infos[0].name == "Frank" + assert obj2.infos[0].age == 38 + + obj1.merge(obj2) + assert len(obj1.infos) == 2 + assert
obj1.infos[0].name == "John" + assert obj1.infos[0].age == 29 + assert obj1.infos[1].name == "Frank" + assert obj1.infos[1].age == 38 + def test_serialize_simple_model_to_yaml(): info = UserInfo(name="John", age=29) output = yaml.dump(info) diff --git a/tests/models/test_data_disk.py b/tests/models/test_data_disk.py new file mode 100644 index 00000000..112d0d84 --- /dev/null +++ b/tests/models/test_data_disk.py @@ -0,0 +1,40 @@ +import pytest + +from aztk.models import DataDisk, DataDiskFormatType +from aztk.error import InvalidModelError + + +def test_valid_data_disk(): + data_disk = DataDisk(disk_size_gb=10, mount_path='/test/path', format_type=DataDiskFormatType.ext2) + data_disk.validate() + + assert data_disk.disk_size_gb == 10 + assert data_disk.mount_path == '/test/path' + assert data_disk.format_type == DataDiskFormatType.ext2 + + +def test_default_data_disk(): + data_disk = DataDisk() + with pytest.raises(InvalidModelError): + data_disk.validate() + + assert data_disk.disk_size_gb is None + assert data_disk.mount_path is None + assert data_disk.format_type == DataDiskFormatType.ext4 + + +def test_data_disk_minimum_required_fields(): + data_disk = DataDisk(disk_size_gb=1) + assert data_disk.disk_size_gb == 1 + assert data_disk.mount_path == None + assert data_disk.format_type == DataDiskFormatType.ext4 + + +def test_data_disk_format_type(): + data_disk = DataDisk(disk_size_gb=1, format_type=DataDiskFormatType.ext2) + assert data_disk.format_type == "ext2" + assert data_disk.format_type == DataDiskFormatType.ext2 + + data_disk = DataDisk(disk_size_gb=1, format_type="ext2") + assert data_disk.format_type == "ext2" + assert data_disk.format_type == DataDiskFormatType.ext2