Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ It is recommended to install Modalities via uv or install PyTorch, psutil and Ni
# Get uv (tested with uv version 0.9.13)
curl -LsSf https://astral.sh/uv/install.sh | sh

uv sync
uv sync --extra [cpu|cu126|cu128|cu130] # Get CUDA version via nvidia-smi
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The README shows installation commands using bracket notation [cpu|cu126|cu128|cu130], but this is not valid shell syntax. Users cannot literally type this command and expect it to work. The command should be clarified to show that users need to choose ONE option, for example:

  • uv sync --extra cpu
  • uv sync --extra cu126
  • uv sync --extra cu128
  • uv sync --extra cu130

Consider using a different notation or providing explicit examples rather than shell pipe syntax inside brackets.

Copilot uses AI. Check for mistakes.
source .venv/bin/activate

# For developers: use [tests,linting] and install pre-commit hooks
uv sync --extra tests --extra linting
uv sync --extra [cpu|cu126|cu128|cu130] --extra tests --extra linting
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The README shows installation commands using bracket notation [cpu|cu126|cu128|cu130], but this is not valid shell syntax. Users cannot literally type this command and expect it to work. The command should be clarified to show that users need to choose ONE option, for example:

  • uv sync --extra cpu --extra tests --extra linting
  • uv sync --extra cu126 --extra tests --extra linting

Consider using a different notation or providing explicit examples rather than shell pipe syntax inside brackets.

Copilot uses AI. Check for mistakes.
pre-commit install --install-hooks
```

Expand All @@ -60,7 +60,7 @@ conda create -n modalities python=3.13
conda activate modalities

# Install PyTorch, psutil, Ninja and Flash Attention
pip install "torch<2.11.0"
pip install "torch<2.11.0" # Or appropriate version for your CUDA setup.
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment suggests using "appropriate version for your CUDA setup" but the version constraint "torch<2.11.0" doesn't specify how to select the CUDA version. With the new changes, users should be directed to use the appropriate PyTorch index URL for their CUDA version when installing manually, or they should install with extras like in Option 1. Consider updating this comment to align with the new CUDA version selection approach.

Suggested change
pip install "torch<2.11.0" # Or appropriate version for your CUDA setup.
# For PyTorch, select the correct index URL for your CUDA/CPU setup from https://pytorch.org/get-started/locally/
pip install --index-url https://download.pytorch.org/whl/cu121 "torch<2.11.0"

Copilot uses AI. Check for mistakes.
pip install psutil ninja # Ninja lowers compilation time of flash attention significantly
pip install flash-attn==2.8.3 --no-build-isolation
```
Expand Down
72 changes: 66 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ description = "Modalities, a PyTorch-native framework for distributed and reprod
readme = "README.md"
dependencies = [
"numpy",
"torch<2.11.0",
"ninja",
"packaging",
"tqdm",
Expand All @@ -25,25 +24,86 @@ dependencies = [
"matplotlib",
"wandb",
"einops>=0.7.0",
"flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'",
"debugpy", # For VSCode debugging support
]

[project.urls]
Homepage = "https://github.com/Modalities/modalities"
Issues = "https://github.com/Modalities/modalities/issues"

[project.optional-dependencies]
linting = ["pre-commit"]
tests = ["pytest", "pytest-cov", "debugpy"]

[project.scripts]
modalities = "modalities.__main__:main"

[build-system]
requires = ["setuptools >= 61.0.0"]
build-backend = "setuptools.build_meta"

[project.optional-dependencies]
linting = ["pre-commit"]
tests = ["pytest", "pytest-cov", "debugpy"]

# Hardware extras: install with exactly ONE of cpu / cu126 / cu128 / cu130 to
# select the matching PyTorch wheel index (torch was removed from the base
# dependency list, so an extra is required to get torch at all). The extras
# are declared mutually exclusive in the [tool.uv] conflicts table.
cpu = ["torch>=2.10,<2.11.0", "torchvision"]
# flash-attn is excluded on macOS and aarch64 via the environment marker
# (no prebuilt wheels there); the CPU extra omits it entirely.
cu126 = [
"torch>=2.10,<2.11.0",
"torchvision",
"flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'"
]
cu128 = [
"torch>=2.10,<2.11.0",
"torchvision",
"flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'"
]
cu130 = [
"torch>=2.10,<2.11.0",
"torchvision",
"flash-attn==2.8.3; platform_system != 'Darwin' and platform_machine != 'aarch64'"
]

[tool.uv]
# Declare the hardware extras mutually exclusive: uv will refuse to resolve
# an environment that enables more than one of cpu/cu126/cu128/cu130 at once,
# preventing conflicting torch builds from different indexes.
conflicts = [
[
{ extra = "cpu" },
{ extra = "cu126" },
{ extra = "cu128" },
{ extra = "cu130" },
],
]

[tool.uv.sources]
# Route torch and torchvision to the PyTorch package index that matches the
# selected extra; every other dependency still resolves from PyPI.
torch = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu126", extra = "cu126" },
{ index = "pytorch-cu128", extra = "cu128" },
{ index = "pytorch-cu130", extra = "cu130" },
]
torchvision = [
{ index = "pytorch-cpu", extra = "cpu" },
{ index = "pytorch-cu126", extra = "cu126" },
{ index = "pytorch-cu128", extra = "cu128" },
{ index = "pytorch-cu130", extra = "cu130" },
]

# Per-variant PyTorch wheel indexes. `explicit = true` restricts each index to
# packages pinned to it via [tool.uv.sources], so it is never consulted for
# general dependency resolution (per uv's index documentation — verify against
# the uv version in use).
[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[[tool.uv.index]]
name = "pytorch-cu126"
url = "https://download.pytorch.org/whl/cu126"
explicit = true

[[tool.uv.index]]
name = "pytorch-cu128"
url = "https://download.pytorch.org/whl/cu128"
explicit = true

[[tool.uv.index]]
name = "pytorch-cu130"
url = "https://download.pytorch.org/whl/cu130"
explicit = true


[tool.uv.extra-build-dependencies]
flash-attn = [
{ requirement = "torch", match-runtime = true },
Expand Down
7 changes: 6 additions & 1 deletion src/modalities/utils/mfu.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
# https://www.nvidia.com/en-us/data-center/h100/
#
# NOTE: These values are valid for fp16 and bf16 only
PEAK_PERFORMANCE = {"A100": 312e12, "H100": 989e12, "GH200": 989e12}
# NOTE: Per NVIDIA's published B200 specs, 2.25 PFLOPS is the *dense* FP16/BF16
# figure; 4.5 PFLOPS assumes 2:4 structured sparsity. Dense (2.25e15) is the
# consistent choice alongside the dense A100/H100 numbers above — TODO confirm
# against the official Blackwell datasheet.
PEAK_PERFORMANCE = {"A100": 312e12, "H100": 989e12, "GH200": 989e12, "B200": 2.25e15}


class MFUCalculatorABC:
Expand Down Expand Up @@ -130,6 +131,10 @@ def _get_theoretical_gpu_peak_performance(model_parts: FSDPX | list[FSDP2], worl
single_gpu_peak_performance = MFUCalculatorABC._get_theoretical_gpu_peak_performance_single(
precision, "GH200"
)
elif device_name.startswith("NVIDIA B200"):
single_gpu_peak_performance = MFUCalculatorABC._get_theoretical_gpu_peak_performance_single(
precision, "B200"
)
else:
warnings.warn(f"Could not get theoretical GPU peak performance for unknown device = {device_name}.")
return None
Expand Down