From f01be8fb221a177e28ec7b2fe956e4c8f0be4084 Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 14:10:10 +0530
Subject: [PATCH 1/7] feat(gds-psuu): add declarative parameter space
 constraints

Add Constraint ABC with LinearConstraint and FunctionalConstraint
implementations. ParameterSpace now accepts an optional constraints
tuple, validates constraint param references at construction time,
and filters infeasible points from grid_points(). RandomSearchOptimizer
uses rejection sampling with a 1000-retry limit. 22 new tests.

Closes #113
---
 packages/gds-psuu/gds_psuu/__init__.py        |  13 +-
 .../gds-psuu/gds_psuu/optimizers/random.py    |  28 +-
 packages/gds-psuu/gds_psuu/space.py           |  72 ++++-
 packages/gds-psuu/tests/test_constraints.py   | 259 ++++++++++++++++++
 4 files changed, 362 insertions(+), 10 deletions(-)
 create mode 100644 packages/gds-psuu/tests/test_constraints.py

diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index 663a378..99df342 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -9,21 +9,32 @@
 from gds_psuu.optimizers.grid import GridSearchOptimizer
 from gds_psuu.optimizers.random import RandomSearchOptimizer
 from gds_psuu.results import EvaluationSummary, SweepResults
-from gds_psuu.space import Continuous, Discrete, Integer, ParameterSpace
+from gds_psuu.space import (
+    Constraint,
+    Continuous,
+    Discrete,
+    FunctionalConstraint,
+    Integer,
+    LinearConstraint,
+    ParameterSpace,
+)
 from gds_psuu.sweep import Sweep
 from gds_psuu.types import KPIFn, KPIScores, ParamPoint
 
 __all__ = [
     "KPI",
+    "Constraint",
     "Continuous",
     "Discrete",
     "EvaluationResult",
     "EvaluationSummary",
     "Evaluator",
+    "FunctionalConstraint",
     "GridSearchOptimizer",
     "Integer",
     "KPIFn",
     "KPIScores",
+    "LinearConstraint",
     "Optimizer",
     "ParamPoint",
     "ParameterSpace",
diff --git a/packages/gds-psuu/gds_psuu/optimizers/random.py b/packages/gds-psuu/gds_psuu/optimizers/random.py
index fee3dab..aa1fa03 100644
--- a/packages/gds-psuu/gds_psuu/optimizers/random.py
+++ b/packages/gds-psuu/gds_psuu/optimizers/random.py
@@ -5,17 +5,22 @@
 import random
 from typing import TYPE_CHECKING
 
+from gds_psuu.errors import PsuuSearchError
 from gds_psuu.optimizers.base import Optimizer
 from gds_psuu.space import Continuous, Discrete, Integer, ParameterSpace
 
 if TYPE_CHECKING:
     from gds_psuu.types import KPIScores, ParamPoint
 
+_MAX_REJECTION_RETRIES = 1000
+
 
 class RandomSearchOptimizer(Optimizer):
     """Samples parameter points uniformly at random.
 
     Uses stdlib ``random.Random`` for reproducibility — no numpy required.
+    When the parameter space has constraints, uses rejection sampling
+    with a configurable retry limit.
     """
 
     def __init__(self, n_samples: int = 20, seed: int | None = None) -> None:
@@ -28,8 +33,8 @@ def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None:
         self._space = space
         self._count = 0
 
-    def suggest(self) -> ParamPoint:
-        assert self._space is not None, "Call setup() before suggest()"
+    def _sample_point(self) -> ParamPoint:
+        assert self._space is not None
         point: ParamPoint = {}
         for name, dim in self._space.params.items():
             if isinstance(dim, Continuous):
@@ -38,9 +43,26 @@ def suggest(self) -> ParamPoint:
                 point[name] = self._rng.randint(dim.min_val, dim.max_val)
             elif isinstance(dim, Discrete):
                 point[name] = self._rng.choice(dim.values)
-        self._count += 1
         return point
 
+    def suggest(self) -> ParamPoint:
+        assert self._space is not None, "Call setup() before suggest()"
+        if not self._space.constraints:
+            point = self._sample_point()
+            self._count += 1
+            return point
+
+        for _ in range(_MAX_REJECTION_RETRIES):
+            point = self._sample_point()
+            if self._space.is_feasible(point):
+                self._count += 1
+                return point
+
+        raise PsuuSearchError(
+            f"Could not find a feasible point after {_MAX_REJECTION_RETRIES} "
+            "random samples. The feasible region may be too small."
+        )
+
     def observe(self, params: ParamPoint, scores: KPIScores) -> None:
         pass  # Random search doesn't adapt
 
diff --git a/packages/gds-psuu/gds_psuu/space.py b/packages/gds-psuu/gds_psuu/space.py
index 317340e..9784871 100644
--- a/packages/gds-psuu/gds_psuu/space.py
+++ b/packages/gds-psuu/gds_psuu/space.py
@@ -4,14 +4,14 @@
 
 import itertools
 import math
-from typing import TYPE_CHECKING, Any, Self
+from abc import ABC, abstractmethod
+from collections.abc import Callable  # noqa: TC003
+from typing import Any, Self
 
 from pydantic import BaseModel, ConfigDict, model_validator
 
 from gds_psuu.errors import PsuuValidationError
-
-if TYPE_CHECKING:
-    from gds_psuu.types import ParamPoint
+from gds_psuu.types import ParamPoint  # noqa: TC001
 
 
 class Continuous(BaseModel):
@@ -67,12 +67,51 @@ def _validate_values(self) -> Self:
 Dimension = Continuous | Integer | Discrete
 
 
+class Constraint(BaseModel, ABC):
+    """Base class for parameter space constraints."""
+
+    model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)
+
+    @abstractmethod
+    def is_feasible(self, point: ParamPoint) -> bool:
+        """Return True if the point satisfies this constraint."""
+
+
+class LinearConstraint(Constraint):
+    """Linear inequality constraint: sum(coeff_i * x_i) <= bound."""
+
+    coefficients: dict[str, float]
+    bound: float
+
+    @model_validator(mode="after")
+    def _validate_nonempty(self) -> Self:
+        if not self.coefficients:
+            raise PsuuValidationError(
+                "LinearConstraint must have at least 1 coefficient"
+            )
+        return self
+
+    def is_feasible(self, point: ParamPoint) -> bool:
+        total = sum(coeff * point[name] for name, coeff in self.coefficients.items())
+        return total <= self.bound
+
+
+class FunctionalConstraint(Constraint):
+    """Arbitrary feasibility predicate over a parameter point."""
+
+    fn: Callable[[ParamPoint], bool]
+
+    def is_feasible(self, point: ParamPoint) -> bool:
+        return self.fn(point)
+
+
 class ParameterSpace(BaseModel):
     """Defines the searchable parameter space as a mapping of named dimensions."""
 
-    model_config = ConfigDict(frozen=True)
+    model_config = ConfigDict(frozen=True, arbitrary_types_allowed=True)
 
     params: dict[str, Dimension]
+    constraints: tuple[Constraint, ...] = ()
 
     @model_validator(mode="after")
     def _validate_nonempty(self) -> Self:
@@ -80,17 +119,35 @@ def _validate_nonempty(self) -> Self:
             raise PsuuValidationError("ParameterSpace must have at least 1 parameter")
         return self
 
+    @model_validator(mode="after")
+    def _validate_constraint_params(self) -> Self:
+        param_names = set(self.params.keys())
+        for constraint in self.constraints:
+            if isinstance(constraint, LinearConstraint):
+                unknown = set(constraint.coefficients.keys()) - param_names
+                if unknown:
+                    raise PsuuValidationError(
+                        f"LinearConstraint references unknown params: {unknown}"
+                    )
+        return self
+
     @property
     def dimension_names(self) -> list[str]:
         """Ordered list of parameter names."""
         return list(self.params.keys())
 
+    def is_feasible(self, point: ParamPoint) -> bool:
+        """Check if a parameter point satisfies all constraints."""
+        return all(c.is_feasible(point) for c in self.constraints)
+
     def grid_points(self, n_steps: int) -> list[ParamPoint]:
         """Generate a grid of parameter points.
 
         For Continuous: ``n_steps`` evenly spaced values between min and max.
         For Integer: all integers in [min_val, max_val] (ignores n_steps).
         For Discrete: all values.
+
+        Points that violate constraints are excluded.
         """
         axes: list[list[Any]] = []
         for dim in self.params.values():
@@ -106,6 +163,9 @@ def grid_points(self, n_steps: int) -> list[ParamPoint]:
             elif isinstance(dim, Discrete):
                 axes.append(list(dim.values))
         names = self.dimension_names
-        return [
+        all_points = [
             dict(zip(names, combo, strict=True)) for combo in itertools.product(*axes)
         ]
+        if self.constraints:
+            return [p for p in all_points if self.is_feasible(p)]
+        return all_points
diff --git a/packages/gds-psuu/tests/test_constraints.py b/packages/gds-psuu/tests/test_constraints.py
new file mode 100644
index 0000000..96a0c2e
--- /dev/null
+++ b/packages/gds-psuu/tests/test_constraints.py
@@ -0,0 +1,259 @@
+"""Tests for parameter space constraints."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from gds_psuu import (
+    KPI,
+    Continuous,
+    Discrete,
+    FunctionalConstraint,
+    GridSearchOptimizer,
+    Integer,
+    LinearConstraint,
+    ParameterSpace,
+    PsuuSearchError,
+    PsuuValidationError,
+    RandomSearchOptimizer,
+    Sweep,
+    final_state_mean,
+)
+
+if TYPE_CHECKING:
+    from gds_sim import Model
+
+
+class TestLinearConstraint:
+    def test_feasible(self) -> None:
+        c = LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=10.0)
+        assert c.is_feasible({"a": 3.0, "b": 5.0}) is True
+
+    def test_infeasible(self) -> None:
+        c = LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=10.0)
+        assert c.is_feasible({"a": 6.0, "b": 5.0}) is False
+
+    def test_boundary(self) -> None:
+        c = LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=10.0)
+        assert c.is_feasible({"a": 5.0, "b": 5.0}) is True  # exactly equal
+
+    def test_empty_coefficients_rejected(self) -> None:
+        with pytest.raises(
+            (PsuuValidationError, ValueError),
+            match="at least 1 coefficient",
+        ):
+            LinearConstraint(coefficients={}, bound=10.0)
+
+    def test_negative_coefficients(self) -> None:
+        c = LinearConstraint(coefficients={"a": 1.0, "b": -1.0}, bound=0.0)
+        # a - b <= 0 means a <= b
+        assert c.is_feasible({"a": 3.0, "b": 5.0}) is True
+        assert c.is_feasible({"a": 5.0, "b": 3.0}) is False
+
+
+class TestFunctionalConstraint:
+    def test_feasible(self) -> None:
+        c = FunctionalConstraint(fn=lambda p: p["x"] > 0)
+        assert c.is_feasible({"x": 1.0}) is True
+
+    def test_infeasible(self) -> None:
+        c = FunctionalConstraint(fn=lambda p: p["x"] > 0)
+        assert c.is_feasible({"x": -1.0}) is False
+
+    def test_multi_param(self) -> None:
+        c = FunctionalConstraint(fn=lambda p: p["a"] * p["b"] < 100)
+        assert c.is_feasible({"a": 5, "b": 10}) is True
+        assert c.is_feasible({"a": 20, "b": 10}) is False
+
+
+class TestParameterSpaceConstraints:
+    def test_no_constraints_default(self) -> None:
+        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=10)})
+        assert space.constraints == ()
+
+    def test_is_feasible_no_constraints(self) -> None:
+        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=10)})
+        assert space.is_feasible({"x": 5.0}) is True
+
+    def test_is_feasible_with_linear(self) -> None:
+        space = ParameterSpace(
+            params={
+                "a": Continuous(min_val=0, max_val=100),
+                "b": Continuous(min_val=0, max_val=100),
+            },
+            constraints=(
+                LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=100.0),
+            ),
+        )
+        assert space.is_feasible({"a": 40.0, "b": 50.0}) is True
+        assert space.is_feasible({"a": 60.0, "b": 50.0}) is False
+
+    def test_is_feasible_with_functional(self) -> None:
+        space = ParameterSpace(
+            params={
+                "x": Continuous(min_val=0, max_val=10),
+                "y": Continuous(min_val=0, max_val=10),
+            },
+            constraints=(FunctionalConstraint(fn=lambda p: p["x"] < p["y"]),),
+        )
+        assert space.is_feasible({"x": 3, "y": 5}) is True
+        assert space.is_feasible({"x": 5, "y": 3}) is False
+
+    def test_multiple_constraints(self) -> None:
+        space = ParameterSpace(
+            params={
+                "a": Continuous(min_val=0, max_val=100),
+                "b": Continuous(min_val=0, max_val=100),
+            },
+            constraints=(
+                LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=100.0),
+                FunctionalConstraint(fn=lambda p: p["a"] >= 10),
+            ),
+        )
+        # Satisfies both
+        assert space.is_feasible({"a": 40.0, "b": 50.0}) is True
+        # Fails first constraint
+        assert space.is_feasible({"a": 60.0, "b": 50.0}) is False
+        # Fails second constraint
+        assert space.is_feasible({"a": 5.0, "b": 10.0}) is False
+
+    def test_linear_constraint_unknown_param_rejected(self) -> None:
+        with pytest.raises((PsuuValidationError, ValueError), match="unknown params"):
+            ParameterSpace(
+                params={"a": Continuous(min_val=0, max_val=10)},
+                constraints=(
+                    LinearConstraint(coefficients={"a": 1.0, "z": 1.0}, bound=10.0),
+                ),
+            )
+
+    def test_grid_points_filtered(self) -> None:
+        space = ParameterSpace(
+            params={
+                "a": Continuous(min_val=0, max_val=10),
+                "b": Continuous(min_val=0, max_val=10),
+            },
+            constraints=(
+                LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=10.0),
+            ),
+        )
+        points = space.grid_points(n_steps=3)  # 0, 5, 10 for each
+        # Valid: (0,0), (0,5), (0,10), (5,0), (5,5), (10,0) = 6 of 9
+        assert len(points) == 6
+        for p in points:
+            assert p["a"] + p["b"] <= 10.0
+
+    def test_grid_points_no_constraints_unchanged(self) -> None:
+        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=10)})
+        points = space.grid_points(n_steps=3)
+        assert len(points) == 3
+
+    def test_grid_points_all_infeasible(self) -> None:
+        space = ParameterSpace(
+            params={"x": Continuous(min_val=0, max_val=10)},
+            constraints=(FunctionalConstraint(fn=lambda p: False),),
+        )
+        points = space.grid_points(n_steps=3)
+        assert points == []
+
+
+class TestGridOptimizerWithConstraints:
+    def test_grid_respects_constraints(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={
+                "growth_rate": Continuous(min_val=0.01, max_val=0.1),
+            },
+            constraints=(FunctionalConstraint(fn=lambda p: p["growth_rate"] <= 0.06),),
+        )
+        sweep = Sweep(
+            model=simple_model,
+            space=space,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            optimizer=GridSearchOptimizer(n_steps=3),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        # 3 grid points: 0.01, 0.055, 0.1 — only first two are <= 0.06
+        assert len(results.evaluations) == 2
+
+
+class TestRandomOptimizerWithConstraints:
+    def test_random_respects_constraints(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={
+                "growth_rate": Continuous(min_val=0.01, max_val=0.1),
+            },
+            constraints=(FunctionalConstraint(fn=lambda p: p["growth_rate"] <= 0.05),),
+        )
+        sweep = Sweep(
+            model=simple_model,
+            space=space,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            optimizer=RandomSearchOptimizer(n_samples=10, seed=42),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 10
+        for ev in results.evaluations:
+            assert ev.params["growth_rate"] <= 0.05
+
+    def test_random_infeasible_raises(self) -> None:
+        space = ParameterSpace(
+            params={"x": Continuous(min_val=0, max_val=10)},
+            constraints=(FunctionalConstraint(fn=lambda p: False),),
+        )
+        opt = RandomSearchOptimizer(n_samples=1, seed=0)
+        opt.setup(space, ["kpi"])
+        with pytest.raises(PsuuSearchError, match="feasible point"):
+            opt.suggest()
+
+    def test_random_no_constraints_unchanged(self) -> None:
+        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=10)})
+        opt = RandomSearchOptimizer(n_samples=5, seed=42)
+        opt.setup(space, ["kpi"])
+        points = [opt.suggest() for _ in range(5)]
+        assert len(points) == 5
+        assert opt.is_exhausted()
+
+
+class TestConstraintWithIntegerAndDiscrete:
+    def test_integer_with_linear_constraint(self) -> None:
+        space = ParameterSpace(
+            params={
+                "a": Integer(min_val=1, max_val=5),
+                "b": Integer(min_val=1, max_val=5),
+            },
+            constraints=(
+                LinearConstraint(coefficients={"a": 1.0, "b": 1.0}, bound=5.0),
+            ),
+        )
+        points = space.grid_points(n_steps=0)  # n_steps ignored for Integer
+        for p in points:
+            assert p["a"] + p["b"] <= 5
+
+    def test_discrete_with_functional_constraint(self) -> None:
+        space = ParameterSpace(
+            params={
+                "strategy": Discrete(values=("A", "B", "C")),
+                "x": Continuous(min_val=0, max_val=10),
+            },
+            constraints=(FunctionalConstraint(fn=lambda p: p["strategy"] != "C"),),
+        )
+        points = space.grid_points(n_steps=3)
+        # 3 discrete * 3 continuous = 9, minus 3 where strategy="C" = 6
+        assert len(points) == 6
+        for p in points:
+            assert p["strategy"] != "C"

From d4b738a05561222d3b8c6c4c217250e94e1ea62a Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 14:16:06 +0530
Subject: [PATCH 2/7] feat(gds-psuu): add composable objectives and migrate to
 optuna

Add Objective ABC with SingleKPI and WeightedSum implementations,
separating "what to optimize" from "how to optimize". SweepResults
gains best_by_objective() for multi-KPI ranking. Sweep accepts an
optional objective parameter.

Migrate BayesianOptimizer from unmaintained scikit-optimize to optuna
(TPE sampler, ask/tell API). Update optional dependency accordingly.

15 new tests (4 Bayesian skipped without optuna installed).

Closes #114
---
 packages/gds-psuu/gds_psuu/__init__.py        |   8 +-
 packages/gds-psuu/gds_psuu/objective.py       |  50 ++++
 .../gds-psuu/gds_psuu/optimizers/__init__.py  |   2 +
 .../gds-psuu/gds_psuu/optimizers/bayesian.py  |  83 ++++---
 packages/gds-psuu/gds_psuu/results.py         |  20 +-
 packages/gds-psuu/gds_psuu/sweep.py           |   2 +
 packages/gds-psuu/pyproject.toml              |   2 +-
 packages/gds-psuu/tests/test_objective.py     | 225 ++++++++++++++++++
 8 files changed, 352 insertions(+), 40 deletions(-)
 create mode 100644 packages/gds-psuu/gds_psuu/objective.py
 create mode 100644 packages/gds-psuu/tests/test_objective.py

diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index 99df342..820ce45 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -5,7 +5,9 @@
 from gds_psuu.errors import PsuuError, PsuuSearchError, PsuuValidationError
 from gds_psuu.evaluation import EvaluationResult, Evaluator
 from gds_psuu.kpi import KPI, final_state_mean, final_state_std, time_average
+from gds_psuu.objective import Objective, SingleKPI, WeightedSum
 from gds_psuu.optimizers.base import Optimizer
+from gds_psuu.optimizers.bayesian import BayesianOptimizer
 from gds_psuu.optimizers.grid import GridSearchOptimizer
 from gds_psuu.optimizers.random import RandomSearchOptimizer
 from gds_psuu.results import EvaluationSummary, SweepResults
@@ -22,7 +24,7 @@
 from gds_psuu.types import KPIFn, KPIScores, ParamPoint
 
 __all__ = [
-    "KPI",
+    "BayesianOptimizer",
     "Constraint",
     "Continuous",
     "Discrete",
@@ -32,9 +34,11 @@
     "FunctionalConstraint",
     "GridSearchOptimizer",
     "Integer",
+    "KPI",
     "KPIFn",
     "KPIScores",
     "LinearConstraint",
+    "Objective",
     "Optimizer",
     "ParamPoint",
     "ParameterSpace",
@@ -42,8 +46,10 @@
     "PsuuSearchError",
     "PsuuValidationError",
     "RandomSearchOptimizer",
+    "SingleKPI",
     "Sweep",
     "SweepResults",
+    "WeightedSum",
     "final_state_mean",
     "final_state_std",
     "time_average",
diff --git a/packages/gds-psuu/gds_psuu/objective.py b/packages/gds-psuu/gds_psuu/objective.py
new file mode 100644
index 0000000..c21f8c3
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/objective.py
@@ -0,0 +1,50 @@
+"""Composable objective functions for multi-KPI optimization."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Self
+
+from pydantic import BaseModel, ConfigDict, model_validator
+
+from gds_psuu.errors import PsuuValidationError
+from gds_psuu.types import KPIScores  # noqa: TC001
+
+
+class Objective(BaseModel, ABC):
+    """Reduces KPIScores to a single scalar for optimizer consumption."""
+
+    model_config = ConfigDict(frozen=True)
+
+    @abstractmethod
+    def score(self, kpi_scores: KPIScores) -> float:
+        """Compute a scalar objective value from KPI scores."""
+
+
+class SingleKPI(Objective):
+    """Optimize a single KPI."""
+
+    name: str
+    maximize: bool = True
+
+    def score(self, kpi_scores: KPIScores) -> float:
+        val = kpi_scores[self.name]
+        return val if self.maximize else -val
+
+
+class WeightedSum(Objective):
+    """Weighted linear combination of KPIs.
+
+    Use negative weights to minimize a KPI.
+    """
+
+    weights: dict[str, float]
+
+    @model_validator(mode="after")
+    def _validate_nonempty(self) -> Self:
+        if not self.weights:
+            raise PsuuValidationError("WeightedSum must have at least 1 weight")
+        return self
+
+    def score(self, kpi_scores: KPIScores) -> float:
+        return sum(w * kpi_scores[k] for k, w in self.weights.items())
diff --git a/packages/gds-psuu/gds_psuu/optimizers/__init__.py b/packages/gds-psuu/gds_psuu/optimizers/__init__.py
index bb91678..e16c58c 100644
--- a/packages/gds-psuu/gds_psuu/optimizers/__init__.py
+++ b/packages/gds-psuu/gds_psuu/optimizers/__init__.py
@@ -1,10 +1,12 @@
 """Optimizer implementations for parameter space search."""
 
 from gds_psuu.optimizers.base import Optimizer
+from gds_psuu.optimizers.bayesian import BayesianOptimizer
 from gds_psuu.optimizers.grid import GridSearchOptimizer
 from gds_psuu.optimizers.random import RandomSearchOptimizer
 
 __all__ = [
+    "BayesianOptimizer",
     "GridSearchOptimizer",
     "Optimizer",
     "RandomSearchOptimizer",
diff --git a/packages/gds-psuu/gds_psuu/optimizers/bayesian.py b/packages/gds-psuu/gds_psuu/optimizers/bayesian.py
index 015b4a1..0cff90d 100644
--- a/packages/gds-psuu/gds_psuu/optimizers/bayesian.py
+++ b/packages/gds-psuu/gds_psuu/optimizers/bayesian.py
@@ -1,4 +1,4 @@
-"""Bayesian optimizer — wraps scikit-optimize (optional dependency)."""
+"""Bayesian optimizer — wraps optuna (optional dependency)."""
 
 from __future__ import annotations
 
@@ -12,44 +12,44 @@
     from gds_psuu.types import KPIScores, ParamPoint
 
 try:
-    from skopt import Optimizer as SkoptOptimizer  # type: ignore[import-untyped]
-    from skopt.space import Categorical, Real  # type: ignore[import-untyped]
-    from skopt.space import Integer as SkoptInteger
+    import optuna
 
-    _HAS_SKOPT = True
+    _HAS_OPTUNA = True
 except ImportError:  # pragma: no cover
-    _HAS_SKOPT = False
+    _HAS_OPTUNA = False
 
 
 class BayesianOptimizer(Optimizer):
-    """Bayesian optimization using Gaussian process surrogate.
+    """Bayesian optimization using optuna's TPE sampler.
 
-    Requires ``scikit-optimize``. Install with::
+    Requires ``optuna``. Install with::
 
-        pip install gds-psuu[bayesian]
+        uv add gds-psuu[bayesian]
 
     Optimizes a single target KPI (by default the first one registered).
     """
 
     def __init__(
         self,
-        n_calls: int = 20,
+        n_trials: int = 20,
         target_kpi: str | None = None,
         maximize: bool = True,
         seed: int | None = None,
     ) -> None:
-        if not _HAS_SKOPT:  # pragma: no cover
+        if not _HAS_OPTUNA:  # pragma: no cover
             raise ImportError(
-                "scikit-optimize is required for BayesianOptimizer. "
-                "Install with: pip install gds-psuu[bayesian]"
+                "optuna is required for BayesianOptimizer. "
+                "Install with: uv add gds-psuu[bayesian]"
             )
-        self._n_calls = n_calls
+        self._n_trials = n_trials
         self._target_kpi = target_kpi
         self._maximize = maximize
         self._seed = seed
-        self._optimizer: Any = None
+        self._study: Any = None
+        self._space: ParameterSpace | None = None
         self._param_names: list[str] = []
         self._count: int = 0
+        self._current_trial: Any = None
 
     def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None:
         if self._target_kpi is None:
@@ -59,38 +59,47 @@ def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None:
                 f"Target KPI '{self._target_kpi}' not found in {kpi_names}"
             )
 
+        self._space = space
         self._param_names = space.dimension_names
-        dimensions: list[Any] = []
-        for dim in space.params.values():
-            if isinstance(dim, Continuous):
-                dimensions.append(Real(dim.min_val, dim.max_val))
-            elif isinstance(dim, Integer):
-                dimensions.append(SkoptInteger(dim.min_val, dim.max_val))
-            elif isinstance(dim, Discrete):
-                dimensions.append(Categorical(list(dim.values)))
 
-        self._optimizer = SkoptOptimizer(
-            dimensions=dimensions,
-            random_state=self._seed,
-            n_initial_points=min(5, self._n_calls),
+        sampler = optuna.samplers.TPESampler(seed=self._seed)
+        direction = "maximize" if self._maximize else "minimize"
+        optuna.logging.set_verbosity(optuna.logging.WARNING)
+        self._study = optuna.create_study(
+            direction=direction,
+            sampler=sampler,
         )
         self._count = 0
 
     def suggest(self) -> ParamPoint:
-        assert self._optimizer is not None, "Call setup() before suggest()"
-        point = self._optimizer.ask()
-        return dict(zip(self._param_names, point, strict=True))
+        assert self._study is not None, "Call setup() before suggest()"
+        assert self._space is not None
+
+        self._current_trial = self._study.ask()
+        point: ParamPoint = {}
+        for name, dim in self._space.params.items():
+            if isinstance(dim, Continuous):
+                point[name] = self._current_trial.suggest_float(
+                    name, dim.min_val, dim.max_val
+                )
+            elif isinstance(dim, Integer):
+                point[name] = self._current_trial.suggest_int(
+                    name, dim.min_val, dim.max_val
+                )
+            elif isinstance(dim, Discrete):
+                point[name] = self._current_trial.suggest_categorical(
+                    name, list(dim.values)
+                )
+        return point
 
     def observe(self, params: ParamPoint, scores: KPIScores) -> None:
-        assert self._optimizer is not None
+        assert self._study is not None
         assert self._target_kpi is not None
-        point = [params[name] for name in self._param_names]
+        assert self._current_trial is not None
         value = scores[self._target_kpi]
-        # skopt minimizes, so negate if we want to maximize
-        if self._maximize:
-            value = -value
-        self._optimizer.tell(point, value)
+        self._study.tell(self._current_trial, value)
+        self._current_trial = None
         self._count += 1
 
     def is_exhausted(self) -> bool:
-        return self._count >= self._n_calls
+        return self._count >= self._n_trials
diff --git a/packages/gds-psuu/gds_psuu/results.py b/packages/gds-psuu/gds_psuu/results.py
index 6709586..5560a94 100644
--- a/packages/gds-psuu/gds_psuu/results.py
+++ b/packages/gds-psuu/gds_psuu/results.py
@@ -2,13 +2,16 @@
 
 from __future__ import annotations
 
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from pydantic import BaseModel, ConfigDict
 
 from gds_psuu.evaluation import EvaluationResult  # noqa: TC001
 from gds_psuu.types import KPIScores, ParamPoint  # noqa: TC001
 
+if TYPE_CHECKING:
+    from gds_psuu.objective import Objective
+
 
 class EvaluationSummary(BaseModel):
     """Summary of a single evaluation (without raw simulation data)."""
@@ -54,6 +57,21 @@ def best(self, kpi: str, *, maximize: bool = True) -> EvaluationSummary:
         )
         return EvaluationSummary(params=best_eval.params, scores=best_eval.scores)
 
+    def best_by_objective(self, objective: Objective) -> EvaluationSummary:
+        """Return the evaluation with the best objective score.
+
+        The objective reduces multiple KPI scores to a single scalar.
+        Higher is better.
+        """
+        if not self.evaluations:
+            raise ValueError("No evaluations to search")
+
+        best_eval = max(
+            self.evaluations,
+            key=lambda e: objective.score(e.scores),
+        )
+        return EvaluationSummary(params=best_eval.params, scores=best_eval.scores)
+
     def to_dataframe(self) -> Any:
         """Convert to pandas DataFrame. Requires ``pandas`` installed."""
         try:
diff --git a/packages/gds-psuu/gds_psuu/sweep.py b/packages/gds-psuu/gds_psuu/sweep.py
index 837bde9..4a1490b 100644
--- a/packages/gds-psuu/gds_psuu/sweep.py
+++ b/packages/gds-psuu/gds_psuu/sweep.py
@@ -7,6 +7,7 @@
 
 from gds_psuu.evaluation import EvaluationResult, Evaluator
 from gds_psuu.kpi import KPI  # noqa: TC001
+from gds_psuu.objective import Objective  # noqa: TC001
 from gds_psuu.optimizers.base import Optimizer  # noqa: TC001
 from gds_psuu.results import SweepResults
 from gds_psuu.space import ParameterSpace  # noqa: TC001
@@ -25,6 +26,7 @@ class Sweep(BaseModel):
     space: ParameterSpace
     kpis: list[KPI]
     optimizer: Optimizer
+    objective: Objective | None = None
     timesteps: int = 100
     runs: int = 1
 
diff --git a/packages/gds-psuu/pyproject.toml b/packages/gds-psuu/pyproject.toml
index fbad9ac..2377555 100644
--- a/packages/gds-psuu/pyproject.toml
+++ b/packages/gds-psuu/pyproject.toml
@@ -34,7 +34,7 @@ dependencies = [
 
 [project.optional-dependencies]
 pandas = ["pandas>=2.0"]
-bayesian = ["scikit-optimize>=0.10"]
+bayesian = ["optuna>=4.0"]
 
 [project.urls]
 Homepage = "https://github.com/BlockScience/gds-core"
diff --git a/packages/gds-psuu/tests/test_objective.py b/packages/gds-psuu/tests/test_objective.py
new file mode 100644
index 0000000..7569759
--- /dev/null
+++ b/packages/gds-psuu/tests/test_objective.py
@@ -0,0 +1,225 @@
+"""Tests for composable objective functions."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from gds_sim import Results
+
+from gds_psuu import (
+    KPI,
+    Continuous,
+    GridSearchOptimizer,
+    Objective,
+    ParameterSpace,
+    PsuuValidationError,
+    SingleKPI,
+    Sweep,
+    WeightedSum,
+    final_state_mean,
+)
+from gds_psuu.evaluation import EvaluationResult
+from gds_psuu.results import SweepResults
+
+if TYPE_CHECKING:
+    from gds_sim import Model
+
+
+def _make_eval(params: dict, scores: dict) -> EvaluationResult:
+    return EvaluationResult(
+        params=params,
+        scores=scores,
+        results=Results(state_keys=[]),
+        run_count=1,
+    )
+
+
+class TestSingleKPI:
+    def test_maximize(self) -> None:
+        obj = SingleKPI(name="profit")
+        assert obj.score({"profit": 100.0, "risk": 5.0}) == 100.0
+
+    def test_minimize(self) -> None:
+        obj = SingleKPI(name="risk", maximize=False)
+        assert obj.score({"profit": 100.0, "risk": 5.0}) == -5.0
+
+
+class TestWeightedSum:
+    def test_basic(self) -> None:
+        obj = WeightedSum(weights={"profit": 0.7, "risk": -0.3})
+        score = obj.score({"profit": 100.0, "risk": 10.0})
+        assert score == pytest.approx(0.7 * 100 + (-0.3) * 10)
+
+    def test_single_weight(self) -> None:
+        obj = WeightedSum(weights={"kpi": 2.0})
+        assert obj.score({"kpi": 50.0}) == 100.0
+
+    def test_empty_weights_rejected(self) -> None:
+        with pytest.raises(
+            (PsuuValidationError, ValueError),
+            match="at least 1 weight",
+        ):
+            WeightedSum(weights={})
+
+
+class TestObjectiveProtocol:
+    def test_is_abstract(self) -> None:
+        with pytest.raises(TypeError):
+            Objective()  # type: ignore[abstract]
+
+
+class TestBestByObjective:
+    def test_best_weighted_sum(self) -> None:
+        evals = [
+            _make_eval({"x": 1}, {"profit": 100.0, "risk": 50.0}),
+            _make_eval({"x": 2}, {"profit": 80.0, "risk": 10.0}),
+            _make_eval({"x": 3}, {"profit": 90.0, "risk": 30.0}),
+        ]
+        sr = SweepResults(
+            evaluations=evals,
+            kpi_names=["profit", "risk"],
+            optimizer_name="test",
+        )
+        obj = WeightedSum(weights={"profit": 1.0, "risk": -1.0})
+        # Scores: 50, 70, 60 → best is x=2
+        best = sr.best_by_objective(obj)
+        assert best.params == {"x": 2}
+
+    def test_best_single_kpi(self) -> None:
+        evals = [
+            _make_eval({"x": 1}, {"kpi": 10.0}),
+            _make_eval({"x": 2}, {"kpi": 30.0}),
+        ]
+        sr = SweepResults(
+            evaluations=evals,
+            kpi_names=["kpi"],
+            optimizer_name="test",
+        )
+        best = sr.best_by_objective(SingleKPI(name="kpi"))
+        assert best.params == {"x": 2}
+
+    def test_best_by_objective_empty(self) -> None:
+        sr = SweepResults(
+            evaluations=[],
+            kpi_names=["kpi"],
+            optimizer_name="test",
+        )
+        with pytest.raises(ValueError, match="No evaluations"):
+            sr.best_by_objective(SingleKPI(name="kpi"))
+
+
+class TestSweepWithObjective:
+    def test_sweep_with_objective(self, simple_model: Model) -> None:
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            objective=SingleKPI(name="final_pop"),
+            optimizer=GridSearchOptimizer(n_steps=3),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 3
+
+    def test_sweep_without_objective_backwards_compat(
+        self, simple_model: Model
+    ) -> None:
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            optimizer=GridSearchOptimizer(n_steps=2),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 2
+        # Old best() still works
+        best = results.best("final_pop")
+        assert "growth_rate" in best.params
+
+
+try:
+    import optuna  # noqa: F401
+
+    _has_optuna = True
+except ImportError:
+    _has_optuna = False
+
+
+@pytest.mark.skipif(not _has_optuna, reason="optuna not installed")
+class TestBayesianOptimizer:
+    def test_bayesian_sweep(self, simple_model: Model) -> None:
+        from gds_psuu import BayesianOptimizer
+
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            optimizer=BayesianOptimizer(n_trials=5, target_kpi="final_pop", seed=42),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 5
+
+    def test_bayesian_minimize(self, simple_model: Model) -> None:
+        from gds_psuu import BayesianOptimizer
+
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            optimizer=BayesianOptimizer(n_trials=5, maximize=False, seed=42),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 5
+
+    def test_bayesian_bad_target_kpi(self) -> None:
+        from gds_psuu import BayesianOptimizer
+        from gds_psuu.errors import PsuuSearchError
+
+        opt = BayesianOptimizer(n_trials=5, target_kpi="nonexistent")
+        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=1)})
+        with pytest.raises(PsuuSearchError, match="not found"):
+            opt.setup(space, ["kpi_a"])
+
+    def test_bayesian_defaults_to_first_kpi(self) -> None:
+        from gds_psuu import BayesianOptimizer
+
+        opt = BayesianOptimizer(n_trials=5, seed=0)
+        space = ParameterSpace(params={"x": Continuous(min_val=0, max_val=1)})
+        opt.setup(space, ["alpha", "beta"])
+        assert opt._target_kpi == "alpha"

From 7eef78b69739b8859739d4a5d6c3a63de51a2daa Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 14:19:38 +0530
Subject: [PATCH 3/7] feat(gds-psuu): add sensitivity analysis framework
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add sensitivity/ subpackage with pluggable Analyzer ABC and two
implementations:

- OATAnalyzer: one-at-a-time perturbation (mean_effect, relative_effect)
- MorrisAnalyzer: elementary effects screening (mu_star, sigma)

SensitivityResult provides ranking() and to_dataframe(). Both analyzers
compose with existing Evaluator and ParameterSpace primitives — no new
dependencies. 12 new tests.

Closes #112
---
 packages/gds-psuu/gds_psuu/__init__.py        |  10 +
 .../gds-psuu/gds_psuu/sensitivity/__init__.py |  12 +
 .../gds-psuu/gds_psuu/sensitivity/base.py     |  57 ++++
 .../gds-psuu/gds_psuu/sensitivity/morris.py   | 121 +++++++++
 packages/gds-psuu/gds_psuu/sensitivity/oat.py |  85 ++++++
 packages/gds-psuu/tests/test_sensitivity.py   | 257 ++++++++++++++++++
 6 files changed, 542 insertions(+)
 create mode 100644 packages/gds-psuu/gds_psuu/sensitivity/__init__.py
 create mode 100644 packages/gds-psuu/gds_psuu/sensitivity/base.py
 create mode 100644 packages/gds-psuu/gds_psuu/sensitivity/morris.py
 create mode 100644 packages/gds-psuu/gds_psuu/sensitivity/oat.py
 create mode 100644 packages/gds-psuu/tests/test_sensitivity.py

diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index 820ce45..f93f9c7 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -11,6 +11,12 @@
 from gds_psuu.optimizers.grid import GridSearchOptimizer
 from gds_psuu.optimizers.random import RandomSearchOptimizer
 from gds_psuu.results import EvaluationSummary, SweepResults
+from gds_psuu.sensitivity import (
+    Analyzer,
+    MorrisAnalyzer,
+    OATAnalyzer,
+    SensitivityResult,
+)
 from gds_psuu.space import (
     Constraint,
     Continuous,
@@ -24,6 +30,7 @@
 from gds_psuu.types import KPIFn, KPIScores, ParamPoint
 
 __all__ = [
+    "Analyzer",
     "BayesianOptimizer",
     "Constraint",
     "Continuous",
@@ -38,6 +45,8 @@
     "KPIFn",
     "KPIScores",
     "LinearConstraint",
+    "MorrisAnalyzer",
+    "OATAnalyzer",
     "Objective",
     "Optimizer",
     "ParamPoint",
@@ -46,6 +55,7 @@
     "PsuuSearchError",
     "PsuuValidationError",
     "RandomSearchOptimizer",
+    "SensitivityResult",
     "SingleKPI",
     "Sweep",
     "SweepResults",
diff --git a/packages/gds-psuu/gds_psuu/sensitivity/__init__.py b/packages/gds-psuu/gds_psuu/sensitivity/__init__.py
new file mode 100644
index 0000000..e4d2149
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/sensitivity/__init__.py
@@ -0,0 +1,12 @@
+"""Sensitivity analysis framework for gds-psuu."""
+
+from gds_psuu.sensitivity.base import Analyzer, SensitivityResult
+from gds_psuu.sensitivity.morris import MorrisAnalyzer
+from gds_psuu.sensitivity.oat import OATAnalyzer
+
+__all__ = [
+    "Analyzer",
+    "MorrisAnalyzer",
+    "OATAnalyzer",
+    "SensitivityResult",
+]
diff --git a/packages/gds-psuu/gds_psuu/sensitivity/base.py b/packages/gds-psuu/gds_psuu/sensitivity/base.py
new file mode 100644
index 0000000..501ea50
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/sensitivity/base.py
@@ -0,0 +1,57 @@
+"""Analyzer ABC and SensitivityResult."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import TYPE_CHECKING, Any
+
+from pydantic import BaseModel, ConfigDict
+
+if TYPE_CHECKING:
+    from gds_psuu.evaluation import Evaluator
+    from gds_psuu.space import ParameterSpace
+
+
+class SensitivityResult(BaseModel):
+    """Per-KPI, per-parameter sensitivity indices."""
+
+    model_config = ConfigDict(frozen=True)
+
+    indices: dict[str, dict[str, dict[str, float]]]
+    """kpi_name -> param_name -> {metric_name: value}."""
+    method: str
+
+    def ranking(self, kpi: str, *, metric: str = "mean_effect") -> list[str]:
+        """Rank parameters by a metric for a given KPI, descending."""
+        kpi_indices = self.indices[kpi]
+        return sorted(
+            kpi_indices,
+            key=lambda p: abs(kpi_indices[p][metric]),
+            reverse=True,
+        )
+
+    def to_dataframe(self) -> Any:
+        """Convert to pandas DataFrame. Requires ``pandas`` installed."""
+        try:
+            import pandas as pd  # type: ignore[import-untyped]
+        except ImportError as exc:  # pragma: no cover
+            raise ImportError(
+                "pandas is required for to_dataframe(). "
+                "Install with: uv add gds-psuu[pandas]"
+            ) from exc
+
+        rows: list[dict[str, Any]] = []
+        for kpi, params in self.indices.items():
+            for param, metrics in params.items():
+                row: dict[str, Any] = {"kpi": kpi, "param": param}
+                row.update(metrics)
+                rows.append(row)
+        return pd.DataFrame(rows)
+
+
+class Analyzer(ABC):
+    """Base class for sensitivity analyzers."""
+
+    @abstractmethod
+    def analyze(self, evaluator: Evaluator, space: ParameterSpace) -> SensitivityResult:
+        """Run sensitivity analysis and return results."""
diff --git a/packages/gds-psuu/gds_psuu/sensitivity/morris.py b/packages/gds-psuu/gds_psuu/sensitivity/morris.py
new file mode 100644
index 0000000..b6f8ccc
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/sensitivity/morris.py
@@ -0,0 +1,121 @@
+"""Morris method (elementary effects) sensitivity analyzer."""
+
+from __future__ import annotations
+
+import random
+from typing import TYPE_CHECKING
+
+from gds_psuu.sensitivity.base import Analyzer, SensitivityResult
+from gds_psuu.space import Continuous, Discrete, Integer
+
+if TYPE_CHECKING:
+    from gds_psuu.evaluation import Evaluator
+    from gds_psuu.space import ParameterSpace
+    from gds_psuu.types import ParamPoint
+
+
+class MorrisAnalyzer(Analyzer):
+    """Morris screening method for sensitivity analysis.
+
+    Generates ``r`` random trajectories through the parameter space,
+    each with ``k+1`` points (k = number of parameters). Computes
+    elementary effects per parameter:
+
+    - ``mu_star``: mean of absolute elementary effects (influence)
+    - ``sigma``: std of elementary effects (nonlinearity / interactions)
+    """
+
+    def __init__(self, r: int = 10, n_levels: int = 4, seed: int | None = None) -> None:
+        self._r = r
+        self._n_levels = n_levels
+        self._rng = random.Random(seed)
+
+    def analyze(self, evaluator: Evaluator, space: ParameterSpace) -> SensitivityResult:
+        param_names = space.dimension_names
+        k = len(param_names)
+
+        # Precompute level values for each parameter
+        level_values: dict[str, list[object]] = {}
+        for name, dim in space.params.items():
+            level_values[name] = _get_levels(dim, self._n_levels)
+
+        # Collect elementary effects per parameter per KPI
+        kpi_names: list[str] | None = None
+        effects: dict[str, dict[str, list[float]]] = {}
+
+        for _ in range(self._r):
+            # Generate random starting point from levels
+            base_point: ParamPoint = {
+                name: self._rng.choice(level_values[name]) for name in param_names
+            }
+            base_result = evaluator.evaluate(base_point)
+
+            if kpi_names is None:
+                kpi_names = list(base_result.scores.keys())
+                effects = {kpi: {p: [] for p in param_names} for kpi in kpi_names}
+
+            # Permute parameter order for this trajectory
+            order = list(range(k))
+            self._rng.shuffle(order)
+
+            current_point: ParamPoint = dict(base_point)
+            current_scores = dict(base_result.scores)
+
+            for idx in order:
+                name = param_names[idx]
+                levels = level_values[name]
+                old_val = current_point[name]
+
+                # Pick a different level
+                candidates = [v for v in levels if v != old_val]
+                if not candidates:
+                    continue
+                new_val = self._rng.choice(candidates)
+
+                next_point: ParamPoint = dict(current_point)
+                next_point[name] = new_val
+                next_result = evaluator.evaluate(next_point)
+
+                for kpi in kpi_names:
+                    ee = next_result.scores[kpi] - current_scores[kpi]
+                    effects[kpi][name].append(ee)
+
+                current_point = next_point
+                current_scores = dict(next_result.scores)
+
+        assert kpi_names is not None
+        indices: dict[str, dict[str, dict[str, float]]] = {}
+        for kpi in kpi_names:
+            indices[kpi] = {}
+            for param in param_names:
+                effs = effects[kpi][param]
+                n = len(effs)
+                if n == 0:
+                    indices[kpi][param] = {"mu_star": 0.0, "sigma": 0.0}
+                    continue
+                mu_star = sum(abs(e) for e in effs) / n
+                mean = sum(effs) / n
+                variance = (
+                    sum((e - mean) ** 2 for e in effs) / (n - 1) if n > 1 else 0.0
+                )
+                sigma = variance**0.5
+                indices[kpi][param] = {
+                    "mu_star": mu_star,
+                    "sigma": sigma,
+                }
+
+        return SensitivityResult(indices=indices, method="Morris")
+
+
+def _get_levels(dim: Continuous | Integer | Discrete, n_levels: int) -> list[object]:
+    """Generate evenly-spaced levels for a dimension."""
+    if isinstance(dim, Continuous):
+        if n_levels < 2:
+            return [dim.min_val]
+        step = (dim.max_val - dim.min_val) / (n_levels - 1)
+        return [dim.min_val + i * step for i in range(n_levels)]
+    elif isinstance(dim, Integer):
+        return list(range(dim.min_val, dim.max_val + 1))
+    elif isinstance(dim, Discrete):
+        return list(dim.values)
+    return []  # pragma: no cover
diff --git a/packages/gds-psuu/gds_psuu/sensitivity/oat.py b/packages/gds-psuu/gds_psuu/sensitivity/oat.py
new file mode 100644
index 0000000..c19f880
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/sensitivity/oat.py
@@ -0,0 +1,85 @@
+"""One-at-a-time (OAT) sensitivity analyzer."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from gds_psuu.sensitivity.base import Analyzer, SensitivityResult
+from gds_psuu.space import Continuous, Discrete, Integer
+
+if TYPE_CHECKING:
+    from gds_psuu.evaluation import Evaluator
+    from gds_psuu.space import ParameterSpace
+    from gds_psuu.types import ParamPoint
+
+
+class OATAnalyzer(Analyzer):
+    """One-at-a-time sensitivity analysis.
+
+    Varies each parameter independently while holding others at baseline
+    (midpoint for Continuous/Integer, first value for Discrete).
+    """
+
+    def __init__(self, n_levels: int = 4) -> None:
+        self._n_levels = n_levels
+
+    def analyze(self, evaluator: Evaluator, space: ParameterSpace) -> SensitivityResult:
+        baseline = _compute_baseline(space)
+        baseline_result = evaluator.evaluate(baseline)
+        baseline_scores = baseline_result.scores
+
+        kpi_names = list(baseline_scores.keys())
+        indices: dict[str, dict[str, dict[str, float]]] = {kpi: {} for kpi in kpi_names}
+
+        for param_name, dim in space.params.items():
+            test_values = _get_test_values(dim, self._n_levels)
+            effects: dict[str, list[float]] = {kpi: [] for kpi in kpi_names}
+
+            for val in test_values:
+                point: ParamPoint = dict(baseline)
+                point[param_name] = val
+                result = evaluator.evaluate(point)
+                for kpi in kpi_names:
+                    effects[kpi].append(result.scores[kpi] - baseline_scores[kpi])
+
+            for kpi in kpi_names:
+                effs = effects[kpi]
+                n = len(effs)
+                mean_effect = sum(abs(e) for e in effs) / n if n else 0.0
+                base_val = baseline_scores[kpi]
+                relative = mean_effect / abs(base_val) if base_val != 0 else 0.0
+                indices[kpi][param_name] = {
+                    "mean_effect": mean_effect,
+                    "relative_effect": relative,
+                }
+
+        return SensitivityResult(indices=indices, method="OAT")
+
+
+def _compute_baseline(space: ParameterSpace) -> ParamPoint:
+    """Compute baseline point: midpoints for numeric, first for discrete."""
+    baseline: ParamPoint = {}
+    for name, dim in space.params.items():
+        if isinstance(dim, Continuous):
+            baseline[name] = (dim.min_val + dim.max_val) / 2.0
+        elif isinstance(dim, Integer):
+            baseline[name] = (dim.min_val + dim.max_val) // 2
+        elif isinstance(dim, Discrete):
+            baseline[name] = dim.values[0]
+    return baseline
+
+
+def _get_test_values(
+    dim: Continuous | Integer | Discrete, n_levels: int
+) -> list[object]:
+    """Generate test values for a dimension."""
+    if isinstance(dim, Continuous):
+        if n_levels < 2:
+            return [dim.min_val]
+        step = (dim.max_val - dim.min_val) / (n_levels - 1)
+        return [dim.min_val + i * step for i in range(n_levels)]
+    elif isinstance(dim, Integer):
+        return list(range(dim.min_val, dim.max_val + 1))
+    elif isinstance(dim, Discrete):
+        return list(dim.values)
+    return []  # pragma: no cover
diff --git a/packages/gds-psuu/tests/test_sensitivity.py b/packages/gds-psuu/tests/test_sensitivity.py
new file mode 100644
index 0000000..0f8fd69
--- /dev/null
+++ b/packages/gds-psuu/tests/test_sensitivity.py
@@ -0,0 +1,257 @@
+"""Tests for sensitivity analysis framework."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+
+from gds_psuu import (
+    KPI,
+    Analyzer,
+    Continuous,
+    Discrete,
+    Evaluator,
+    Integer,
+    MorrisAnalyzer,
+    OATAnalyzer,
+    ParameterSpace,
+    SensitivityResult,
+    final_state_mean,
+)
+
+if TYPE_CHECKING:
+    from gds_sim import Model
+
+
+class TestAnalyzerABC:
+    def test_is_abstract(self) -> None:
+        with pytest.raises(TypeError):
+            Analyzer()  # type: ignore[abstract]
+
+
+class TestSensitivityResult:
+    def test_ranking(self) -> None:
+        result = SensitivityResult(
+            indices={
+                "kpi_a": {
+                    "x": {"mean_effect": 10.0},
+                    "y": {"mean_effect": 50.0},
+                    "z": {"mean_effect": 30.0},
+                }
+            },
+            method="test",
+        )
+        ranking = result.ranking("kpi_a")
+        assert ranking == ["y", "z", "x"]
+
+    def test_ranking_custom_metric(self) -> None:
+        result = SensitivityResult(
+            indices={
+                "kpi": {
+                    "a": {"mu_star": 5.0, "sigma": 20.0},
+                    "b": {"mu_star": 15.0, "sigma": 2.0},
+                }
+            },
+            method="test",
+        )
+        ranking = result.ranking("kpi", metric="sigma")
+        assert ranking == ["a", "b"]
+
+    def test_to_dataframe(self) -> None:
+        pytest.importorskip("pandas")
+        result = SensitivityResult(
+            indices={
+                "kpi": {
+                    "x": {"mean_effect": 10.0, "relative_effect": 0.5},
+                    "y": {"mean_effect": 20.0, "relative_effect": 1.0},
+                }
+            },
+            method="OAT",
+        )
+        df = result.to_dataframe()
+        assert len(df) == 2
+        assert "kpi" in df.columns
+        assert "param" in df.columns
+        assert "mean_effect" in df.columns
+
+
+class TestOATAnalyzer:
+    def test_oat_basic(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+        )
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=10,
+            runs=1,
+        )
+        analyzer = OATAnalyzer(n_levels=4)
+        result = analyzer.analyze(evaluator, space)
+
+        assert result.method == "OAT"
+        assert "final_pop" in result.indices
+        assert "growth_rate" in result.indices["final_pop"]
+        metrics = result.indices["final_pop"]["growth_rate"]
+        assert "mean_effect" in metrics
+        assert "relative_effect" in metrics
+        assert metrics["mean_effect"] > 0  # growth rate matters
+
+    def test_oat_multi_param(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={
+                "growth_rate": Continuous(min_val=0.01, max_val=0.1),
+                "label": Discrete(values=("A", "B")),
+            }
+        )
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=10,
+            runs=1,
+        )
+        analyzer = OATAnalyzer(n_levels=3)
+        result = analyzer.analyze(evaluator, space)
+
+        assert "growth_rate" in result.indices["final_pop"]
+        assert "label" in result.indices["final_pop"]
+        # growth_rate should be more influential than label
+        ranking = result.ranking("final_pop")
+        assert ranking[0] == "growth_rate"
+
+    def test_oat_integer_dim(self, simple_model: Model) -> None:
+        space = ParameterSpace(params={"growth_rate": Integer(min_val=1, max_val=3)})
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=5,
+            runs=1,
+        )
+        analyzer = OATAnalyzer(n_levels=3)
+        result = analyzer.analyze(evaluator, space)
+        assert "growth_rate" in result.indices["final_pop"]
+
+    def test_oat_single_level(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+        )
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=5,
+            runs=1,
+        )
+        analyzer = OATAnalyzer(n_levels=1)
+        result = analyzer.analyze(evaluator, space)
+        assert "growth_rate" in result.indices["final_pop"]
+
+
+class TestMorrisAnalyzer:
+    def test_morris_basic(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+        )
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=10,
+            runs=1,
+        )
+        analyzer = MorrisAnalyzer(r=5, n_levels=4, seed=42)
+        result = analyzer.analyze(evaluator, space)
+
+        assert result.method == "Morris"
+        assert "final_pop" in result.indices
+        metrics = result.indices["final_pop"]["growth_rate"]
+        assert "mu_star" in metrics
+        assert "sigma" in metrics
+        assert metrics["mu_star"] > 0
+
+    def test_morris_multi_param(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={
+                "growth_rate": Continuous(min_val=0.01, max_val=0.1),
+                "label": Discrete(values=("A", "B")),
+            }
+        )
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=10,
+            runs=1,
+        )
+        analyzer = MorrisAnalyzer(r=8, n_levels=4, seed=42)
+        result = analyzer.analyze(evaluator, space)
+
+        ranking = result.ranking("final_pop", metric="mu_star")
+        assert ranking[0] == "growth_rate"
+
+    def test_morris_reproducible(self, simple_model: Model) -> None:
+        space = ParameterSpace(
+            params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+        )
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=5,
+            runs=1,
+        )
+        r1 = MorrisAnalyzer(r=5, seed=99).analyze(evaluator, space)
+        r2 = MorrisAnalyzer(r=5, seed=99).analyze(evaluator, space)
+        assert (
+            r1.indices["final_pop"]["growth_rate"]["mu_star"]
+            == r2.indices["final_pop"]["growth_rate"]["mu_star"]
+        )
+
+    def test_morris_discrete_only(self, simple_model: Model) -> None:
+        space = ParameterSpace(params={"strategy": Discrete(values=("A", "B", "C"))})
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                )
+            ],
+            timesteps=5,
+            runs=1,
+        )
+        analyzer = MorrisAnalyzer(r=5, n_levels=3, seed=42)
+        result = analyzer.analyze(evaluator, space)
+        assert "strategy" in result.indices["final_pop"]

From 0622ab27c22da3146bb3d975f845116be4be99b7 Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 15:06:46 +0530
Subject: [PATCH 4/7] feat: separate Metric and Aggregation primitives from KPI

Introduce composable Metric (per-run scalar) and Aggregation (across-run
reducer) types that compose into KPIs, aligning with the simulation glossary
hierarchy. KPI now accepts either legacy fn or metric+aggregation pair.

Built-in metrics: final_value, trajectory_mean, max_value, min_value
Built-in aggregations: mean, std, percentile, probability_above/below

Evaluator populates per-run distributions for metric-based KPIs.

Closes #118
---
 packages/gds-psuu/gds_psuu/__init__.py   |  28 ++-
 packages/gds-psuu/gds_psuu/evaluation.py |  16 +-
 packages/gds-psuu/gds_psuu/kpi.py        |  61 ++++-
 packages/gds-psuu/gds_psuu/metric.py     | 214 +++++++++++++++++
 packages/gds-psuu/gds_psuu/types.py      |   6 +
 packages/gds-psuu/tests/test_metric.py   | 284 +++++++++++++++++++++++
 6 files changed, 601 insertions(+), 8 deletions(-)
 create mode 100644 packages/gds-psuu/gds_psuu/metric.py
 create mode 100644 packages/gds-psuu/tests/test_metric.py

diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index f93f9c7..beb4d17 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -5,6 +5,19 @@
 from gds_psuu.errors import PsuuError, PsuuSearchError, PsuuValidationError
 from gds_psuu.evaluation import EvaluationResult, Evaluator
 from gds_psuu.kpi import KPI, final_state_mean, final_state_std, time_average
+from gds_psuu.metric import (
+    Aggregation,
+    Metric,
+    final_value,
+    max_value,
+    mean_agg,
+    min_value,
+    percentile_agg,
+    probability_above,
+    probability_below,
+    std_agg,
+    trajectory_mean,
+)
 from gds_psuu.objective import Objective, SingleKPI, WeightedSum
 from gds_psuu.optimizers.base import Optimizer
 from gds_psuu.optimizers.bayesian import BayesianOptimizer
@@ -27,9 +40,11 @@
     ParameterSpace,
 )
 from gds_psuu.sweep import Sweep
-from gds_psuu.types import KPIFn, KPIScores, ParamPoint
+from gds_psuu.types import AggregationFn, KPIFn, KPIScores, MetricFn, ParamPoint
 
 __all__ = [
+    "Aggregation",
+    "AggregationFn",
     "Analyzer",
     "BayesianOptimizer",
     "Constraint",
@@ -45,6 +60,8 @@
     "KPIFn",
     "KPIScores",
     "LinearConstraint",
+    "Metric",
+    "MetricFn",
     "MorrisAnalyzer",
     "OATAnalyzer",
     "Objective",
@@ -62,5 +79,14 @@
     "WeightedSum",
     "final_state_mean",
     "final_state_std",
+    "final_value",
+    "max_value",
+    "mean_agg",
+    "min_value",
+    "percentile_agg",
+    "probability_above",
+    "probability_below",
+    "std_agg",
     "time_average",
+    "trajectory_mean",
 ]
diff --git a/packages/gds-psuu/gds_psuu/evaluation.py b/packages/gds-psuu/gds_psuu/evaluation.py
index 06fd5ad..4c1bcc0 100644
--- a/packages/gds-psuu/gds_psuu/evaluation.py
+++ b/packages/gds-psuu/gds_psuu/evaluation.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any
 
 from gds_sim import Model, Results, Simulation
@@ -20,6 +20,8 @@ class EvaluationResult:
     scores: KPIScores
     results: Results
     run_count: int
+    distributions: dict[str, list[float]] = field(default_factory=dict)
+    """Per-run metric values for metric-based KPIs."""
 
 
 class Evaluator(BaseModel):
@@ -36,7 +38,8 @@ def evaluate(self, params: ParamPoint) -> EvaluationResult:
         """Evaluate a single parameter point.
 
         Injects params as singleton lists into the model, runs the simulation,
-        and computes KPI scores.
+        and computes KPI scores. For metric-based KPIs, also records per-run
+        distributions.
         """
         # Build params dict: each value as a singleton list for gds-sim
         sim_params: dict[str, list[Any]] = {k: [v] for k, v in params.items()}
@@ -51,11 +54,18 @@ def evaluate(self, params: ParamPoint) -> EvaluationResult:
         sim = Simulation(model=model, timesteps=self.timesteps, runs=self.runs)
         results = sim.run()
 
-        scores: KPIScores = {kpi.name: kpi.fn(results) for kpi in self.kpis}
+        scores: KPIScores = {}
+        distributions: dict[str, list[float]] = {}
+
+        for kpi in self.kpis:
+            scores[kpi.name] = kpi.compute(results)
+            if kpi.metric is not None:
+                distributions[kpi.name] = kpi.per_run(results)
 
         return EvaluationResult(
             params=params,
             scores=scores,
             results=results,
             run_count=self.runs,
+            distributions=distributions,
         )
diff --git a/packages/gds-psuu/gds_psuu/kpi.py b/packages/gds-psuu/gds_psuu/kpi.py
index 45393c6..b805ec1 100644
--- a/packages/gds-psuu/gds_psuu/kpi.py
+++ b/packages/gds-psuu/gds_psuu/kpi.py
@@ -2,10 +2,17 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Self
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 
+from gds_psuu.errors import PsuuValidationError
+from gds_psuu.metric import (
+    Aggregation,
+    Metric,
+    _extract_run_ids,
+    mean_agg,
+)
 from gds_psuu.types import KPIFn  # noqa: TC001
 
 if TYPE_CHECKING:
@@ -13,12 +20,58 @@
 
 
 class KPI(BaseModel):
-    """Named KPI backed by a scoring function."""
+    """Named KPI backed by either a legacy fn or a Metric + Aggregation pair.
+
+    Legacy usage (backwards compatible)::
+
+        KPI(name="avg_pop", fn=lambda r: final_state_mean(r, "population"))
+
+    Composable usage::
+
+        KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg)
+    """
 
     model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
 
     name: str
-    fn: KPIFn
+    fn: KPIFn | None = None
+    metric: Metric | None = None
+    aggregation: Aggregation | None = None
+
+    @model_validator(mode="after")
+    def _validate_specification(self) -> Self:
+        has_fn = self.fn is not None
+        has_metric = self.metric is not None
+        if not has_fn and not has_metric:
+            raise PsuuValidationError("KPI must have either 'fn' or 'metric' specified")
+        if has_fn and has_metric:
+            raise PsuuValidationError(
+                "KPI cannot have both 'fn' and 'metric' specified"
+            )
+        return self
+
+    def compute(self, results: Results) -> float:
+        """Compute the aggregated KPI score from results."""
+        if self.fn is not None:
+            return self.fn(results)
+        assert self.metric is not None
+        agg = self.aggregation or mean_agg
+        per_run = self.per_run(results)
+        return agg.fn(per_run)
+
+    def per_run(self, results: Results) -> list[float]:
+        """Compute per-run metric values. Only available for metric-based KPIs."""
+        if self.metric is None:
+            raise PsuuValidationError(
+                "per_run() requires a metric-based KPI, not a legacy fn-based KPI"
+            )
+        run_ids = _extract_run_ids(results)
+        return [self.metric.fn(results, r) for r in run_ids]
+
+
+# ---------------------------------------------------------------------------
+# Legacy helper functions (backwards compatible)
+# ---------------------------------------------------------------------------
 
 
 def final_state_mean(results: Results, key: str) -> float:
diff --git a/packages/gds-psuu/gds_psuu/metric.py b/packages/gds-psuu/gds_psuu/metric.py
new file mode 100644
index 0000000..5242075
--- /dev/null
+++ b/packages/gds-psuu/gds_psuu/metric.py
@@ -0,0 +1,214 @@
+"""Metric and Aggregation primitives for composable KPI construction."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from pydantic import BaseModel, ConfigDict
+
+from gds_psuu.types import AggregationFn, MetricFn  # noqa: TC001
+
+if TYPE_CHECKING:
+    from gds_sim import Results
+
+
+class Metric(BaseModel):
+    """Per-run scalar extracted from simulation output."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
+
+    name: str
+    fn: MetricFn
+
+
+class Aggregation(BaseModel):
+    """Combines per-run metric values across Monte Carlo runs."""
+
+    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)
+
+    name: str
+    fn: AggregationFn
+
+
+# ---------------------------------------------------------------------------
+# Built-in metric factories
+# ---------------------------------------------------------------------------
+
+
+def _extract_run_ids(results: Results) -> list[int]:
+    """Get unique run IDs from results."""
+    cols = results._trimmed_columns()
+    runs = cols["run"]
+    seen: set[int] = set()
+    ordered: list[int] = []
+    for r in runs:
+        if r not in seen:
+            seen.add(r)
+            ordered.append(r)
+    return ordered
+
+
+def _final_value_fn(results: Results, run: int, key: str) -> float:
+    """Extract the final-timestep value for a specific run."""
+    cols = results._trimmed_columns()
+    timesteps = cols["timestep"]
+    substeps = cols["substep"]
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    max_t = 0
+    for i in range(n):
+        if runs_col[i] == run:
+            t = timesteps[i]
+            if t > max_t:
+                max_t = t
+
+    max_s = 0
+    for i in range(n):
+        if runs_col[i] == run and timesteps[i] == max_t:
+            s = substeps[i]
+            if s > max_s:
+                max_s = s
+
+    for i in range(n):
+        if runs_col[i] == run and timesteps[i] == max_t and substeps[i] == max_s:
+            return float(values[i])
+
+    return 0.0
+
+
+def final_value(key: str) -> Metric:
+    """Metric: value of a state variable at the final timestep of a run."""
+    return Metric(
+        name=f"final_{key}",
+        fn=lambda results, run, _key=key: _final_value_fn(results, run, _key),
+    )
+
+
+def _trajectory_mean_fn(results: Results, run: int, key: str) -> float:
+    """Mean of a state variable across all timesteps for a specific run."""
+    cols = results._trimmed_columns()
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    total = 0.0
+    count = 0
+    for i in range(n):
+        if runs_col[i] == run:
+            total += float(values[i])
+            count += 1
+
+    return total / count if count else 0.0
+
+
+def trajectory_mean(key: str) -> Metric:
+    """Metric: mean of a state variable over time for a single run."""
+    return Metric(
+        name=f"mean_{key}",
+        fn=lambda results, run, _key=key: _trajectory_mean_fn(results, run, _key),
+    )
+
+
+def _max_value_fn(results: Results, run: int, key: str) -> float:
+    """Max of a state variable across all timesteps for a specific run."""
+    cols = results._trimmed_columns()
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    max_val = float("-inf")
+    for i in range(n):
+        if runs_col[i] == run:
+            v = float(values[i])
+            if v > max_val:
+                max_val = v
+    return max_val if max_val != float("-inf") else 0.0
+
+
+def max_value(key: str) -> Metric:
+    """Metric: maximum value of a state variable within a single run."""
+    return Metric(
+        name=f"max_{key}",
+        fn=lambda results, run, _key=key: _max_value_fn(results, run, _key),
+    )
+
+
+def _min_value_fn(results: Results, run: int, key: str) -> float:
+    """Min of a state variable across all timesteps for a specific run."""
+    cols = results._trimmed_columns()
+    runs_col = cols["run"]
+    values = cols[key]
+    n = results._size
+
+    min_val = float("inf")
+    for i in range(n):
+        if runs_col[i] == run:
+            v = float(values[i])
+            if v < min_val:
+                min_val = v
+    return min_val if min_val != float("inf") else 0.0
+
+
+def min_value(key: str) -> Metric:
+    """Metric: minimum value of a state variable within a single run."""
+    return Metric(
+        name=f"min_{key}",
+        fn=lambda results, run, _key=key: _min_value_fn(results, run, _key),
+    )
+
+
+# ---------------------------------------------------------------------------
+# Built-in aggregations
+# ---------------------------------------------------------------------------
+
+mean_agg = Aggregation(
+    name="mean",
+    fn=lambda vals: sum(vals) / len(vals) if vals else 0.0,
+)
+
+std_agg = Aggregation(
+    name="std",
+    fn=lambda vals: (
+        (sum((x - sum(vals) / len(vals)) ** 2 for x in vals) / (len(vals) - 1)) ** 0.5
+        if len(vals) > 1
+        else 0.0
+    ),
+)
+
+
+def percentile_agg(p: float) -> Aggregation:
+    """Aggregation: p-th percentile across runs."""
+
+    def _fn(vals: list[float]) -> float:
+        if not vals:
+            return 0.0
+        s = sorted(vals)
+        k = (p / 100.0) * (len(s) - 1)
+        lo = int(k)
+        hi = min(lo + 1, len(s) - 1)
+        frac = k - lo
+        return s[lo] + frac * (s[hi] - s[lo])
+
+    return Aggregation(name=f"p{p}", fn=_fn)
+
+
+def probability_above(threshold: float) -> Aggregation:
+    """Aggregation: fraction of runs where metric exceeds threshold."""
+    return Aggregation(
+        name=f"P(>{threshold})",
+        fn=lambda vals: (
+            sum(1 for v in vals if v > threshold) / len(vals) if vals else 0.0
+        ),
+    )
+
+
+def probability_below(threshold: float) -> Aggregation:
+    """Aggregation: fraction of runs where metric is below threshold."""
+    return Aggregation(
+        name=f"P(<{threshold})",
+        fn=lambda vals: (
+            sum(1 for v in vals if v < threshold) / len(vals) if vals else 0.0
+        ),
+    )
diff --git a/packages/gds-psuu/gds_psuu/types.py b/packages/gds-psuu/gds_psuu/types.py
index 8b97744..d7e8ec2 100644
--- a/packages/gds-psuu/gds_psuu/types.py
+++ b/packages/gds-psuu/gds_psuu/types.py
@@ -13,5 +13,11 @@
 KPIFn = Callable[[Results], float]
 """Computes a scalar KPI score from simulation results (all Monte Carlo runs)."""
 
+MetricFn = Callable[[Results, int], float]
+"""Computes a scalar from a single run. Args: results, run_number."""
+
+AggregationFn = Callable[[list[float]], float]
+"""Combines per-run metric values into a single scalar."""
+
 KPIScores = dict[str, float]
 """Maps KPI names to their computed scalar scores."""
diff --git a/packages/gds-psuu/tests/test_metric.py b/packages/gds-psuu/tests/test_metric.py
new file mode 100644
index 0000000..122a77f
--- /dev/null
+++ b/packages/gds-psuu/tests/test_metric.py
@@ -0,0 +1,284 @@
+"""Tests for Metric/Aggregation primitives and composable KPIs."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import pytest
+from gds_sim import Results
+
+from gds_psuu import (
+    KPI,
+    Continuous,
+    Evaluator,
+    GridSearchOptimizer,
+    ParameterSpace,
+    PsuuValidationError,
+    Sweep,
+    final_state_mean,
+    final_value,
+    max_value,
+    mean_agg,
+    min_value,
+    percentile_agg,
+    probability_above,
+    probability_below,
+    std_agg,
+    trajectory_mean,
+)
+
+if TYPE_CHECKING:
+    from gds_sim import Model
+
+
+def _make_results(values: list[list[float]], key: str = "x") -> Results:
+    """Create Results with multiple runs. Each inner list is one run's trajectory."""
+    r = Results(state_keys=[key])
+    for run_id, trajectory in enumerate(values, start=1):
+        for t, val in enumerate(trajectory):
+            r.append(
+                {key: val},
+                timestep=t,
+                substep=1,
+                run=run_id,
+                subset=0,
+            )
+    return r
+
+
+class TestMetricFactories:
+    def test_final_value(self) -> None:
+        results = _make_results([[10.0, 20.0, 30.0]], key="pop")
+        m = final_value("pop")
+        assert m.fn(results, 1) == 30.0
+
+    def test_final_value_multi_run(self) -> None:
+        results = _make_results([[10.0, 20.0], [100.0, 200.0]], key="pop")
+        m = final_value("pop")
+        assert m.fn(results, 1) == 20.0
+        assert m.fn(results, 2) == 200.0
+
+    def test_trajectory_mean(self) -> None:
+        results = _make_results([[10.0, 20.0, 30.0]], key="x")
+        m = trajectory_mean("x")
+        assert m.fn(results, 1) == 20.0
+
+    def test_max_value(self) -> None:
+        results = _make_results([[5.0, 100.0, 3.0]], key="x")
+        m = max_value("x")
+        assert m.fn(results, 1) == 100.0
+
+    def test_min_value(self) -> None:
+        results = _make_results([[5.0, 1.0, 3.0]], key="x")
+        m = min_value("x")
+        assert m.fn(results, 1) == 1.0
+
+    def test_metric_name(self) -> None:
+        assert final_value("pop").name == "final_pop"
+        assert trajectory_mean("x").name == "mean_x"
+        assert max_value("y").name == "max_y"
+        assert min_value("z").name == "min_z"
+
+
+class TestAggregations:
+    def test_mean_agg(self) -> None:
+        assert mean_agg.fn([10.0, 20.0, 30.0]) == 20.0
+
+    def test_mean_agg_empty(self) -> None:
+        assert mean_agg.fn([]) == 0.0
+
+    def test_std_agg(self) -> None:
+        vals = [10.0, 20.0, 30.0]
+        result = std_agg.fn(vals)
+        assert result == pytest.approx(10.0)
+
+    def test_std_agg_single(self) -> None:
+        assert std_agg.fn([5.0]) == 0.0
+
+    def test_percentile_agg(self) -> None:
+        agg = percentile_agg(50)
+        assert agg.fn([1.0, 2.0, 3.0, 4.0, 5.0]) == 3.0
+
+    def test_percentile_agg_boundary(self) -> None:
+        assert percentile_agg(0).fn([10.0, 20.0, 30.0]) == 10.0
+        assert percentile_agg(100).fn([10.0, 20.0, 30.0]) == 30.0
+
+    def test_percentile_agg_empty(self) -> None:
+        assert percentile_agg(50).fn([]) == 0.0
+
+    def test_probability_above(self) -> None:
+        agg = probability_above(15.0)
+        assert agg.fn([10.0, 20.0, 30.0]) == pytest.approx(2 / 3)
+
+    def test_probability_above_empty(self) -> None:
+        assert probability_above(0).fn([]) == 0.0
+
+    def test_probability_below(self) -> None:
+        agg = probability_below(25.0)
+        assert agg.fn([10.0, 20.0, 30.0]) == pytest.approx(2 / 3)
+
+    def test_probability_below_empty(self) -> None:
+        assert probability_below(0).fn([]) == 0.0
+
+
+class TestComposableKPI:
+    def test_metric_based_kpi(self) -> None:
+        results = _make_results([[10.0, 20.0], [10.0, 40.0], [10.0, 60.0]], key="pop")
+        kpi = KPI(name="avg_final", metric=final_value("pop"), aggregation=mean_agg)
+        assert kpi.compute(results) == 40.0
+
+    def test_metric_defaults_to_mean(self) -> None:
+        results = _make_results([[10.0, 20.0], [10.0, 40.0]], key="pop")
+        kpi = KPI(name="avg_final", metric=final_value("pop"))
+        assert kpi.compute(results) == 30.0  # mean of [20, 40]
+
+    def test_metric_with_percentile(self) -> None:
+        results = _make_results(
+            [[0.0, v] for v in [10.0, 20.0, 30.0, 40.0, 50.0]], key="x"
+        )
+        kpi = KPI(
+            name="p50_x",
+            metric=final_value("x"),
+            aggregation=percentile_agg(50),
+        )
+        assert kpi.compute(results) == 30.0
+
+    def test_metric_with_probability(self) -> None:
+        results = _make_results([[0.0, v] for v in [10.0, 20.0, 30.0]], key="x")
+        kpi = KPI(
+            name="risk",
+            metric=final_value("x"),
+            aggregation=probability_below(25.0),
+        )
+        assert kpi.compute(results) == pytest.approx(2 / 3)
+
+    def test_per_run(self) -> None:
+        results = _make_results([[10.0, 20.0], [10.0, 40.0], [10.0, 60.0]], key="pop")
+        kpi = KPI(name="final_pop", metric=final_value("pop"))
+        per_run = kpi.per_run(results)
+        assert per_run == [20.0, 40.0, 60.0]
+
+    def test_per_run_on_fn_kpi_raises(self) -> None:
+        kpi = KPI(name="legacy", fn=lambda r: 0.0)
+        results = Results(state_keys=[])
+        with pytest.raises(PsuuValidationError, match="metric-based"):
+            kpi.per_run(results)
+
+
+class TestLegacyKPI:
+    def test_fn_based_still_works(self) -> None:
+        results = _make_results([[10.0, 20.0, 30.0]], key="pop")
+        kpi = KPI(
+            name="legacy",
+            fn=lambda r: final_state_mean(r, "pop"),
+        )
+        assert kpi.compute(results) == 30.0
+
+    def test_validation_neither(self) -> None:
+        with pytest.raises(
+            (PsuuValidationError, ValueError),
+            match="either 'fn' or 'metric'",
+        ):
+            KPI(name="bad")
+
+    def test_validation_both(self) -> None:
+        with pytest.raises(
+            (PsuuValidationError, ValueError),
+            match="cannot have both",
+        ):
+            KPI(
+                name="bad",
+                fn=lambda r: 0.0,
+                metric=final_value("x"),
+            )
+
+
+class TestEvaluatorDistributions:
+    def test_distributions_populated(self, simple_model: Model) -> None:
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(name="final_pop", metric=final_value("population")),
+            ],
+            timesteps=5,
+            runs=3,
+        )
+        result = evaluator.evaluate({"growth_rate": 0.05})
+        assert "final_pop" in result.distributions
+        assert len(result.distributions["final_pop"]) == 3
+
+    def test_distributions_empty_for_legacy_kpi(self, simple_model: Model) -> None:
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="legacy_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                ),
+            ],
+            timesteps=5,
+            runs=1,
+        )
+        result = evaluator.evaluate({"growth_rate": 0.05})
+        assert "legacy_pop" not in result.distributions
+
+    def test_mixed_kpis(self, simple_model: Model) -> None:
+        evaluator = Evaluator(
+            base_model=simple_model,
+            kpis=[
+                KPI(
+                    name="legacy",
+                    fn=lambda r: final_state_mean(r, "population"),
+                ),
+                KPI(name="composable", metric=final_value("population")),
+            ],
+            timesteps=5,
+            runs=2,
+        )
+        result = evaluator.evaluate({"growth_rate": 0.05})
+        assert "legacy" not in result.distributions
+        assert "composable" in result.distributions
+        assert len(result.distributions["composable"]) == 2
+
+
+class TestSweepWithComposableKPI:
+    def test_sweep_composable(self, simple_model: Model) -> None:
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(name="avg_pop", metric=final_value("population")),
+            ],
+            optimizer=GridSearchOptimizer(n_steps=3),
+            timesteps=5,
+            runs=2,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 3
+        # Each evaluation should have distributions
+        for ev in results.evaluations:
+            assert "avg_pop" in ev.distributions
+            assert len(ev.distributions["avg_pop"]) == 2
+
+    def test_sweep_legacy_still_works(self, simple_model: Model) -> None:
+        sweep = Sweep(
+            model=simple_model,
+            space=ParameterSpace(
+                params={"growth_rate": Continuous(min_val=0.01, max_val=0.1)}
+            ),
+            kpis=[
+                KPI(
+                    name="final_pop",
+                    fn=lambda r: final_state_mean(r, "population"),
+                ),
+            ],
+            optimizer=GridSearchOptimizer(n_steps=2),
+            timesteps=5,
+            runs=1,
+        )
+        results = sweep.run()
+        assert len(results.evaluations) == 2
+        best = results.best("final_pop")
+        assert best.params["growth_rate"] == 0.1

From faa8e62f8f83f5f45804b8e55aa45e0881389805 Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 15:12:50 +0530
Subject: [PATCH 5/7] docs: add gds-psuu documentation

Overview, getting started guide, concept hierarchy (Metric/Aggregation/KPI),
parameter spaces reference, optimizer guide, and API reference pages.
Adds psuu section to mkdocs nav and landing page package table.
---
 docs/index.md                 |   6 +
 docs/psuu/api/evaluation.md   |   8 ++
 docs/psuu/api/init.md         |   8 ++
 docs/psuu/api/kpi.md          |   8 ++
 docs/psuu/api/metric.md       |   8 ++
 docs/psuu/api/optimizers.md   |  31 +++++
 docs/psuu/api/results.md      |   8 ++
 docs/psuu/api/space.md        |   8 ++
 docs/psuu/api/sweep.md        |   8 ++
 docs/psuu/getting-started.md  | 182 ++++++++++++++++++++++++++++
 docs/psuu/guide/concepts.md   | 215 ++++++++++++++++++++++++++++++++++
 docs/psuu/guide/optimizers.md | 105 +++++++++++++++++
 docs/psuu/guide/spaces.md     | 102 ++++++++++++++++
 docs/psuu/index.md            |  92 +++++++++++++++
 mkdocs.yml                    |  21 ++++
 15 files changed, 810 insertions(+)
 create mode 100644 docs/psuu/api/evaluation.md
 create mode 100644 docs/psuu/api/init.md
 create mode 100644 docs/psuu/api/kpi.md
 create mode 100644 docs/psuu/api/metric.md
 create mode 100644 docs/psuu/api/optimizers.md
 create mode 100644 docs/psuu/api/results.md
 create mode 100644 docs/psuu/api/space.md
 create mode 100644 docs/psuu/api/sweep.md
 create mode 100644 docs/psuu/getting-started.md
 create mode 100644 docs/psuu/guide/concepts.md
 create mode 100644 docs/psuu/guide/optimizers.md
 create mode 100644 docs/psuu/guide/spaces.md
 create mode 100644 docs/psuu/index.md

diff --git a/docs/index.md b/docs/index.md
index 3b1f228..0a6a9bc 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -36,6 +36,8 @@ Key guides include embedded [marimo](https://marimo.io) notebooks — run code,
 | `gds-games` | `ogs` | Typed DSL for compositional game theory (Open Games) |
 | `gds-software` | `gds_software` | Software architecture DSL (DFD, state machine, C4, ERD, etc.) |
 | `gds-business` | `gds_business` | Business dynamics DSL (CLD, supply chain, value stream map) |
+| `gds-sim` | `gds_sim` | Simulation engine — Model, Simulation, Results |
+| `gds-psuu` | `gds_psuu` | Parameter space search under uncertainty for gds-sim |
 | `gds-examples` | — | Tutorial models demonstrating framework features |
 
 ## Architecture
@@ -51,6 +53,10 @@ gds-software   ←  software architecture DSL (depends on gds-framework)
 gds-business   ←  business dynamics DSL (depends on gds-framework)
     ↑
 gds-examples   ←  tutorials (depends on gds-framework + gds-viz)
+
+gds-sim        ←  simulation engine (standalone — no gds-framework dep)
+    ↑
+gds-psuu       ←  parameter search under uncertainty (depends on gds-sim)
 ```
 
 ## License
diff --git a/docs/psuu/api/evaluation.md b/docs/psuu/api/evaluation.md
new file mode 100644
index 0000000..22f5a48
--- /dev/null
+++ b/docs/psuu/api/evaluation.md
@@ -0,0 +1,8 @@
+# gds_psuu.evaluation
+
+Evaluation bridge between parameter points and gds-sim.
+
+::: gds_psuu.evaluation
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/init.md b/docs/psuu/api/init.md
new file mode 100644
index 0000000..6bdb0fb
--- /dev/null
+++ b/docs/psuu/api/init.md
@@ -0,0 +1,8 @@
+# gds_psuu
+
+Public API -- top-level exports.
+
+::: gds_psuu
+    options:
+      show_submodules: false
+      members: false
diff --git a/docs/psuu/api/kpi.md b/docs/psuu/api/kpi.md
new file mode 100644
index 0000000..63b4178
--- /dev/null
+++ b/docs/psuu/api/kpi.md
@@ -0,0 +1,8 @@
+# gds_psuu.kpi
+
+KPI wrapper and legacy helper functions.
+
+::: gds_psuu.kpi
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/metric.md b/docs/psuu/api/metric.md
new file mode 100644
index 0000000..9a3ccbf
--- /dev/null
+++ b/docs/psuu/api/metric.md
@@ -0,0 +1,8 @@
+# gds_psuu.metric
+
+Metric and Aggregation primitives for composable KPI construction.
+
+::: gds_psuu.metric
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/optimizers.md b/docs/psuu/api/optimizers.md
new file mode 100644
index 0000000..ffc727e
--- /dev/null
+++ b/docs/psuu/api/optimizers.md
@@ -0,0 +1,31 @@
+# gds_psuu.optimizers
+
+Search strategy implementations.
+
+## Base
+
+::: gds_psuu.optimizers.base
+    options:
+      show_source: true
+      members_order: source
+
+## Grid Search
+
+::: gds_psuu.optimizers.grid
+    options:
+      show_source: true
+      members_order: source
+
+## Random Search
+
+::: gds_psuu.optimizers.random
+    options:
+      show_source: true
+      members_order: source
+
+## Bayesian (optional)
+
+::: gds_psuu.optimizers.bayesian
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/results.md b/docs/psuu/api/results.md
new file mode 100644
index 0000000..c4f8231
--- /dev/null
+++ b/docs/psuu/api/results.md
@@ -0,0 +1,8 @@
+# gds_psuu.results
+
+Sweep results and summary types.
+
+::: gds_psuu.results
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/space.md b/docs/psuu/api/space.md
new file mode 100644
index 0000000..d697c61
--- /dev/null
+++ b/docs/psuu/api/space.md
@@ -0,0 +1,8 @@
+# gds_psuu.space
+
+Parameter space definitions for search.
+
+::: gds_psuu.space
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/api/sweep.md b/docs/psuu/api/sweep.md
new file mode 100644
index 0000000..6c49ff6
--- /dev/null
+++ b/docs/psuu/api/sweep.md
@@ -0,0 +1,8 @@
+# gds_psuu.sweep
+
+Sweep orchestrator -- the main entry point for parameter search.
+
+::: gds_psuu.sweep
+    options:
+      show_source: true
+      members_order: source
diff --git a/docs/psuu/getting-started.md b/docs/psuu/getting-started.md
new file mode 100644
index 0000000..55c4728
--- /dev/null
+++ b/docs/psuu/getting-started.md
@@ -0,0 +1,182 @@
+# Getting Started
+
+## Installation
+
+```bash
+uv add gds-psuu
+# or: pip install gds-psuu
+```
+
+For Bayesian optimization (optional):
+
+```bash
+uv add "gds-psuu[bayesian]"
+# or: pip install gds-psuu[bayesian]
+```
+
+For development (monorepo):
+
+```bash
+git clone https://github.com/BlockScience/gds-core.git
+cd gds-core
+uv sync --all-packages
+```
+
+## Your First Parameter Sweep
+
+Define a `gds-sim` model, then sweep a parameter to find the best value:
+
+```python
+from gds_sim import Model, StateUpdateBlock
+from gds_psuu import (
+    KPI,
+    Continuous,
+    GridSearchOptimizer,
+    ParameterSpace,
+    Sweep,
+    final_value,
+    mean_agg,
+)
+
+
+# 1. Define a growth model
+def growth_policy(state, params, **kw):
+    return {"delta": state["population"] * params["growth_rate"]}
+
+
+def update_pop(state, params, *, signal=None, **kw):
+    return ("population", state["population"] + signal["delta"])
+
+
+model = Model(
+    initial_state={"population": 100.0},
+    state_update_blocks=[
+        StateUpdateBlock(
+            policies={"growth": growth_policy},
+            variables={"population": update_pop},
+        )
+    ],
+)
+
+# 2. Define what to search
+space = ParameterSpace(
+    params={"growth_rate": Continuous(min_val=0.01, max_val=0.2)}
+)
+
+# 3. Define what to measure
+kpis = [
+    KPI(
+        name="avg_final_pop",
+        metric=final_value("population"),    # per-run: final value
+        aggregation=mean_agg,                # cross-run: mean
+    ),
+]
+
+# 4. Run the sweep
+sweep = Sweep(
+    model=model,
+    space=space,
+    kpis=kpis,
+    optimizer=GridSearchOptimizer(n_steps=5),
+    timesteps=10,
+    runs=3,  # 3 Monte Carlo runs per parameter point
+)
+results = sweep.run()
+
+# 5. Inspect results
+best = results.best("avg_final_pop")
+print(f"Best growth_rate: {best.params['growth_rate']:.3f}")
+print(f"Best avg final pop: {best.scores['avg_final_pop']:.1f}")
+```
+
+## Composable KPIs
+
+The key design is the **Metric + Aggregation = KPI** pattern:
+
+```python
+from gds_psuu import (
+    KPI,
+    final_value,
+    trajectory_mean,
+    max_value,
+    mean_agg,
+    std_agg,
+    percentile_agg,
+    probability_above,
+)
+
+# Mean of final population across runs
+avg_final = KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg)
+
+# Standard deviation of final population (measures uncertainty)
+std_final = KPI(name="std_pop", metric=final_value("population"), aggregation=std_agg)
+
+# 90th percentile of trajectory means
+p90_mean = KPI(name="p90_mean", metric=trajectory_mean("population"), aggregation=percentile_agg(90))
+
+# Probability that max population exceeds 500
+risk = KPI(name="boom_risk", metric=max_value("population"), aggregation=probability_above(500.0))
+```
+
+**Metric** extracts a scalar from each run. **Aggregation** reduces the per-run values to a single score.
+
+If no aggregation is specified, `mean_agg` is used by default:
+
+```python
+# These are equivalent:
+KPI(name="avg_pop", metric=final_value("population"))
+KPI(name="avg_pop", metric=final_value("population"), aggregation=mean_agg)
+```
+
+## Per-Run Distributions
+
+Metric-based KPIs track the full distribution across Monte Carlo runs:
+
+```python
+results = sweep.run()
+
+for ev in results.evaluations:
+    dist = ev.distributions["avg_final_pop"]
+    print(f"  params={ev.params}, per_run={dist}")
+    # e.g. per_run=[265.3, 265.3, 265.3] for deterministic model
+```
+
+## Multiple Optimizers
+
+```python
+from gds_psuu import GridSearchOptimizer, RandomSearchOptimizer
+
+# Exhaustive grid (good for 1-2 dimensions)
+grid = GridSearchOptimizer(n_steps=10)  # 10 points per continuous dim
+
+# Random sampling (good for higher dimensions)
+rand = RandomSearchOptimizer(n_samples=50, seed=42)
+```
+
+For Bayesian optimization (requires `gds-psuu[bayesian]`):
+
+```python
+from gds_psuu.optimizers.bayesian import BayesianOptimizer
+
+bayes = BayesianOptimizer(n_calls=30, target_kpi="avg_final_pop", seed=42)
+```
+
+## Legacy KPI Support
+
+The older `fn`-based KPI interface still works:
+
+```python
+from gds_psuu import KPI, final_state_mean
+
+# Legacy style (backwards compatible)
+kpi = KPI(name="pop", fn=lambda r: final_state_mean(r, "population"))
+```
+
+Legacy KPIs don't track per-run distributions -- use metric-based KPIs for full Monte Carlo awareness.
+
+## Next Steps
+
+- [Concepts](guide/concepts.md) -- Metric, Aggregation, KPI, and the full conceptual hierarchy
+- [Parameter Spaces](guide/spaces.md) -- dimensions, validation, and grid generation
+- [Optimizers](guide/optimizers.md) -- grid, random, and Bayesian search strategies
+- [API Reference](api/init.md) -- complete auto-generated API docs
diff --git a/docs/psuu/guide/concepts.md b/docs/psuu/guide/concepts.md
new file mode 100644
index 0000000..d78e811
--- /dev/null
+++ b/docs/psuu/guide/concepts.md
@@ -0,0 +1,215 @@
+# Concepts
+
+This page explains the core abstractions in `gds-psuu` and how they compose.
+
+## The Hierarchy
+
+```
+Parameter Point  ->  Simulation  ->  Results  ->  Metric  ->  Aggregation  ->  KPI
+```
+
+Each layer transforms data from the previous one. The sweep loop orchestrates the full pipeline across many parameter points.
+
+---
+
+## Parameter Space
+
+A `ParameterSpace` defines what to search. Each dimension has a name and a type:
+
+| Dimension | Description | Grid behavior |
+|-----------|-------------|---------------|
+| `Continuous(min_val, max_val)` | Real-valued range | `n_steps` evenly spaced points |
+| `Integer(min_val, max_val)` | Integer range (inclusive) | All integers in range |
+| `Discrete(values=(...))` | Explicit set of values | All values |
+
+```python
+from gds_psuu import Continuous, Integer, Discrete, ParameterSpace
+
+space = ParameterSpace(params={
+    "learning_rate": Continuous(min_val=0.001, max_val=0.1),
+    "hidden_layers": Integer(min_val=1, max_val=5),
+    "activation": Discrete(values=("relu", "tanh", "sigmoid")),
+})
+```
+
+Validation enforces `min_val < max_val` and at least one parameter.
+
+---
+
+## Metric
+
+A `Metric` extracts a **single scalar from one simulation run**. It receives the full `Results` object and a run ID.
+
+```python
+from gds_psuu import Metric
+
+# Built-in factories
+from gds_psuu import final_value, trajectory_mean, max_value, min_value
+
+final_value("population")      # value at last timestep
+trajectory_mean("population")  # mean over all timesteps
+max_value("population")        # maximum over all timesteps
+min_value("population")        # minimum over all timesteps
+```
+
+Custom metrics:
+
+```python
+Metric(
+    name="range",
+    fn=lambda results, run: (
+        max_value("x").fn(results, run) - min_value("x").fn(results, run)
+    ),
+)
+```
+
+The `MetricFn` signature is `(Results, int) -> float` where the int is the run ID.
+
+---
+
+## Aggregation
+
+An `Aggregation` reduces a **list of per-run values into a single scalar**. It operates on `list[float]` and returns `float`.
+
+```python
+from gds_psuu import mean_agg, std_agg, percentile_agg, probability_above, probability_below
+
+mean_agg                    # arithmetic mean
+std_agg                     # sample standard deviation
+percentile_agg(50)          # median (50th percentile)
+percentile_agg(95)          # 95th percentile
+probability_above(100.0)    # fraction of runs > 100
+probability_below(0.0)      # fraction of runs < 0 (risk measure)
+```
+
+Custom aggregations:
+
+```python
+from gds_psuu import Aggregation
+
+cv_agg = Aggregation(
+    name="cv",
+    fn=lambda vals: (
+        (sum((x - sum(vals)/len(vals))**2 for x in vals) / (len(vals)-1))**0.5
+        / (sum(vals)/len(vals))
+        if len(vals) > 1 and sum(vals) != 0 else 0.0
+    ),
+)
+```
+
+---
+
+## KPI
+
+A `KPI` composes a Metric and an Aggregation into a named score:
+
+```python
+from gds_psuu import KPI, final_value, std_agg
+
+kpi = KPI(
+    name="uncertainty",
+    metric=final_value("population"),  # per-run: final value
+    aggregation=std_agg,               # cross-run: standard deviation
+)
+```
+
+If `aggregation` is omitted, `mean_agg` is used by default.
+
+### Per-run access
+
+Metric-based KPIs expose the full distribution:
+
+```python
+results = simulation_results  # from gds-sim
+per_run_values = kpi.per_run(results)  # [val_run1, val_run2, ...]
+aggregated = kpi.compute(results)       # single float
+```
+
+### Legacy KPIs
+
+The older `fn`-based interface operates on the full `Results` at once:
+
+```python
+from gds_psuu import KPI, final_state_mean
+
+kpi = KPI(name="pop", fn=lambda r: final_state_mean(r, "population"))
+```
+
+Legacy KPIs cannot use `per_run()` and don't produce distributions. Prefer metric-based KPIs for new code.
+
+---
+
+## Evaluator
+
+The `Evaluator` bridges parameter points to scored KPIs:
+
+1. Takes a parameter point `{"growth_rate": 0.05}`
+2. Injects params into the `gds-sim` Model
+3. Runs N Monte Carlo simulations
+4. Computes each KPI on the results
+5. Returns `EvaluationResult` with scores and distributions
+
+```python
+from gds_psuu import Evaluator
+
+evaluator = Evaluator(
+    base_model=model,
+    kpis=[kpi1, kpi2],
+    timesteps=100,
+    runs=10,
+)
+result = evaluator.evaluate({"growth_rate": 0.05})
+# result.scores == {"kpi1_name": 42.0, "kpi2_name": 3.14}
+# result.distributions == {"kpi1_name": [per-run values...]}
+```
+
+---
+
+## Optimizer
+
+An `Optimizer` implements the suggest/observe loop:
+
+| Optimizer | Strategy | When to use |
+|-----------|----------|-------------|
+| `GridSearchOptimizer(n_steps)` | Exhaustive cartesian product | 1-2 dimensions, need full coverage |
+| `RandomSearchOptimizer(n_samples, seed)` | Uniform random sampling | Higher dimensions, exploration |
+| `BayesianOptimizer(n_calls, target_kpi)` | Gaussian process surrogate | Expensive evaluations, optimization |
+
+All optimizers implement the same interface:
+
+```python
+optimizer.setup(space, kpi_names)
+while not optimizer.is_exhausted():
+    params = optimizer.suggest()
+    # ... evaluate ...
+    optimizer.observe(params, scores)
+```
+
+---
+
+## Sweep
+
+`Sweep` is the top-level orchestrator that connects everything:
+
+```python
+from gds_psuu import Sweep
+
+sweep = Sweep(
+    model=model,
+    space=space,
+    kpis=kpis,
+    optimizer=optimizer,
+    timesteps=100,
+    runs=10,
+)
+results = sweep.run()
+```
+
+### SweepResults
+
+```python
+results.evaluations       # list[EvaluationResult] -- all evaluations
+results.summaries         # list[EvaluationSummary] -- params + scores only
+results.best("kpi_name")  # best evaluation for a KPI
+results.to_dataframe()    # pandas DataFrame (requires pandas)
+```
diff --git a/docs/psuu/guide/optimizers.md b/docs/psuu/guide/optimizers.md
new file mode 100644
index 0000000..751220b
--- /dev/null
+++ b/docs/psuu/guide/optimizers.md
@@ -0,0 +1,105 @@
+# Optimizers
+
+All optimizers implement the same `Optimizer` interface: `setup()`, `suggest()`, `observe()`, `is_exhausted()`. The `Sweep` class drives this loop automatically.
+
+## GridSearchOptimizer
+
+Exhaustive evaluation of every point in a regular grid.
+
+```python
+from gds_psuu import GridSearchOptimizer
+
+optimizer = GridSearchOptimizer(n_steps=10)
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `n_steps` | `int` | `5` | Points per continuous dimension |
+
+**Behavior:** Generates the full cartesian product via `ParameterSpace.grid_points()`, then evaluates each point exactly once. Does not adapt based on observed scores.
+
+**When to use:** Small parameter spaces (1-2 dimensions), need complete coverage, want reproducible results.
+
+**Total evaluations:** `n_steps^(n_continuous) * product(integer_ranges) * product(discrete_sizes)`
+
+---
+
+## RandomSearchOptimizer
+
+Uniform random sampling across the parameter space.
+
+```python
+from gds_psuu import RandomSearchOptimizer
+
+optimizer = RandomSearchOptimizer(n_samples=50, seed=42)
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `n_samples` | `int` | `20` | Total parameter points to sample |
+| `seed` | `int \| None` | `None` | Random seed for reproducibility |
+
+**Behavior:** Samples each dimension independently -- `uniform(min, max)` for Continuous, `randint(min, max)` for Integer, `choice(values)` for Discrete. Does not adapt based on observed scores.
+
+**When to use:** Higher-dimensional spaces where grid search is infeasible, initial exploration before Bayesian optimization.
+
+---
+
+## BayesianOptimizer
+
+Gaussian process surrogate model that learns from past evaluations.
+
+!!! note "Optional dependency"
+    Requires `scikit-optimize`. Install with: `uv add "gds-psuu[bayesian]"`
+
+```python
+from gds_psuu.optimizers.bayesian import BayesianOptimizer
+
+optimizer = BayesianOptimizer(
+    n_calls=30,
+    target_kpi="avg_final_pop",
+    maximize=True,
+    seed=42,
+)
+```
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `n_calls` | `int` | `20` | Total evaluations (initial + optimization) |
+| `target_kpi` | `str \| None` | `None` | KPI to optimize (defaults to first) |
+| `maximize` | `bool` | `True` | Maximize (True) or minimize (False) |
+| `seed` | `int \| None` | `None` | Random seed |
+
+**Behavior:** Starts with random exploration (`min(5, n_calls)` initial points), then uses a Gaussian process surrogate to balance exploration and exploitation. Optimizes a single target KPI.
+
+**When to use:** Expensive simulations where you want to find the optimum with fewer evaluations. Works best with continuous parameters.
+
+---
+
+## Custom Optimizers
+
+Subclass `Optimizer` to implement your own search strategy:
+
+```python
+from gds_psuu.optimizers.base import Optimizer
+from gds_psuu.space import ParameterSpace
+from gds_psuu.types import KPIScores, ParamPoint
+
+
+class MyOptimizer(Optimizer):
+    def setup(self, space: ParameterSpace, kpi_names: list[str]) -> None:
+        # Initialize search state
+        ...
+
+    def suggest(self) -> ParamPoint:
+        # Return next parameter point to evaluate
+        ...
+
+    def observe(self, params: ParamPoint, scores: KPIScores) -> None:
+        # Learn from evaluation results
+        ...
+
+    def is_exhausted(self) -> bool:
+        # Return True when search is complete
+        ...
+```
diff --git a/docs/psuu/guide/spaces.md b/docs/psuu/guide/spaces.md
new file mode 100644
index 0000000..963fc6f
--- /dev/null
+++ b/docs/psuu/guide/spaces.md
@@ -0,0 +1,102 @@
+# Parameter Spaces
+
+## Dimension Types
+
+### Continuous
+
+A real-valued range with inclusive bounds.
+
+```python
+from gds_psuu import Continuous
+
+dim = Continuous(min_val=0.0, max_val=1.0)
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `min_val` | `float` | Lower bound (inclusive) |
+| `max_val` | `float` | Upper bound (inclusive) |
+
+Validation: `min_val < max_val`, both must be finite.
+
+Grid behavior: `n_steps` evenly spaced points from `min_val` to `max_val`.
+
+---
+
+### Integer
+
+An integer range with inclusive bounds.
+
+```python
+from gds_psuu import Integer
+
+dim = Integer(min_val=1, max_val=10)
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `min_val` | `int` | Lower bound (inclusive) |
+| `max_val` | `int` | Upper bound (inclusive) |
+
+Validation: `min_val < max_val`.
+
+Grid behavior: all integers from `min_val` to `max_val` (ignores `n_steps`).
+
+---
+
+### Discrete
+
+An explicit set of allowed values (any hashable type).
+
+```python
+from gds_psuu import Discrete
+
+dim = Discrete(values=("adam", "sgd", "rmsprop"))
+```
+
+| Field | Type | Description |
+|-------|------|-------------|
+| `values` | `tuple[Any, ...]` | Allowed values |
+
+Validation: at least 1 value.
+
+Grid behavior: all values (ignores `n_steps`).
+
+---
+
+## ParameterSpace
+
+Combines dimensions into a named parameter space:
+
+```python
+from gds_psuu import Continuous, Integer, Discrete, ParameterSpace
+
+space = ParameterSpace(params={
+    "learning_rate": Continuous(min_val=0.001, max_val=0.1),
+    "batch_size": Integer(min_val=16, max_val=128),
+    "optimizer": Discrete(values=("adam", "sgd")),
+})
+```
+
+### Grid Generation
+
+```python
+points = space.grid_points(n_steps=5)
+# Returns list of dicts, e.g.:
+# [
+#     {"learning_rate": 0.001, "batch_size": 16, "optimizer": "adam"},
+#     {"learning_rate": 0.001, "batch_size": 16, "optimizer": "sgd"},
+#     ...
+# ]
+```
+
+The total number of grid points is the cartesian product:
+`n_steps * (max_int - min_int + 1) * len(discrete_values)`
+
+For the example above: `5 * 113 * 2 = 1130` points.
+
+### Properties
+
+| Property | Returns | Description |
+|----------|---------|-------------|
+| `dimension_names` | `list[str]` | Ordered list of parameter names |
diff --git a/docs/psuu/index.md b/docs/psuu/index.md
new file mode 100644
index 0000000..ba6e652
--- /dev/null
+++ b/docs/psuu/index.md
@@ -0,0 +1,92 @@
+# gds-psuu
+
+[![PyPI](https://img.shields.io/pypi/v/gds-psuu)](https://pypi.org/project/gds-psuu/)
+[![Python](https://img.shields.io/pypi/pyversions/gds-psuu)](https://pypi.org/project/gds-psuu/)
+[![License](https://img.shields.io/github/license/BlockScience/gds-core)](https://github.com/BlockScience/gds-core/blob/main/LICENSE)
+
+**Parameter space search under uncertainty** -- explore, evaluate, and optimize simulation parameters with Monte Carlo awareness.
+
+## What is this?
+
+`gds-psuu` bridges `gds-sim` simulations with systematic parameter exploration. It provides:
+
+- **Parameter spaces** -- `Continuous`, `Integer`, and `Discrete` dimensions with validation
+- **Composable KPIs** -- `Metric` (per-run scalar) + `Aggregation` (cross-run reducer) = `KPI`
+- **3 search strategies** -- Grid, Random, and Bayesian (optuna) optimizers
+- **Monte Carlo awareness** -- per-run distributions tracked alongside aggregated scores
+- **Zero mandatory dependencies** beyond `gds-sim` and `pydantic`
+
+## Architecture
+
+```
+gds-sim (pip install gds-sim)
+|
+|  Simulation engine: Model, StateUpdateBlock,
+|  Simulation, Results (columnar storage).
+|
++-- gds-psuu (pip install gds-psuu)
+    |
+    |  Parameter search: ParameterSpace, Metric, Aggregation,
+    |  KPI, Evaluator, Sweep, Optimizer.
+    |
+    +-- Your application
+        |
+        |  Concrete models, parameter studies,
+        |  sensitivity analysis, optimization.
+```
+
+## Conceptual Hierarchy
+
+The package follows a clear hierarchy from parameters to optimization:
+
+```
+Parameter Point          {"growth_rate": 0.05}
+    |
+    v
+Simulation              Model + timesteps + N runs
+    |
+    v
+Results                 Columnar data (timestep, substep, run, state vars)
+    |
+    v
+Metric (per-run)        final_value("pop") -> scalar per run
+    |
+    v
+Aggregation (cross-run) mean_agg, std_agg, probability_above(...)
+    |
+    v
+KPI (composed)          KPI(metric=..., aggregation=...) -> single score
+    |
+    v
+Sweep                   Optimizer drives suggest/evaluate/observe loop
+    |
+    v
+SweepResults            All evaluations + best() selection
+```
+
+## How the Sweep Loop Works
+
+```
+Optimizer.suggest()  -->  Evaluator.evaluate(params)  -->  Optimizer.observe(scores)
+       ^                          |                              |
+       |                   gds-sim Simulation                    |
+       +------------------------ repeat --------------------------+
+```
+
+1. The **Optimizer** suggests a parameter point
+2. The **Evaluator** injects params into a `gds-sim` Model, runs N Monte Carlo simulations
+3. Each **KPI** extracts a per-run **Metric**, then **Aggregates** across runs into a single score
+4. The **Optimizer** observes the scores and decides what to try next
+
+## Quick Start
+
+```bash
+uv add gds-psuu
+# or: pip install gds-psuu
+```
+
+See [Getting Started](getting-started.md) for a full walkthrough.
+
+## Credits
+
+Built on [gds-sim](../sim/index.md) by [BlockScience](https://block.science).
diff --git a/mkdocs.yml b/mkdocs.yml
index 1f921a9..c8a9910 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -109,6 +109,11 @@ plugins:
           - guides/dsl-roadmap.md
           - guides/research-boundaries.md
           - guides/view-stratification.md
+        PSUU (gds-psuu):
+          - {psuu/index.md: "Parameter space search under uncertainty for gds-sim"}
+          - {psuu/getting-started.md: "Installation + first parameter sweep"}
+          - psuu/guide/*.md
+          - psuu/api/*.md
         Ecosystem:
           - {framework/ecosystem.md: "All packages, their imports, and dependency relationships"}
 
@@ -311,6 +316,22 @@ nav:
               - gds_software.dependency.compile: software/api/dep-compile.md
               - gds_software.dependency.checks: software/api/dep-checks.md
               - gds_software.verification: software/api/verification.md
+      - PSUU:
+          - Overview: psuu/index.md
+          - Getting Started: psuu/getting-started.md
+          - User Guide:
+              - Concepts: psuu/guide/concepts.md
+              - Parameter Spaces: psuu/guide/spaces.md
+              - Optimizers: psuu/guide/optimizers.md
+          - API Reference:
+              - gds_psuu: psuu/api/init.md
+              - gds_psuu.metric: psuu/api/metric.md
+              - gds_psuu.kpi: psuu/api/kpi.md
+              - gds_psuu.evaluation: psuu/api/evaluation.md
+              - gds_psuu.space: psuu/api/space.md
+              - gds_psuu.optimizers: psuu/api/optimizers.md
+              - gds_psuu.sweep: psuu/api/sweep.md
+              - gds_psuu.results: psuu/api/results.md
   - Design & Research:
       - Layer 0 Milestone: guides/architecture-milestone-layer0.md
       - DSL Roadmap: guides/dsl-roadmap.md

From a8ff63dca14abc034860a10dd681752536b60c7b Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 15:23:08 +0530
Subject: [PATCH 6/7] style: fix __all__ sort order in gds-psuu after merge

---
 packages/gds-psuu/gds_psuu/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index beb4d17..92ce516 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -43,6 +43,7 @@
 from gds_psuu.types import AggregationFn, KPIFn, KPIScores, MetricFn, ParamPoint
 
 __all__ = [
+    "KPI",
     "Aggregation",
     "AggregationFn",
     "Analyzer",
@@ -56,7 +57,6 @@
     "FunctionalConstraint",
     "GridSearchOptimizer",
     "Integer",
-    "KPI",
     "KPIFn",
     "KPIScores",
     "LinearConstraint",

From ab2e8deb767821639ea30a416005803faa497276 Mon Sep 17 00:00:00 2001
From: rohan <e4rohan@gmail.com>
Date: Thu, 5 Mar 2026 15:24:23 +0530
Subject: [PATCH 7/7] chore: bump gds-psuu to 0.2.0

New features: composable Metric/Aggregation/KPI, parameter constraints,
sensitivity analysis (OAT + Morris), composable objectives, optuna migration.
---
 packages/gds-psuu/gds_psuu/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/gds-psuu/gds_psuu/__init__.py b/packages/gds-psuu/gds_psuu/__init__.py
index 92ce516..62b12e0 100644
--- a/packages/gds-psuu/gds_psuu/__init__.py
+++ b/packages/gds-psuu/gds_psuu/__init__.py
@@ -1,6 +1,6 @@
 """gds-psuu: Parameter space search under uncertainty for the GDS ecosystem."""
 
-__version__ = "0.1.0"
+__version__ = "0.2.0"
 
 from gds_psuu.errors import PsuuError, PsuuSearchError, PsuuValidationError
 from gds_psuu.evaluation import EvaluationResult, Evaluator