52 commits
4b257b2
First stab at fixing multi-chain RMSD analysis
hannahbaumann Dec 18, 2025
236ff72
Some updates
hannahbaumann Dec 18, 2025
d274b0c
Add tests
hannahbaumann Dec 18, 2025
f3634dd
Some fixes
hannahbaumann Dec 19, 2025
e92adb3
Add another test
hannahbaumann Dec 19, 2025
b528ca2
Move some tests to use skipped smaller data
hannahbaumann Jan 16, 2026
a477bc1
Test out zenodo dealings
hannahbaumann Jan 16, 2026
ad84082
Try to improve speed
hannahbaumann Jan 16, 2026
8ba8087
Try removing locking
hannahbaumann Jan 16, 2026
ead7951
Run downloads before the testing to have a single download for all th…
hannahbaumann Jan 19, 2026
f898a35
add import pooch
hannahbaumann Jan 19, 2026
c675a5c
Test out more
hannahbaumann Jan 19, 2026
88e456d
Ensure datasets get closed
hannahbaumann Jan 19, 2026
73a8e4d
Move to per test download again
hannahbaumann Jan 19, 2026
43aaca2
Remove commented out lines
hannahbaumann Jan 21, 2026
c165525
Test out adding an extra slash
hannahbaumann Jan 21, 2026
5f17770
Switch to all version doi
hannahbaumann Jan 21, 2026
c28286e
Download url directly
hannahbaumann Jan 21, 2026
197b6ba
Small fix
hannahbaumann Jan 21, 2026
b45390a
Change url
hannahbaumann Jan 21, 2026
1d70936
Add missing s
hannahbaumann Jan 21, 2026
20084c3
Switch to api url
hannahbaumann Jan 21, 2026
1a1c916
Revert to old cli
hannahbaumann Jan 22, 2026
59c7392
Update cli.py
hannahbaumann Jan 22, 2026
5e135ab
Update cli.py
hannahbaumann Jan 22, 2026
a9a8780
Update tests for new results
hannahbaumann Jan 23, 2026
92af45b
Change shift to enable other boxes
hannahbaumann Jan 23, 2026
8ea3585
Update multichain code
hannahbaumann Jan 26, 2026
220d504
Add ligand in shifting
hannahbaumann Jan 26, 2026
8c44cb2
Use new shift class instead of old minimiser since that one is no lon…
hannahbaumann Jan 26, 2026
d13495f
Update some tests
hannahbaumann Jan 26, 2026
c34c97c
Update conftest
hannahbaumann Jan 26, 2026
0161673
Update to v2
hannahbaumann Jan 26, 2026
9b6ca69
Update tests
hannahbaumann Jan 26, 2026
1d5c849
Update rmsd test, currently large rmsd till rmsd fix comes in
hannahbaumann Jan 26, 2026
f4e88e2
Make last test pass
hannahbaumann Jan 26, 2026
bd0c8ee
Switch to zenodo fetch
hannahbaumann Jan 26, 2026
ba4c912
remove lines
hannahbaumann Jan 26, 2026
157c02f
Update tests with large errors multichain failure
hannahbaumann Jan 27, 2026
98ea023
Apply suggestion from @hannahbaumann
hannahbaumann Jan 28, 2026
c5b2d70
Reuse zenodo specification
hannahbaumann Jan 28, 2026
54576ab
reorder install
hannahbaumann Jan 28, 2026
3aa52a5
Small fix
hannahbaumann Jan 28, 2026
ff6991a
Remove flaky retries
hannahbaumann Jan 28, 2026
7a30f69
Small fix
hannahbaumann Jan 28, 2026
ac1fe7b
Merge in the fix-flakiness PR and update tests
hannahbaumann Jan 28, 2026
67a0913
Add wrapping to get positions to be greater than 0
hannahbaumann Jan 29, 2026
e706b11
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 29, 2026
6c1a6c5
Apply suggestion from @hannahbaumann
hannahbaumann Feb 2, 2026
f4637f4
Remove unnecessary make_whole
hannahbaumann Feb 2, 2026
e7d6935
Merge branch 'main' into fix_rmsd_multichain
hannahbaumann Feb 3, 2026
deb5126
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 3, 2026
27 changes: 27 additions & 0 deletions .github/workflows/ci.yaml
@@ -52,6 +52,33 @@ jobs:
run: |
python -m pip install --no-deps .

- name: Cache Pooch data
uses: actions/cache@v4
with:
path: |
# Linux cache location
~/.cache/openfe_analysis
# macOS cache location
~/Library/Caches/openfe_analysis
key: pooch-${{ matrix.os }}-v2

- name: "Download Zenodo data"
run: |
python - <<'EOF'
import pooch
from openfe_analysis.tests.conftest import ZENODO_DOI, ZENODO_FILES

zenodo = pooch.create(
path=pooch.os_cache('openfe_analysis'),
base_url=ZENODO_DOI,
registry=ZENODO_FILES,
)

for fname in ZENODO_FILES:
zenodo.fetch(fname, processor=pooch.Untar())

EOF

- name: "Test imports"
run: |
python -Ic "import openfe_analysis; print(openfe_analysis.__version__)"
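A small sketch (an assumption, not part of the workflow above) for checking locally that the downloaded archives land in the same pooch cache that the tests and the actions/cache step use:

import pathlib

import pooch

cache = pathlib.Path(pooch.os_cache("openfe_analysis"))
for archive in sorted(cache.glob("*.tar.gz")):
    # list the cached Zenodo archives and their sizes
    print(archive.name, archive.stat().st_size, "bytes")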
2 changes: 1 addition & 1 deletion src/openfe_analysis/__init__.py
@@ -3,6 +3,6 @@
from .reader import FEReader
from .transformations import (
Aligner,
Minimiser,
ClosestImageShift,
NoJump,
)
6 changes: 4 additions & 2 deletions src/openfe_analysis/reader.py
@@ -193,5 +193,7 @@ def _reopen(self):
self._frame_index = -1

def close(self):
if self._dataset_owner:
self._dataset.close()
if self._dataset is not None:
if self._dataset_owner:
self._dataset.close()
self._dataset = None
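For context, a minimal sketch (an assumption, not code from this PR) of why the extra None guard makes close() idempotent; the wrapper class and file name are hypothetical:

import netCDF4 as nc

class OwnedDataset:
    """Hypothetical stand-in for a reader that owns its netCDF Dataset."""

    def __init__(self, path):
        self._dataset = nc.Dataset(path)
        self._dataset_owner = True

    def close(self):
        # clearing the reference means a second close() is a harmless no-op
        if self._dataset is not None:
            if self._dataset_owner:
                self._dataset.close()
            self._dataset = None

ds = OwnedDataset("example.nc")  # hypothetical file
ds.close()
ds.close()  # safe; without the guard this would close an already-closed Dataset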
181 changes: 94 additions & 87 deletions src/openfe_analysis/rmsd.py
@@ -7,10 +7,12 @@
import numpy as np
import tqdm
from MDAnalysis.analysis import rms
from MDAnalysis.lib.mdamath import make_whole
from MDAnalysis.transformations import unwrap
from numpy import typing as npt

from .reader import FEReader
from .transformations import Aligner, Minimiser, NoJump
from .transformations import Aligner, ClosestImageShift, NoJump


def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
@@ -41,17 +43,21 @@ def make_Universe(top: pathlib.Path, trj: nc.Dataset, state: int) -> mda.Universe:
ligand = u.select_atoms("resname UNK")

if prot:
# if there's a protein in the system:
# - make the protein not jump periodic images between frames
# - put the ligand in the closest periodic image as the protein
# - align everything to minimise protein RMSD
nope = NoJump(prot)
minnie = Minimiser(prot, ligand)
# Unwrap all atoms
unwrap_tr = unwrap(prot)
Review comment from hannahbaumann (author):
Here I changed the NoJump to unwrap since NoJump only corrects the COM. Should we do the same for the ligand below? @IAlibay, do you know what was the reason for implementing this new NoJump transformation instead of using the MDAnalysis unwrap?
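A minimal sketch of what that could look like for the ligand as well, assuming the Universe carries bond information so MDAnalysis can define fragments; the file name and selections below are illustrative, not the PR's code:

import MDAnalysis as mda
from MDAnalysis.transformations import unwrap

# illustrative topology; unwrap() needs bonds to define fragments
u = mda.Universe("hybrid_system.pdb", guess_bonds=True)
prot = u.select_atoms("protein")
ligand = u.select_atoms("resname UNK")

# unwrap() makes each fragment whole across the periodic boundary every frame,
# while the custom NoJump only corrected centre-of-mass jumps between frames
u.trajectory.add_transformations(unwrap(prot), unwrap(ligand))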


# Shift chains + ligand
chains = [seg.atoms for seg in prot.segments]
shift = ClosestImageShift(chains[0], [*chains[1:], ligand])
# # Make each protein chain whole
# for frag in prot.fragments:
# make_whole(frag, reference_atom=frag[0])

align = Aligner(prot)

u.trajectory.add_transformations(
nope,
minnie,
unwrap_tr,
shift,
align,
)
else:
@@ -97,87 +103,88 @@ def gather_rms_data(
"protein_2D_RMSD": [],
}

ds = nc.Dataset(dataset)
n_lambda = ds.dimensions["state"].size

# If you're using a new multistate nc file, you need to account for
# the position skip rate.
if hasattr(ds, "PositionInterval"):
n_frames = len(range(0, ds.dimensions["iteration"].size, ds.PositionInterval))
else:
n_frames = ds.dimensions["iteration"].size

if skip is None:
# find skip that would give ~500 frames of output
# max against 1 to avoid skip=0 case
skip = max(n_frames // 500, 1)

pb = tqdm.tqdm(total=int(n_frames / skip) * n_lambda)

u_top = mda.Universe(pdb_topology)

for i in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=i)

prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

# save coordinates for 2D RMSD matrix
# TODO: Some smart guard to avoid allocating a silly amount of memory?
prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)

prot_start = prot.positions
# prot_weights = prot.masses / np.mean(prot.masses)
ligand_start = ligand.positions
ligand_initial_com = ligand.center_of_mass()
ligand_weights = ligand.masses / np.mean(ligand.masses)

this_protein_rmsd = []
this_ligand_rmsd = []
this_ligand_wander = []

for ts_i, ts in enumerate(u.trajectory[::skip]):
pb.update()
# Open the NetCDF file safely using a context manager
with nc.Dataset(dataset) as ds:
n_lambda = ds.dimensions["state"].size

# If you're using a new multistate nc file, you need to account for
# the position skip rate.
if hasattr(ds, "PositionInterval"):
n_frames = len(range(0, ds.dimensions["iteration"].size, ds.PositionInterval))
else:
n_frames = ds.dimensions["iteration"].size

if skip is None:
# find skip that would give ~500 frames of output
# max against 1 to avoid skip=0 case
skip = max(n_frames // 500, 1)

pb = tqdm.tqdm(total=int(n_frames / skip) * n_lambda)

u_top = mda.Universe(pdb_topology)

for i in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=i)

prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

# save coordinates for 2D RMSD matrix
# TODO: Some smart guard to avoid allocating a silly amount of memory?
prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)

# Would this copy be safer?
prot_start = prot.positions.copy()
ligand_start = ligand.positions.copy()
ligand_initial_com = ligand.center_of_mass()
ligand_weights = ligand.masses / np.mean(ligand.masses)

this_protein_rmsd = []
this_ligand_rmsd = []
this_ligand_wander = []

for ts_i, ts in enumerate(u.trajectory[::skip]):
pb.update()

if prot:
prot2d[ts_i, :, :] = prot.positions
this_protein_rmsd.append(
rms.rmsd(
prot.positions,
prot_start,
None, # prot_weights,
center=False,
superposition=False,
)
)
if ligand:
this_ligand_rmsd.append(
rms.rmsd(
ligand.positions,
ligand_start,
ligand_weights,
center=False,
superposition=False,
)
)
this_ligand_wander.append(
# distance between start and current ligand position
# ignores PBC, but we've already centered the traj
mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com)
)

if prot:
prot2d[ts_i, :, :] = prot.positions
this_protein_rmsd.append(
rms.rmsd(
prot.positions,
prot_start,
None, # prot_weights,
center=False,
superposition=False,
)
)
# can ignore weights here as it's all Ca
rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights)
output["protein_RMSD"].append(this_protein_rmsd)
output["protein_2D_RMSD"].append(rmsd2d)
if ligand:
this_ligand_rmsd.append(
rms.rmsd(
ligand.positions,
ligand_start,
ligand_weights,
center=False,
superposition=False,
)
)
this_ligand_wander.append(
# distance between start and current ligand position
# ignores PBC, but we've already centered the traj
mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com)
)

if prot:
# can ignore weights here as it's all Ca
rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights)
output["protein_RMSD"].append(this_protein_rmsd)
output["protein_2D_RMSD"].append(rmsd2d)
if ligand:
output["ligand_RMSD"].append(this_ligand_rmsd)
output["ligand_wander"].append(this_ligand_wander)

output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt)
output["ligand_RMSD"].append(this_ligand_rmsd)
output["ligand_wander"].append(this_ligand_wander)

output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt)

return output

45 changes: 28 additions & 17 deletions src/openfe_analysis/tests/conftest.py
@@ -4,34 +4,45 @@
import pooch
import pytest

POOCH_CACHE = pooch.os_cache("openfe_analysis")
ZENODO_DOI = "doi:10.5281/zenodo.18378051"

ZENODO_FILES = {
"openfe_analysis_simulation_output.tar.gz": "md5:7f0babaac3dc8f7dd2db63cb79dff00f",
"openfe_analysis_skipped.tar.gz": "md5:ac42219bde9da3641375adf3a9ddffbf",
}

POOCH_CACHE = pathlib.Path(pooch.os_cache("openfe_analysis"))
POOCH_CACHE.mkdir(parents=True, exist_ok=True)

ZENODO_RBFE_DATA = pooch.create(
path=POOCH_CACHE,
base_url="doi:10.5281/zenodo.17916322",
registry={
"openfe_analysis_simulation_output.tar.gz": "md5:09752f2c4e5b7744d8afdee66dbd1414",
"openfe_analysis_skipped.tar.gz": "md5:3840d044299caacc4ccd50e6b22c0880",
},
base_url=ZENODO_DOI,
registry=ZENODO_FILES,
)


def _fetch_and_untar_once(filename: str) -> pathlib.Path:
# If already untarred, reuse it
untar_dir = POOCH_CACHE / f"{filename}.untar"
if untar_dir.exists():
return untar_dir

# Otherwise fetch + untar
paths = ZENODO_RBFE_DATA.fetch(filename, processor=pooch.Untar())

return pathlib.Path(paths[0]).parent


@pytest.fixture(scope="session")
def rbfe_output_data_dir() -> pathlib.Path:
ZENODO_RBFE_DATA.fetch("openfe_analysis_simulation_output.tar.gz", processor=pooch.Untar())
result_dir = (
pathlib.Path(POOCH_CACHE)
/ "openfe_analysis_simulation_output.tar.gz.untar/openfe_analysis_simulation_output/"
)
return result_dir
untar_dir = _fetch_and_untar_once("openfe_analysis_simulation_output.tar.gz")
return untar_dir / "openfe_analysis_simulation_output"


@pytest.fixture(scope="session")
def rbfe_skipped_data_dir() -> pathlib.Path:
ZENODO_RBFE_DATA.fetch("openfe_analysis_skipped.tar.gz", processor=pooch.Untar())
result_dir = (
pathlib.Path(POOCH_CACHE) / "openfe_analysis_skipped.tar.gz.untar/openfe_analysis_skipped/"
)
return result_dir
untar_dir = _fetch_and_untar_once("openfe_analysis_skipped.tar.gz")
return untar_dir / "openfe_analysis_skipped"


@pytest.fixture(scope="session")