diff --git a/package/CHANGELOG b/package/CHANGELOG
index ebaa2902bd6..01af33d6de7 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -115,9 +115,10 @@ Enhancements
     (Issue #4677, PR #4729)
   * Enables parallelization for analysis.contacts.Contacts (Issue #4660)
   * Enable parallelization for analysis.nucleicacids.NucPairDist (Issue #4670)
+  * Parallelize `analysis.rdf.InterRDF` and `analysis.rdf.InterRDF_s` (Issue #4675)
   * Add check and warning for empty (all zero) coordinates in RDKit converter (PR #4824)
   * Added `precision` for XYZWriter (Issue #4775, PR #4771)

 Changes
   * MDAnalysis.analysis.psa, MDAnalysis.analysis.waterdynamics and
diff --git a/package/MDAnalysis/analysis/rdf.py b/package/MDAnalysis/analysis/rdf.py
index c3459ac65fa..fde2f0e93c4 100644
--- a/package/MDAnalysis/analysis/rdf.py
+++ b/package/MDAnalysis/analysis/rdf.py
@@ -80,7 +80,43 @@
 import numpy as np

 from ..lib import distances
-from .base import AnalysisBase
+from .base import AnalysisBase, ResultsGroup
+
+
+def nested_array_sum(arrs):
+    r"""Custom aggregator for nested arrays
+
+    This function takes a nested list or tuple of NumPy arrays, flattens it
+    into a single list, and aggregates the elements at alternating indices
+    into two separate arrays. The first array accumulates elements at even
+    indices, while the second accumulates elements at odd indices.
+
+    Parameters
+    ----------
+    arrs : list
+        List of arrays or nested lists of arrays
+
+    Returns
+    -------
+    list of ndarray
+        A list containing two NumPy arrays:
+        - The first array is the sum of all elements at even indices
+          of the flattened input.
+        - The second array is the sum of all elements at odd indices
+          of the flattened input.
+    """
+
+    def flatten(arr):
+        if isinstance(arr, (list, tuple)):
+            return [item for sublist in arr for item in flatten(sublist)]
+        return [arr]
+
+    flat = flatten(arrs)
+    aggregated_arr = [np.zeros_like(flat[0]), np.zeros_like(flat[1])]
+    for i in range(len(flat) // 2):
+        aggregated_arr[0] += flat[2 * i]  # 0, 2, 4, ...
+        aggregated_arr[1] += flat[2 * i + 1]  # 1, 3, 5, ...
+    return aggregated_arr


 class InterRDF(AnalysisBase):
@@ -221,8 +257,23 @@ class InterRDF(AnalysisBase):
     Store results as attributes `bins`, `edges`, `rdf` and `count` of the
     `results` attribute of :class:`~MDAnalysis.analysis.AnalysisBase`.
+
+    .. versionchanged:: 2.9.0
+        Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
+        backends; use the new method :meth:`get_supported_backends` to see all
+        supported backends.
     """

+    @classmethod
+    def get_supported_backends(cls):
+        return (
+            "serial",
+            "multiprocessing",
+            "dask",
+        )
+
+    _analysis_algorithm_is_parallelizable = True
+
     def __init__(
         self,
         g1,
@@ -281,7 +332,7 @@ def _prepare(self):

         if self.norm == "rdf":
             # Cumulative volume for rdf normalization
-            self.volume_cum = 0
+            self.results.volume_cum = 0

         # Set the max range to filter the search radius
         self._maxrange = self.rdf_settings["range"][1]
@@ -311,7 +362,17 @@ def _single_frame(self):
         self.results.count += count

         if self.norm == "rdf":
-            self.volume_cum += self._ts.volume
+            self.results.volume_cum += self._ts.volume
+
+    def _get_aggregator(self):
+        return ResultsGroup(
+            lookup={
+                "count": ResultsGroup.ndarray_sum,
+                "volume_cum": ResultsGroup.ndarray_sum,
+                "bins": ResultsGroup.ndarray_mean,
+                "edges": ResultsGroup.ndarray_mean,
+            }
+        )

     def _conclude(self):
         norm = self.n_frames
@@ -333,6 +394,7 @@ def _conclude(self):
                 N -= xA * xB * nblocks

             # Average number density
+            self.volume_cum = self.results.volume_cum
             box_vol = self.volume_cum / self.n_frames
             norm *= N / box_vol

@@ -576,8 +638,32 @@ class InterRDF_s(AnalysisBase):
        Instead of `density=True` use `norm='density'`
     .. deprecated:: 2.3.0
        The `universe` parameter is superflous.
+    .. versionchanged:: 2.9.0
+        Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
+        backends; use the new method :meth:`get_supported_backends` to see all
+        supported backends.
     """

+    @classmethod
+    def get_supported_backends(cls):
+        return (
+            "serial",
+            "multiprocessing",
+            "dask",
+        )
+
+    _analysis_algorithm_is_parallelizable = True
+
+    def _get_aggregator(self):
+        return ResultsGroup(
+            lookup={
+                "count": nested_array_sum,
+                "volume_cum": ResultsGroup.ndarray_sum,
+                "bins": ResultsGroup.ndarray_mean,
+                "edges": ResultsGroup.ndarray_mean,
+            }
+        )
+
     def __init__(
         self,
         u,
@@ -632,7 +718,7 @@ def _prepare(self):

         if self.norm == "rdf":
             # Cumulative volume for rdf normalization
-            self.volume_cum = 0
+            self.results.volume_cum = 0
         self._maxrange = self.rdf_settings["range"][1]

     def _single_frame(self):
@@ -650,7 +736,7 @@ def _single_frame(self):
                 self.results.count[i][idx1, idx2, :] += count

         if self.norm == "rdf":
-            self.volume_cum += self._ts.volume
+            self.results.volume_cum += self._ts.volume

     def _conclude(self):
         norm = self.n_frames
@@ -661,6 +747,7 @@ def _conclude(self):

         if self.norm == "rdf":
             # Average number density
+            self.volume_cum = self.results.volume_cum
             norm *= 1 / (self.volume_cum / self.n_frames)

         # Empty lists to restore indices, RDF
diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py
index 3579c66a83d..7918d25f137 100644
--- a/testsuite/MDAnalysisTests/analysis/conftest.py
+++ b/testsuite/MDAnalysisTests/analysis/conftest.py
@@ -19,6 +19,7 @@
 from MDAnalysis.analysis.density import DensityAnalysis
 from MDAnalysis.analysis.lineardensity import LinearDensity
 from MDAnalysis.analysis.polymer import PersistenceLength
+from MDAnalysis.analysis.rdf import InterRDF, InterRDF_s
 from MDAnalysis.lib.util import is_installed


@@ -194,3 +195,16 @@ def client_LinearDensity(request):
 @pytest.fixture(scope="module", params=params_for_cls(PersistenceLength))
 def client_PersistenceLength(request):
     return request.param
+
+
+# MDAnalysis.analysis.rdf
+
+
+@pytest.fixture(scope="module", params=params_for_cls(InterRDF))
+def client_InterRDF(request):
+    return request.param
+
+
+@pytest.fixture(scope="module", params=params_for_cls(InterRDF_s))
+def client_InterRDF_s(request):
+    return request.param
diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf.py b/testsuite/MDAnalysisTests/analysis/test_rdf.py
index aa14071069f..b438b73b7fe 100644
--- a/testsuite/MDAnalysisTests/analysis/test_rdf.py
+++ b/testsuite/MDAnalysisTests/analysis/test_rdf.py
@@ -49,83 +49,85 @@ def sels(u):
     return s1, s2


-def test_nbins(u):
+def test_nbins(u, client_InterRDF):
     s1 = u.atoms[:3]
     s2 = u.atoms[3:]

-    rdf = InterRDF(s1, s2, nbins=412).run()
+    rdf = InterRDF(s1, s2, nbins=412).run(**client_InterRDF)

     assert len(rdf.results.bins) == 412


-def test_range(u):
+def test_range(u, client_InterRDF):
     s1 = u.atoms[:3]
     s2 = u.atoms[3:]
     rmin, rmax = 1.0, 13.0
-    rdf = InterRDF(s1, s2, range=(rmin, rmax)).run()
+    rdf = InterRDF(s1, s2, range=(rmin, rmax)).run(**client_InterRDF)

     assert rdf.results.edges[0] == rmin
     assert rdf.results.edges[-1] == rmax


-def test_count_sum(sels):
+def test_count_sum(sels, client_InterRDF):
     # OW vs HW
     # should see 8 comparisons in count
     s1, s2 = sels
-    rdf = InterRDF(s1, s2).run()
+    rdf = InterRDF(s1, s2).run(**client_InterRDF)
     assert rdf.results.count.sum() == 8


-def test_count(sels):
+def test_count(sels, client_InterRDF):
     # should see two distances with 4 counts each
     s1, s2 = sels
-    rdf = InterRDF(s1, s2).run()
+    rdf = InterRDF(s1, s2).run(**client_InterRDF)
     assert len(rdf.results.count[rdf.results.count == 4]) == 2


-def test_double_run(sels):
+def test_double_run(sels, client_InterRDF):
     # running rdf twice should give the same result
     s1, s2 = sels
-    rdf = InterRDF(s1, s2).run()
-    rdf.run()
+    rdf = InterRDF(s1, s2).run(**client_InterRDF)
+    rdf.run(**client_InterRDF)
     assert len(rdf.results.count[rdf.results.count == 4]) == 2


-def test_exclusion(sels):
+def test_exclusion(sels, client_InterRDF):
     # should see two distances with 4 counts each
     s1, s2 = sels
-    rdf = InterRDF(s1, s2, exclusion_block=(1, 2)).run()
+    rdf = InterRDF(s1, s2, exclusion_block=(1, 2)).run(**client_InterRDF)

     assert rdf.results.count.sum() == 4


 @pytest.mark.parametrize(
     "attr, count", [("residue", 8), ("segment", 0), ("chain", 8)]
 )
-def test_ignore_same_residues(sels, attr, count):
+def test_ignore_same_residues(sels, attr, count, client_InterRDF):
     # should see two distances with 4 counts each
     s1, s2 = sels
-    rdf = InterRDF(s2, s2, exclude_same=attr).run()
+    rdf = InterRDF(s2, s2, exclude_same=attr).run(**client_InterRDF)
     assert rdf.rdf[0] == 0
     assert rdf.results.count.sum() == count


-def test_ignore_same_residues_fails(sels):
+def test_ignore_same_residues_fails(sels, client_InterRDF):
     s1, s2 = sels
     with pytest.raises(
         ValueError, match="The exclude_same argument to InterRDF must be"
     ):
-        InterRDF(s2, s2, exclude_same="unsupported").run()
+        InterRDF(s2, s2, exclude_same="unsupported").run(**client_InterRDF)

     with pytest.raises(
         ValueError,
         match="The exclude_same argument to InterRDF cannot be used with",
     ):
-        InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run()
+        InterRDF(s2, s2, exclude_same="residue", exclusion_block=tuple()).run(
+            **client_InterRDF
+        )


 @pytest.mark.parametrize("attr", ("rdf", "bins", "edges", "count"))
-def test_rdf_attr_warning(sels, attr):
+def test_rdf_attr_warning(sels, attr, client_InterRDF):
     s1, s2 = sels
-    rdf = InterRDF(s1, s2).run()
+    rdf = InterRDF(s1, s2).run(**client_InterRDF)

     wmsg = f"The `{attr}` attribute was deprecated in MDAnalysis 2.0.0"
     with pytest.warns(DeprecationWarning, match=wmsg):
         getattr(rdf, attr) is rdf.results[attr]
@@ -134,18 +136,18 @@ def test_rdf_attr_warning(sels, attr):
 @pytest.mark.parametrize(
     "norm, value", [("density", 1.956823), ("rdf", 244602.88385), ("none", 4)]
 )
-def test_norm(sels, norm, value):
+def test_norm(sels, norm, value, client_InterRDF):
     s1, s2 = sels
-    rdf = InterRDF(s1, s2, norm=norm).run()
+    rdf = InterRDF(s1, s2, norm=norm).run(**client_InterRDF)
     assert_allclose(max(rdf.results.rdf), value)


 @pytest.mark.parametrize(
     "norm, norm_required", [("Density", "density"), (None, "none")]
 )
-def test_norm_values(sels, norm, norm_required):
+def test_norm_values(sels, norm, norm_required, client_InterRDF):
     s1, s2 = sels
-    rdf = InterRDF(s1, s2, norm=norm).run()
+    rdf = InterRDF(s1, s2, norm=norm).run(**client_InterRDF)

     assert rdf.norm == norm_required

diff --git a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py
index 3faac69ce36..5095c4d4326 100644
--- a/testsuite/MDAnalysisTests/analysis/test_rdf_s.py
+++ b/testsuite/MDAnalysisTests/analysis/test_rdf_s.py
@@ -21,11 +21,12 @@
 # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
 #
 import pytest
+import numpy as np

 from numpy.testing import assert_allclose, assert_almost_equal

 import MDAnalysis as mda
-from MDAnalysis.analysis.rdf import InterRDF_s, InterRDF
+from MDAnalysis.analysis.rdf import InterRDF_s, InterRDF, nested_array_sum
 from MDAnalysisTests.util import distopia_conditional_backend

 from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT
@@ -49,19 +50,20 @@ def sels(u):


 @pytest.fixture(scope="module")
-def rdf(u, sels):
-    return InterRDF_s(u, sels).run()
+def rdf(u, sels, client_InterRDF_s):
+    r = InterRDF_s(u, sels).run(**client_InterRDF_s)
+    return r


-def test_nbins(u, sels):
-    rdf = InterRDF_s(u, sels, nbins=412).run()
+def test_nbins(u, sels, client_InterRDF_s):
+    rdf = InterRDF_s(u, sels, nbins=412).run(**client_InterRDF_s)

     assert len(rdf.results.bins) == 412


-def test_range(u, sels):
+def test_range(u, sels, client_InterRDF_s):
     rmin, rmax = 1.0, 13.0
-    rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run()
+    rdf = InterRDF_s(u, sels, range=(rmin, rmax)).run(**client_InterRDF_s)

     assert rdf.results.edges[0] == rmin
     assert rdf.results.edges[-1] == rmax
@@ -114,16 +116,16 @@ def test_cdf(rdf):
         (True, 0.021915460340071267),
     ],
 )
-def test_density(u, sels, density, value):
+def test_density(u, sels, density, value, client_InterRDF_s):
     kwargs = {"density": density} if density is not None else {}
-    rdf = InterRDF_s(u, sels, **kwargs).run()
+    rdf = InterRDF_s(u, sels, **kwargs).run(**client_InterRDF_s)
     assert_almost_equal(max(rdf.results.rdf[0][0][0]), value)
     if not density:
         s1 = u.select_atoms("name ZND and resid 289")
         s2 = u.select_atoms(
             "name OD1 and resid 51 and sphzone 5.0 (resid 289)"
         )
-        rdf_ref = InterRDF(s1, s2).run()
+        rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s)
         assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0])


@@ -140,23 +142,23 @@ def test_overwrite_norm(u, sels):
         ("none", 0.6),
     ],
 )
-def test_norm(u, sels, norm, value):
-    rdf = InterRDF_s(u, sels, norm=norm).run()
+def test_norm(u, sels, norm, value, client_InterRDF_s):
+    rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s)
     assert_allclose(max(rdf.results.rdf[0][0][0]), value)
     if norm == "rdf":
         s1 = u.select_atoms("name ZND and resid 289")
         s2 = u.select_atoms(
             "name OD1 and resid 51 and sphzone 5.0 (resid 289)"
         )
-        rdf_ref = InterRDF(s1, s2).run()
+        rdf_ref = InterRDF(s1, s2).run(**client_InterRDF_s)
         assert_almost_equal(rdf_ref.results.rdf, rdf.results.rdf[0][0][0])


 @pytest.mark.parametrize(
     "norm, norm_required", [("Density", "density"), (None, "none")]
 )
-def test_norm_values(u, sels, norm, norm_required):
-    rdf = InterRDF_s(u, sels, norm=norm).run()
+def test_norm_values(u, sels, norm, norm_required, client_InterRDF_s):
+    rdf = InterRDF_s(u, sels, norm=norm).run(**client_InterRDF_s)

     assert rdf.norm == norm_required

@@ -178,3 +180,21 @@ def test_rdf_attr_warning(rdf, attr):
 def test_backend(u, sels, backend):
     rdf = InterRDF_s(u, sels, norm="none", backend=backend).run()
     assert_allclose(max(rdf.results.rdf[0][0][0]), 0.6)
+
+
+def test_nested_array_sum():
+    arr_1 = np.random.rand(1, 2, 75)
+    arr_2 = np.random.rand(2, 2, 75)
+    arr_3 = np.random.rand(1, 2, 75)
+    arr_4 = np.random.rand(2, 2, 75)
+    arrs = [[arr_1, arr_2], [arr_3, arr_4]]
+
+    result = nested_array_sum(arrs)
+
+    assert result[0].shape == arrs[0][0].shape
+    assert result[0].shape == arrs[1][0].shape
+    assert result[1].shape == arrs[0][1].shape
+    assert result[1].shape == arrs[1][1].shape
+
+    assert np.array_equal(result[0], arr_1 + arr_3)
+    assert np.array_equal(result[1], arr_2 + arr_4)
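Usage note (editorial, not part of the patch): a minimal sketch of how the newly parallelized classes are expected to be driven, assuming the ``backend``/``n_workers`` keywords that ``AnalysisBase.run()`` accepts since MDAnalysis 2.8.0 and the backends reported by the new ``get_supported_backends``. The topology/trajectory file names and atom selections below are placeholders, and the short ``nested_array_sum`` check at the end mirrors the new test above.

# Sketch only -- not part of this diff. File names and selections are
# placeholders; the parallel keywords follow the MDAnalysis 2.8+ run() API.
import numpy as np
import MDAnalysis as mda
from MDAnalysis.analysis.rdf import InterRDF, nested_array_sum  # added by this patch

u = mda.Universe("topology.tpr", "trajectory.xtc")  # placeholder files
g1 = u.select_atoms("name OW")                      # placeholder selections
g2 = u.select_atoms("name HW1 HW2")

print(InterRDF.get_supported_backends())  # ('serial', 'multiprocessing', 'dask')

# A serial run and a two-worker multiprocessing run should accumulate the
# same histogram, because per-worker counts are combined with ndarray_sum.
serial = InterRDF(g1, g2, nbins=75, range=(0.0, 15.0)).run()
parallel = InterRDF(g1, g2, nbins=75, range=(0.0, 15.0)).run(
    backend="multiprocessing", n_workers=2
)
assert np.allclose(serial.results.count, parallel.results.count)

# nested_array_sum, the custom aggregator used for InterRDF_s.results.count,
# pairs up the two per-selection-pair arrays returned by each worker and sums
# them across workers (it assumes exactly two arrays per worker).
worker_a = [np.ones((1, 2, 75)), np.full((2, 2, 75), 2.0)]
worker_b = [np.full((1, 2, 75), 3.0), np.full((2, 2, 75), 4.0)]
pair0, pair1 = nested_array_sum([worker_a, worker_b])
assert np.array_equal(pair0, worker_a[0] + worker_b[0])
assert np.array_equal(pair1, worker_a[1] + worker_b[1])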