diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6daf8562..5969c53a 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,6 +3,7 @@ Changes 5.0.1 ~~~~~ +* FIX: Prevented duplicate or inconsistent profiler output under Python 3.14 when multiprocessing is used. * ENH: Add %%lprun_all for more beginner-friendly profiling in IPython/Jupyter #383 * FIX: mitigate speed regressions introduced in 5.0.0 * ENH: Added capability to combine profiling data both programmatically (``LineStats.__add__()``) and via the CLI (``python -m line_profiler``) (#380, originally proposed in #219) diff --git a/line_profiler/explicit_profiler.py b/line_profiler/explicit_profiler.py index bb7a904a..787c4ac2 100644 --- a/line_profiler/explicit_profiler.py +++ b/line_profiler/explicit_profiler.py @@ -163,14 +163,30 @@ def func4(): The core functionality in this module was ported from :mod:`xdev`. """ +from __future__ import annotations import atexit +import multiprocessing import os +import pathlib import sys +import typing +from typing import Any, Callable + +if typing.TYPE_CHECKING: + ConfigArg = str | pathlib.PurePath | bool | None + + # This is for compatibility from .cli_utils import boolean, get_python_executable as _python_command from .line_profiler import LineProfiler from .toml_config import ConfigSource +# The first process that enables profiling records its PID here. Child processes +# created via multiprocessing (spawn/forkserver) inherit this environment value, +# which helps prevent helper processes from claiming ownership and clobbering +# output. Standalone subprocess runs should always be able to reset this value. +_OWNER_PID_ENVVAR: str = 'LINE_PROFILER_OWNER_PID' + class GlobalProfiler: """ @@ -179,7 +195,7 @@ class GlobalProfiler: The :py:obj:`line_profile.profile` decorator is an instance of this object. Arguments: - config (Union[str, PurePath, bool, None]): + config (str | PurePath | bool | None): Optional TOML config file from which to load the configurations (see Attributes); if not explicitly given (= :py:data:`True` or @@ -187,8 +203,7 @@ class GlobalProfiler: :envvar:`!LINE_PROFILER_RC` environment variable or looked up among the current directory or its ancestors. Should all that fail, the default config file at - ``importlib.resources.path('line_profiler.rc', \ -'line_profiler.toml')`` is used; + ``importlib.resources.path('line_profiler.rc', 'line_profiler.toml')`` is used; passing :py:data:`False` disables all lookup and falls back to the default configuration @@ -258,14 +273,24 @@ class GlobalProfiler: >>> self.show() """ - def __init__(self, config=None): + _config: pathlib.PurePath | None + _profile: LineProfiler | None + _owner_pid: int | None + enabled: bool | None + + setup_config: dict[str, list[str]] + write_config: dict[str, Any] + show_config: dict[str, Any] + output_prefix: str + + def __init__(self, config: ConfigArg = None) -> None: # Remember which config file we loaded settings from config_source = ConfigSource.from_config(config) self._config = config_source.path self._profile = None + self._owner_pid = None self.enabled = None - # Configs: # - How to toggle the profiler self.setup_config = config_source.conf_dict['setup'] @@ -280,7 +305,7 @@ def __init__(self, config=None): # supplied `config`) self.show_config.pop('column_widths') - def _kernprof_overwrite(self, profile): + def _kernprof_overwrite(self, profile: LineProfiler) -> None: """ Kernprof will call this when it runs, so we can use its profile object instead of our own. Note: when kernprof overwrites us we wont register @@ -290,7 +315,7 @@ def _kernprof_overwrite(self, profile): self._profile = profile self.enabled = True - def _implicit_setup(self): + def _implicit_setup(self) -> None: """ Called once the first time the user decorates a function with ``line_profiler.profile`` and they have not explicitly setup the global @@ -306,29 +331,96 @@ def _implicit_setup(self): else: self.disable() - def enable(self, output_prefix=None): + def enable(self, output_prefix: str | None = None) -> None: """ Explicitly enables global profiler and controls its settings. + + Notes: + Multiprocessing start methods like 'spawn'/'forkserver' can create + helper/bootstrap interpreters that import this module. Those helpers + must not claim ownership or register an atexit hook, otherwise they can + clobber output from the real script process. """ + self._debug("enable:ENTER") + + if is_mp_bootstrap(): + self._debug("enable:skip-mp-bootstrap") + self.enabled = False + return + + if self._should_skip_due_to_owner(): + self._debug("enable:skip-due-to-owner") + self.enabled = False + return + + owner_pid = os.getpid() + os.environ[_OWNER_PID_ENVVAR] = str(owner_pid) + self._owner_pid = owner_pid + self._debug("enable:owner-claimed", owner_pid=owner_pid) + if self._profile is None: - # Try to only ever create one real LineProfiler object atexit.register(self.show) - self._profile = LineProfiler() # type: ignore + self._profile = LineProfiler() - # The user can call this function more than once to update the final - # reporting or to re-enable the profiler after it a disable. self.enabled = True - if output_prefix is not None: self.output_prefix = output_prefix - def disable(self): + def _should_skip_due_to_owner(self) -> bool: + """ + Return True if another process has already claimed ownership. + + The first process to enable profiling records its PID in an env var. + Child interpreters can inherit that value; they must not steal ownership. + """ + owner = os.environ.get(_OWNER_PID_ENVVAR) + if not owner: + self._debug("owner:no-owner-env") + return False + + current = str(os.getpid()) + if owner == current: + self._debug("owner:is-us", owner=owner) + return False + + if is_mp_bootstrap(): + self._debug("owner:skip-mp-bootstrap", owner=owner, current=current) + return True + + # Standalone run: allow this interpreter to become the owner. + self._debug("owner:allow-standalone-reset", owner=owner, current=current) + return False + + def _debug(self, message: str, **extra: Any) -> None: + if not os.environ.get('LINE_PROFILER_DEBUG'): + return + try: + parent = multiprocessing.parent_process() + parent_pid = parent.pid if parent is not None else None + except Exception: + parent_pid = None + + pid = os.getpid() + + info: dict[str, Any] = { + 'ppid': os.getppid(), + 'process': getattr(multiprocessing.current_process(), 'name', None), + 'parent_pid': parent_pid, + 'owner_env': os.environ.get(_OWNER_PID_ENVVAR), + 'owner_pid': self._owner_pid, + 'enabled': self.enabled, + } + info.update(extra) + payload = ' '.join(f'{k}={v!r}' for k, v in info.items()) + print(f'[line_profiler debug {pid=}] {message} {payload}') + + def disable(self) -> None: """ Explicitly initialize and disable this global profiler. """ self.enabled = False - def __call__(self, func): + def __call__(self, func: Callable) -> Callable: """ If the global profiler is enabled, decorate a function to start the profiler on function entry and stop it on function exit. Otherwise @@ -349,31 +441,43 @@ def __call__(self, func): self._implicit_setup() if not self.enabled: return func - return self._profile(func) + assert self._profile is not None + + wrapped = self._profile(func) + return wrapped - def show(self): + def show(self) -> None: """ Write the managed profiler stats to enabled outputs. If the implicit setup triggered, then this will be called by :py:mod:`atexit`. """ + self._debug('show:enter') + owner_env = os.environ.get(_OWNER_PID_ENVVAR) + if os.getppid() == 1 and owner_env == str(os.getpid()): + self._debug('show:skip-orphan-owner', owner_env=owner_env) + return + if self._owner_pid is not None and os.getpid() != self._owner_pid: + self._debug('show:skip-non-owner', current_pid=os.getpid()) + return import io - import pathlib write_stdout = self.write_config['stdout'] write_text = self.write_config['text'] write_timestamped_text = self.write_config['timestamped_text'] write_lprof = self.write_config['lprof'] + assert self._profile is not None + + kwargs: dict[str, Any] = {'config': self._config, **self.show_config} if write_stdout: - kwargs = {'config': self._config, **self.show_config} self._profile.print_stats(**kwargs) if write_text or write_timestamped_text: stream = io.StringIO() # Text output always contains details, and cannot be rich. - text_kwargs = {**kwargs, 'rich': False, 'details': True} + text_kwargs: dict[str, Any] = {**kwargs, 'rich': False, 'details': True} self._profile.print_stats(stream=stream, **text_kwargs) raw_text = stream.getvalue() @@ -401,6 +505,61 @@ def show(self): + str(lprof_output_fpath)) +def is_mp_bootstrap() -> bool: + """ + True when this interpreter invocation looks like multiprocessing + bootstrapping/plumbing, where we must not claim ownership / write outputs. + + Example: + >>> # xdoctest: +SKIP('can be flaky at test time') + >>> import pytest + >>> if is_mp_bootstrap(): + ... pytest.skip('Cannot test mp bootstrap detection from within an mp bootstrap process') + >>> import sys, subprocess, textwrap + >>> code = textwrap.dedent(r''' + ... import multiprocessing as mp + ... from line_profiler.explicit_profiler import is_mp_bootstrap + ... + ... def child(q): + ... q.put(is_mp_bootstrap()) + ... + ... if __name__ == "__main__": + ... ctx = mp.get_context("spawn") + ... q = ctx.Queue() + ... p = ctx.Process(target=child, args=(q,)) + ... p.start() + ... val = q.get() + ... p.join() + ... print(val) + ... ''') + >>> out = subprocess.check_output([sys.executable, "-c", code], text=True).strip() + >>> out in {"True", "False"} + True + """ + try: + import multiprocessing.spawn as mp_spawn + if getattr(mp_spawn, "_inheriting", False): + return True + except Exception: + pass + + orig = getattr(sys, "orig_argv", None) or [] + if any(a.startswith("--multiprocessing") for a in orig): + return True + if any("multiprocessing.forkserver" in a for a in orig): + return True + if any("multiprocessing.spawn" in a for a in orig): + return True + + try: + if multiprocessing.current_process().name != "MainProcess": + return True + except Exception: + pass + + return False + + # Construct the global profiler. # The first time it is called, it will be initialized. This is usually a # NoOpProfiler unless the user requested the real one. diff --git a/line_profiler/explicit_profiler.pyi b/line_profiler/explicit_profiler.pyi deleted file mode 100644 index f777689b..00000000 --- a/line_profiler/explicit_profiler.pyi +++ /dev/null @@ -1,33 +0,0 @@ -from pathlib import PurePath -from typing import Dict -from typing import List -from typing import Callable -from typing import Union -from _typeshed import Incomplete - - -class GlobalProfiler: - setup_config: Dict[str, List[str]] - output_prefix: str - write_config: Dict[str, bool] - show_config: Dict[str, bool] - enabled: bool | None - - def __init__(self, - config: Union[str, PurePath, bool, None] = None) -> None: - ... - - def enable(self, output_prefix: Incomplete | None = ...) -> None: - ... - - def disable(self) -> None: - ... - - def __call__(self, func: Callable) -> Callable: - ... - - def show(self) -> None: - ... - - -profile: Incomplete diff --git a/line_profiler/line_profiler.pyi b/line_profiler/line_profiler.pyi index 7493dc58..0a9bdbb0 100644 --- a/line_profiler/line_profiler.pyi +++ b/line_profiler/line_profiler.pyi @@ -117,7 +117,7 @@ class LineProfiler(CLineProfiler, ByCountProfilerMixin): # Fallback: just wrap the `.__call__()` of a generic callable @overload - def __call__(self, func: Callable) -> FunctionType: + def __call__(self, func: Callable) -> Callable: ... def add_callable( diff --git a/tests/test_complex_case.py b/tests/test_complex_case.py index bd705929..ace7502b 100644 --- a/tests/test_complex_case.py +++ b/tests/test_complex_case.py @@ -95,6 +95,8 @@ def test_varied_complex_invocations(): temp_dpath = stack.enter_context(tempfile.TemporaryDirectory()) stack.enter_context(ub.ChDir(temp_dpath)) env = {} + # Can enable if this breaks again + # env['LINE_PROFILER_DEBUG'] = '1' outpath = case['outpath'] if outpath: diff --git a/tests/test_explicit_profile.py b/tests/test_explicit_profile.py index 038e6582..511509b1 100644 --- a/tests/test_explicit_profile.py +++ b/tests/test_explicit_profile.py @@ -141,6 +141,88 @@ def test_explicit_profile_with_environ_on(): assert (temp_dpath / 'profile_output.lprof').exists() +def test_explicit_profile_ignores_inherited_owner_marker(): + """ + Standalone runs should not be blocked by an inherited owner marker. + """ + with tempfile.TemporaryDirectory() as tmp: + temp_dpath = ub.Path(tmp) + env = os.environ.copy() + env['LINE_PROFILE'] = '1' + env['LINE_PROFILER_OWNER_PID'] = str(os.getpid() + 100000) + env['PYTHONPATH'] = os.getcwd() + + with ub.ChDir(temp_dpath): + + script_fpath = ub.Path('script.py') + script_fpath.write_text(_demo_explicit_profile_script()) + + args = [sys.executable, os.fspath(script_fpath)] + proc = ub.cmd(args, env=env) + print(proc.stdout) + print(proc.stderr) + proc.check_returncode() + + assert (temp_dpath / 'profile_output.txt').exists() + assert (temp_dpath / 'profile_output.lprof').exists() + + +def test_explicit_profile_process_pool_forkserver(): + """ + Ensure explicit profiler works with forkserver ProcessPoolExecutor. + """ + import multiprocessing as mp + if 'forkserver' not in mp.get_all_start_methods(): + pytest.skip('forkserver start method not available') + with tempfile.TemporaryDirectory() as tmp: + temp_dpath = ub.Path(tmp) + env = os.environ.copy() + env['LINE_PROFILE'] = '1' + # env['LINE_PROFILER_DEBUG'] = '1' + env['PYTHONPATH'] = os.getcwd() + + with ub.ChDir(temp_dpath): + + script_fpath = ub.Path('script.py') + script_fpath.write_text(ub.codeblock( + ''' + import multiprocessing as mp + from concurrent.futures import ProcessPoolExecutor + from line_profiler import profile + + def worker(x): + return x * x + + @profile + def run(): + total = 0 + for i in range(1000): + total += i % 7 + with ProcessPoolExecutor(max_workers=2) as ex: + list(ex.map(worker, range(4))) + return total + + def main(): + if 'forkserver' in mp.get_all_start_methods(): + mp.set_start_method('forkserver', force=True) + run() + + if __name__ == '__main__': + main() + ''').strip()) + + args = [sys.executable, os.fspath(script_fpath)] + proc = ub.cmd(args, env=env) + print(proc.stdout) + print(proc.stderr) + proc.check_returncode() + + output_path = temp_dpath / 'profile_output.txt' + assert output_path.exists() + assert output_path.stat().st_size > 100 + assert proc.stdout.count('Wrote profile results to profile_output.txt') == 1 + + def test_explicit_profile_with_environ_off(): """ When LINE_PROFILE is falsy, profiling should not run.