From 32d4397157d3cc657abaf3c7219b23c010c6a25e Mon Sep 17 00:00:00 2001 From: Sangtaek Lee Date: Wed, 17 Dec 2025 17:15:14 +0000 Subject: [PATCH 1/5] add error percentage to cpu and ram monitor --- diagnostic_common_diagnostics/README.md | 2 ++ .../cpu_monitor.py | 21 ++++++++++--------- .../ram_monitor.py | 11 ++++++++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/diagnostic_common_diagnostics/README.md b/diagnostic_common_diagnostics/README.md index 959adc85f..6b8fe687d 100644 --- a/diagnostic_common_diagnostics/README.md +++ b/diagnostic_common_diagnostics/README.md @@ -14,6 +14,7 @@ It publishes the usage percentage in a diagnostic message. * Name of the node is "cpu_monitor_" + hostname. * Uses the following args: * warning_percentage: If the CPU usage is > warning_percentage, a WARN status will be publised. + * error_percentage: If the CPU usage is > error_percentage, a ERROR status will be published. * window: the maximum length of the used collections.deque for queuing CPU readings. ### Published Topics @@ -97,6 +98,7 @@ It publishes the usage percentage in a diagnostic message. * Name of the node is "ram_monitor_" + hostname. * Uses the following args: * warning_percentage: If the RAM usage is > warning_percentage, a WARN status will be published. + * error_percentage: If the RAM usage is > error_percentage, a ERROR status will be published. * window: the maximum length of the used collections.deque for queuing RAM readings. ### Published Topics diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py index 32dd60eb3..ccc74616a 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py @@ -51,10 +51,11 @@ class CpuTask(DiagnosticTask): - def __init__(self, warning_percentage=90, window=1): + def __init__(self, warning_percentage=90, error_percentage=100, window=1): DiagnosticTask.__init__(self, 'CPU Information') self._warning_percentage = int(warning_percentage) + self._error_percentage = int(error_percentage) self._readings = deque(maxlen=window) def _get_average_reading(self): @@ -71,15 +72,12 @@ def run(self, stat): stat.add('CPU Load Average', f'{cpu_average:.2f}') - warn = False - for idx, cpu_percentage in enumerate(cpu_percentages): - stat.add(f'CPU {idx} Load', f'{cpu_percentage:.2f}') - if cpu_percentage > self._warning_percentage: - warn = True - - if warn: + if cpu_average > self._error_percentage: + stat.summary(DiagnosticStatus.ERROR, + f'CPU Average exceeds {self._error_percentage} percent') + elif cpu_average > self._warning_percentage: stat.summary(DiagnosticStatus.WARN, - f'At least one CPU exceeds {self._warning_percentage} percent') + f'CPU Average exceeds {self._warning_percentage} percent') else: stat.summary(DiagnosticStatus.OK, f'CPU Average {cpu_average:.2f} percent') @@ -100,16 +98,19 @@ def main(args=None): # Declare and get parameters node.declare_parameter('warning_percentage', 90) + node.declare_parameter('error_percentage', 100) node.declare_parameter('window', 1) warning_percentage = node.get_parameter( 'warning_percentage').get_parameter_value().integer_value + error_percentage = node.get_parameter( + 'error_percentage').get_parameter_value().integer_value window = node.get_parameter('window').get_parameter_value().integer_value # Create diagnostic updater with default updater rate of 1 hz updater = Updater(node) updater.setHardwareID(hostname) - updater.add(CpuTask(warning_percentage=warning_percentage, window=window)) + updater.add(CpuTask(warning_percentage=warning_percentage, error_percentage=error_percentage, window=window)) rclpy.spin(node) diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py index da59a6d25..92eebe3c9 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/ram_monitor.py @@ -48,9 +48,10 @@ class RamTask(DiagnosticTask): - def __init__(self, warning_percentage, window): + def __init__(self, warning_percentage, error_percentage, window): DiagnosticTask.__init__(self, 'RAM Information') self._warning_percentage = int(warning_percentage) + self._error_percentage = int(error_percentage) self._readings = collections.deque(maxlen=window) def run(self, stat): @@ -59,7 +60,12 @@ def run(self, stat): stat.add('RAM Load Average', f'{ram_average:.2f}') - if ram_average > self._warning_percentage: + if ram_average > self._error_percentage: + stat.summary( + DiagnosticStatus.ERROR, + f'RAM Average exceeds {self._error_percentage:d} percent', + ) + elif ram_average > self._warning_percentage: stat.summary( DiagnosticStatus.WARN, f'RAM Average exceeds {self._warning_percentage:d} percent', @@ -84,6 +90,7 @@ def main(): updater.add( RamTask( node.declare_parameter('warning_percentage', 90).value, + node.declare_parameter('error_percentage', 100).value, node.declare_parameter('window', 1).value, ) ) From f20f0ca5b85a7ee4e4ce88ed85e72912d2352b1d Mon Sep 17 00:00:00 2001 From: Sangtaek Lee Date: Wed, 17 Dec 2025 17:23:20 +0000 Subject: [PATCH 2/5] fix doc --- diagnostic_common_diagnostics/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diagnostic_common_diagnostics/README.md b/diagnostic_common_diagnostics/README.md index 6b8fe687d..d2b5bd6de 100644 --- a/diagnostic_common_diagnostics/README.md +++ b/diagnostic_common_diagnostics/README.md @@ -14,7 +14,7 @@ It publishes the usage percentage in a diagnostic message. * Name of the node is "cpu_monitor_" + hostname. * Uses the following args: * warning_percentage: If the CPU usage is > warning_percentage, a WARN status will be publised. - * error_percentage: If the CPU usage is > error_percentage, a ERROR status will be published. + * error_percentage: If the CPU usage is > error_percentage, an ERROR status will be published. * window: the maximum length of the used collections.deque for queuing CPU readings. ### Published Topics @@ -98,7 +98,7 @@ It publishes the usage percentage in a diagnostic message. * Name of the node is "ram_monitor_" + hostname. * Uses the following args: * warning_percentage: If the RAM usage is > warning_percentage, a WARN status will be published. - * error_percentage: If the RAM usage is > error_percentage, a ERROR status will be published. + * error_percentage: If the RAM usage is > error_percentage, an ERROR status will be published. * window: the maximum length of the used collections.deque for queuing RAM readings. ### Published Topics From d42401c1937e72734cc6d99b3a9fdcd9a855604d Mon Sep 17 00:00:00 2001 From: Sangtaek Lee Date: Wed, 17 Dec 2025 17:33:13 +0000 Subject: [PATCH 3/5] fix format --- .../diagnostic_common_diagnostics/cpu_monitor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py index ccc74616a..b94af3289 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py @@ -110,7 +110,8 @@ def main(args=None): # Create diagnostic updater with default updater rate of 1 hz updater = Updater(node) updater.setHardwareID(hostname) - updater.add(CpuTask(warning_percentage=warning_percentage, error_percentage=error_percentage, window=window)) + updater.add(CpuTask(warning_percentage=warning_percentage, error_percentage=error_percentage, + window=window)) rclpy.spin(node) From fe701e998e07abc182e48346c869018bc99d73dd Mon Sep 17 00:00:00 2001 From: Sangtaek Lee Date: Wed, 17 Dec 2025 17:44:03 +0000 Subject: [PATCH 4/5] fix test --- .../test/systemtest/test_cpu_monitor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/diagnostic_common_diagnostics/test/systemtest/test_cpu_monitor.py b/diagnostic_common_diagnostics/test/systemtest/test_cpu_monitor.py index 28430c482..4dc83f0c1 100644 --- a/diagnostic_common_diagnostics/test/systemtest/test_cpu_monitor.py +++ b/diagnostic_common_diagnostics/test/systemtest/test_cpu_monitor.py @@ -85,7 +85,7 @@ def test_warn(self): print(f'Raw readings: {task._readings}') self.assertEqual(task.name, 'CPU Information') self.assertEqual(stat.level, DiagnosticStatus.WARN) - self.assertIn(str('At least one CPU exceeds'), stat.message) + self.assertIn(str('CPU Average exceeds'), stat.message) # Check for at least 1 CPU Load Average and 1 CPU Load self.assertGreaterEqual(len(stat.values), 2) From 374e5ac2d555dd775a995907952d6092f9d398e8 Mon Sep 17 00:00:00 2001 From: Sangtaek Lee Date: Wed, 17 Dec 2025 17:46:41 +0000 Subject: [PATCH 5/5] revert each cpu load stat --- .../diagnostic_common_diagnostics/cpu_monitor.py | 3 +++ .../diagnostic_common_diagnostics/param_decl.yaml | 0 2 files changed, 3 insertions(+) create mode 100644 diagnostic_common_diagnostics/diagnostic_common_diagnostics/param_decl.yaml diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py index b94af3289..a997ce633 100755 --- a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py +++ b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/cpu_monitor.py @@ -72,6 +72,9 @@ def run(self, stat): stat.add('CPU Load Average', f'{cpu_average:.2f}') + for idx, cpu_percentage in enumerate(cpu_percentages): + stat.add(f'CPU {idx} Load', f'{cpu_percentage:.2f}') + if cpu_average > self._error_percentage: stat.summary(DiagnosticStatus.ERROR, f'CPU Average exceeds {self._error_percentage} percent') diff --git a/diagnostic_common_diagnostics/diagnostic_common_diagnostics/param_decl.yaml b/diagnostic_common_diagnostics/diagnostic_common_diagnostics/param_decl.yaml new file mode 100644 index 000000000..e69de29bb