diff --git a/src/pvecommon/__init__.py b/src/pvecommon/__init__.py
index ce54b8b..14e5af9 100644
--- a/src/pvecommon/__init__.py
+++ b/src/pvecommon/__init__.py
@@ -1,8 +1,16 @@
 import time
 import random
 import pexpect
+import logging
 from functools import wraps
+from datetime import datetime, timedelta
+
+# When true, monitor sessions that fail to close are queued and retried later.
+qm_monitor_defer_close = True
+# (child, first_failure_time) pairs that are still waiting to be closed.
+deferred_closing = []
+
 
 global_qm_timeout = 10
 qm_max_ttl = 600
 qm_rand = 60
@@ -29,6 +37,20 @@ def ttl_cache_with_randomness(max_ttl, randomness_factor):
 
 @ttl_cache_with_randomness(qm_max_ttl, qm_rand)
 def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout):
+    """Query the `qm monitor` console of a VM and return the captured output.
+
+    The monitor session is always closed afterwards. If closing fails and
+    `qm_monitor_defer_close` is set, the child is queued in
+    `deferred_closing` and closing is retried on later calls once the entry
+    is more than 10 seconds old.
+
+    Raises:
+        pexpect.exceptions.ExceptionPexpect: closing failed while deferral
+            is disabled.
+        Exception: a deferred child still could not be closed when retried
+            after those 10 seconds.
+    """
+    global deferred_closing
     child = pexpect.spawn(f'qm monitor {vm_id}')
     try:
         child.expect('qm>', timeout=timeout)
@@ -36,6 +58,35 @@ def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout):
         child.expect('qm>', timeout=timeout)
         raw_output = child.before.decode('utf-8').strip()
     finally:
-        child.close()
+        try:
+            child.close()
+        except pexpect.exceptions.ExceptionPexpect:
+            if not qm_monitor_defer_close:
+                # Deferral disabled: surface the failure as before this change.
+                raise
+            logging.warning(f"Failed to close {vm_id=}, {cmd=}; deferring")
+            deferred_closing.append((child, datetime.now()))
+
+        if qm_monitor_defer_close:
+            # Reattempt closing deferred child processes. Entries younger than
+            # the 10 second grace period are kept without being retried.
+            still_deferred = []
+            overdue_failures = 0
+            for deferred_child, timestamp in deferred_closing:
+                if datetime.now() - timestamp > timedelta(seconds=10):
+                    try:
+                        deferred_child.close()
+                    except pexpect.exceptions.ExceptionPexpect:
+                        still_deferred.append((deferred_child, timestamp))
+                        overdue_failures += 1
+                else:
+                    still_deferred.append((deferred_child, timestamp))
+
+            deferred_closing = still_deferred
+
+            # Only a child that survived a retry after the grace period is an
+            # error; a freshly deferred one has not had its 10 seconds yet.
+            if overdue_failures:
+                raise Exception("Could not terminate some child processes after 10 seconds.")
 
     return raw_output
diff --git a/src/pvemon/__init__.py b/src/pvemon/__init__.py
index c7948ca..0e8e2ca 100644
--- a/src/pvemon/__init__.py
+++ b/src/pvemon/__init__.py
@@ -203,6 +203,7 @@ def main():
     parser.add_argument('--qm-terminal-timeout', type=int, default=10, help='timeout for qm terminal commands')
     parser.add_argument('--qm-max-ttl', type=int, default=600, help='cache ttl for data pulled from qm monitor')
     parser.add_argument('--qm-rand', type=int, default=60, help='randomize qm monitor cache expiry')
+    parser.add_argument('--qm-monitor-defer-close', type=str, default="true", help='defer and retry closing unresponsive qm monitor sessions')
 
     args = parser.parse_args()
 
@@ -216,6 +217,8 @@ def main():
     pvecommon.global_qm_timeout = args.qm_terminal_timeout
     pvecommon.qm_max_ttl = args.qm_max_ttl
     pvecommon.qm_rand = args.qm_rand
+    # The flag arrives as a string; a bare "false" would be truthy, so parse it explicitly.
+    pvecommon.qm_monitor_defer_close = args.qm_monitor_defer_close.strip().lower() in ('1', 'true', 'yes', 'on')
 
     for name, description, labels in gauge_settings:
         gauge_dict[name] = Gauge(f"{prefix}_{name}", description, labels)