handle slow qm commands better

This commit is contained in:
illustris 2023-10-17 22:59:18 +05:30
parent 31b2bf6b62
commit 38492cc807
Signed by: illustris
GPG Key ID: 56C8FC0B899FEFA3
2 changed files with 32 additions and 1 deletion

View File

@ -1,8 +1,14 @@
import time import time
import random import random
import pexpect import pexpect
import logging
from functools import wraps from functools import wraps
from datetime import datetime, timedelta
qm_monitor_defer_close = True
deferred_closing = []
global_qm_timeout = 10 global_qm_timeout = 10
qm_max_ttl = 600 qm_max_ttl = 600
qm_rand = 60 qm_rand = 60
@ -29,6 +35,7 @@ def ttl_cache_with_randomness(max_ttl, randomness_factor):
@ttl_cache_with_randomness(qm_max_ttl, qm_rand) @ttl_cache_with_randomness(qm_max_ttl, qm_rand)
def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout): def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout):
global deferred_closing
child = pexpect.spawn(f'qm monitor {vm_id}') child = pexpect.spawn(f'qm monitor {vm_id}')
try: try:
child.expect('qm>', timeout=timeout) child.expect('qm>', timeout=timeout)
@ -36,6 +43,28 @@ def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout):
child.expect('qm>', timeout=timeout) child.expect('qm>', timeout=timeout)
raw_output = child.before.decode('utf-8').strip() raw_output = child.before.decode('utf-8').strip()
finally: finally:
try:
child.close() child.close()
except pexpect.exceptions.ExceptionPexpect:
if qm_monitor_defer_close:
logging.warn(f"Failed to close {vm_id=}, {cmd=}; deferring")
deferred_closing.append((child, datetime.now()))
if qm_monitor_defer_close:
# Reattempt closing deferred child processes
still_deferred = []
for child, timestamp in deferred_closing:
if datetime.now() - timestamp > timedelta(seconds=10):
try:
child.close()
except pexpect.exceptions.ExceptionPexpect:
still_deferred.append((child, timestamp))
else:
still_deferred.append((child, timestamp))
deferred_closing = still_deferred
if deferred_closing:
raise Exception("Could not terminate some child processes after 10 seconds.")
return raw_output return raw_output

View File

@ -203,6 +203,7 @@ def main():
parser.add_argument('--qm-terminal-timeout', type=int, default=10, help='timeout for qm terminal commands') parser.add_argument('--qm-terminal-timeout', type=int, default=10, help='timeout for qm terminal commands')
parser.add_argument('--qm-max-ttl', type=int, default=600, help='cache ttl for data pulled from qm monitor') parser.add_argument('--qm-max-ttl', type=int, default=600, help='cache ttl for data pulled from qm monitor')
parser.add_argument('--qm-rand', type=int, default=60, help='randomize qm monitor cache expiry') parser.add_argument('--qm-rand', type=int, default=60, help='randomize qm monitor cache expiry')
parser.add_argument('--qm-monitor-defer-close', type=str, default="true", help='defer and retry closing unresponsive qm monitor sessions')
args = parser.parse_args() args = parser.parse_args()
@ -216,6 +217,7 @@ def main():
pvecommon.global_qm_timeout = args.qm_terminal_timeout pvecommon.global_qm_timeout = args.qm_terminal_timeout
pvecommon.qm_max_ttl = args.qm_max_ttl pvecommon.qm_max_ttl = args.qm_max_ttl
pvecommon.qm_rand = args.qm_rand pvecommon.qm_rand = args.qm_rand
pvecommon.qm_monitor_defer_close = args.qm_monitor_defer_close
for name, description, labels in gauge_settings: for name, description, labels in gauge_settings:
gauge_dict[name] = Gauge(f"{prefix}_{name}", description, labels) gauge_dict[name] = Gauge(f"{prefix}_{name}", description, labels)