Compare commits

..

10 Commits

Author SHA1 Message Date
illustris
80eadbdddb
Bump version to 1.1.6 2024-06-05 14:08:09 +05:30
illustris
1eb1bf9053
fix crash when node has incoming VM migration 2024-06-05 14:05:37 +05:30
illustris
4416fbb6fe
bump inputs, import bundler without flake inputs 2024-01-12 23:52:47 +05:30
illustris
2b4be475b6
Bump version to 1.1.5 2023-12-11 15:34:17 +05:30
illustris
66b09d0fa5
fix mb-byte conversion for non-numa VMs 2023-12-11 15:34:06 +05:30
illustris
ce6ecf63ff
Bump version to 1.1.4 2023-12-11 15:31:57 +05:30
illustris
bd2b2c2a6e
fix crash with NUMA-enabled VMs 2023-12-11 15:31:27 +05:30
illustris
8e42f6306c
Bump version to 1.1.3 2023-12-10 01:12:04 +05:30
illustris
ffcdcff16e
fix 5xx until cache expiry when migrating VM disks 2023-12-10 01:11:15 +05:30
illustris
173369ff8e
Bump version to 1.1.2 2023-11-08 17:18:37 +05:30
6 changed files with 52 additions and 41 deletions

40
flake.lock generated
View File

@ -1,18 +1,13 @@
{
"nodes": {
"debBundler": {
"inputs": {
"home-manager": "home-manager",
"nixpkgs": [
"nixpkgs"
]
},
"flake": false,
"locked": {
"lastModified": 1699154900,
"narHash": "sha256-y+PK9ToYcAyY86EoM7Iam7gC++rCuAGndlnPTEzd3EA=",
"lastModified": 1705083181,
"narHash": "sha256-o1zolChrQk7UpMmFLjymjQWuoDIL1XStV56kuOprMDQ=",
"owner": "illustris",
"repo": "flake",
"rev": "a56221a54571b0e4326af29cf75b4cec081b8de7",
"rev": "6a9df656834b5111f7ffb0b1f6d97a0d8700de58",
"type": "github"
},
"original": {
@ -21,34 +16,13 @@
"type": "github"
}
},
"home-manager": {
"inputs": {
"nixpkgs": [
"debBundler",
"nixpkgs"
]
},
"locked": {
"lastModified": 1699025595,
"narHash": "sha256-e+o4PoSu2Z6Ww8y/AVUmMU200rNZoRK+p2opQ7Db8Rg=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "8765d4e38aa0be53cdeee26f7386173e6c65618d",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "home-manager",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1699099776,
"narHash": "sha256-X09iKJ27mGsGambGfkKzqvw5esP1L/Rf8H3u3fCqIiU=",
"lastModified": 1704722960,
"narHash": "sha256-mKGJ3sPsT6//s+Knglai5YflJUF2DGj7Ai6Ynopz0kI=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "85f1ba3e51676fa8cc604a3d863d729026a6b8eb",
"rev": "317484b1ead87b9c1b8ac5261a8d2dd748a0492d",
"type": "github"
},
"original": {

View File

@ -5,7 +5,7 @@ rec {
nixpkgs.url = github:nixos/nixpkgs/nixos-unstable;
debBundler = {
url = github:illustris/flake;
inputs.nixpkgs.follows = "nixpkgs";
flake = false;
};
};
@ -14,7 +14,7 @@ rec {
packages.x86_64-linux = with nixpkgs.legacyPackages.x86_64-linux; rec {
pvemon = python3Packages.buildPythonApplication {
pname = "pvemon";
version = "1.1.1";
version = "1.1.6";
src = ./src;
propagatedBuildInputs = with python3Packages; [
pexpect
@ -28,7 +28,7 @@ rec {
};
};
default = pvemon;
deb = debBundler.bundlers.x86_64-linux.deb default;
deb = (import "${debBundler}/bundlers/deb" { inherit pkgs; }) default;
updateRelease = writeScriptBin "update-release" (builtins.readFile ./utils/update-release.sh);
};

View File

@ -30,11 +30,20 @@ def ttl_cache_with_randomness(max_ttl, randomness_factor):
result = func(*args, **kwargs)
cache[key] = (result, time.time())
return result
def invalidate_cache(*args, **kwargs):
    """Forget the cached entry for one specific call signature, if present.

    The lookup key is the string form of the positional arguments
    concatenated with the string form of the keyword arguments. Calling
    this for a signature that has no cached entry is a no-op.
    """
    # pop() with a default removes the entry when it exists and silently
    # does nothing otherwise.
    cache.pop(str(args) + str(kwargs), None)
# Attach the invalidation function to the wrapper
wrapper.invalidate_cache = invalidate_cache
return wrapper
return decorator
@ttl_cache_with_randomness(qm_max_ttl, qm_rand)
def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout):
def qm_term_cmd(vm_id, cmd, timeout=global_qm_timeout): # TODO: ignore cmd timeout in cache key
global deferred_closing
child = pexpect.spawn(f'qm monitor {vm_id}')
try:

View File

@ -51,6 +51,21 @@ info_settings = [
def flag_to_label_value(args, match):
    """Return the value that follows the flag ``match`` in ``args``.

    Only the text before the first comma of that value is returned.
    If the flag does not occur, or occurs only as the very last element
    (so nothing follows it), the string "unknown" is returned.
    """
    # The last element is excluded from the search because it has no
    # successor that could serve as its value.
    for index, token in enumerate(args[:-1]):
        if token == match:
            return args[index + 1].split(",")[0]
    return "unknown"


def _memory_backend_size(arg):
    """Return the integer N from the ``size=NM`` option of a backend argument.

    Raises:
        ValueError: if ``arg`` has no ``size=`` option, or its value does
            not have the form ``<integer>M``.
    """
    for option in arg.split(","):
        key, _, value = option.partition("=")
        if key != "size":
            continue
        if not value.endswith("M"):
            raise ValueError(
                f"unsupported memory backend size {value!r} in {arg!r}"
            )
        # int() raises ValueError by itself if the digits are malformed.
        return int(value[:-1])
    raise ValueError(f"no size= option found in {arg!r}")


def parse_mem(cmdline):
    """Return the memory size configured on a QEMU command line.

    When the token after ``-m`` is a plain number, the result is that
    number multiplied by 1024. Otherwise the token is expected to look
    like 'size=1024,slots=255,maxmem=4194304M' (probably a NUMA-enabled
    VM), and the result is 1024 times the sum of the ``size=NM`` values
    of every argument that mentions ``memory-backend-ram``.

    Args:
        cmdline: list of command-line argument strings of the process.

    Returns:
        The computed size as an int; 0 if no size could be found at all.

    Raises:
        ValueError: if a ``memory-backend-ram`` argument carries no
            ``size=`` option or one that is not of the form ``<integer>M``.
            This is an explicit exception rather than an ``assert`` so the
            check is not stripped when Python runs with ``-O``.
    """
    plain = flag_to_label_value(cmdline, "-m")
    # lazy way to detect NUMA: a non-NUMA VM passes a bare number to -m
    if plain.isnumeric():
        return int(plain) * 1024

    # probably using NUMA: add up the per-node RAM backends. The size is
    # looked up by key so that further options following size= in the same
    # argument do not break parsing.
    return sum(
        1024 * _memory_backend_size(arg)
        for arg in cmdline
        if "memory-backend-ram" in arg
    )
def create_or_get_gauge(metric_name, labels, dynamic_gauges, gauge_lock):
with gauge_lock:
if metric_name not in dynamic_gauges:
@ -132,11 +147,17 @@ def collect_kvm_metrics():
for proc in psutil.process_iter(['pid', 'name', 'exe', 'cmdline', 'cpu_percent', 'memory_percent', 'num_threads']):
try:
if proc.info['exe'] == '/usr/bin/qemu-system-x86_64':
vmid = flag_to_label_value(proc.info['cmdline'], "-id")
# Check if VM definition exists. If it is missing, qm commands will fail.
# VM configs are typically missing when a VM is migrating in.
# The config file is moved after the drives and memory are synced.
if not os.path.exists(f'/etc/pve/qemu-server/{vmid}.conf'):
continue
procs.append(
(
proc,
proc.info['cmdline'],
flag_to_label_value(proc.info['cmdline'], "-id")
vmid
)
)
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
@ -151,7 +172,7 @@ def collect_kvm_metrics():
d = {
"kvm_vcores": flag_to_label_value(cmdline,"-smp"),
"kvm_maxmem": int(flag_to_label_value(cmdline,"-m"))*1024,
"kvm_maxmem": parse_mem(cmdline),
"kvm_memory_percent": proc.info['memory_percent'],
"kvm_threads": proc.info['num_threads'],
}

View File

@ -5,6 +5,8 @@ import json
import pvecommon
extract_disk_info_max_retries = 1
def get_device(disk_path):
try:
return os.readlink(disk_path).split('/')[-1]
@ -26,7 +28,7 @@ def handle_json_path(path):
raise ValueError('No host_device driver found or filename is missing')
return filename
def extract_disk_info_from_monitor(vm_id):
def extract_disk_info_from_monitor(vm_id, retries = 0):
raw_output = pvecommon.qm_term_cmd(vm_id, 'info block')
disks_map = {}
disks = [x.strip() for x in raw_output.split("drive-")[1:]]
@ -73,6 +75,11 @@ def extract_disk_info_from_monitor(vm_id):
disks_map[disk_name]["vg_name"] = vg_name
disks_map[disk_name]["vol_name"] = vol_name
disks_map[disk_name]["device"] = get_device(disk_path)
# At this point, if disks_map[disk_name]["device"] exists and is None, the cache might be stale
# Flush the cache for this VMID and try again
if "device" in disks_map[disk_name] and disks_map[disk_name]["device"] == None and retries < extract_disk_info_max_retries:
pvecommon.qm_term_cmd.invalidate_cache(vm_id, 'info block')
return extract_disk_info_from_monitor(vm_id, retries+1)
for line in data[1:-1]:
if "Attached to" in line:
attached_to = line.split(":")[-1].strip()

View File

@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(
name='pvemon',
version = "1.1.1",
version = "1.1.6",
packages=find_packages(),
entry_points={
'console_scripts': [