This commit is contained in:
illustris 2023-10-02 12:07:55 +05:30
commit ce4c98cbcb
Signed by: illustris
GPG Key ID: 56C8FC0B899FEFA3
5 changed files with 304 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
result
*~
*.deb

64
flake.lock generated Normal file
View File

@ -0,0 +1,64 @@
{
"nodes": {
"debBundler": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1632973430,
"narHash": "sha256-9G8zo+0nfYAALV5umCyQR/2hVUFNH10JropBkyxZGGw=",
"owner": "juliosueiras-nix",
"repo": "nix-utils",
"rev": "b44e1ffd726aa03056db9df469efb497d8b9871b",
"type": "github"
},
"original": {
"owner": "juliosueiras-nix",
"repo": "nix-utils",
"type": "github"
}
},
"flake-utils": {
"locked": {
"lastModified": 1623875721,
"narHash": "sha256-A8BU7bjS5GirpAUv4QA+QnJ4CceLHkcXdRp4xITDB0s=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "f7e004a55b120c02ecb6219596820fcd32ca8772",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1696019113,
"narHash": "sha256-X3+DKYWJm93DRSdC5M6K5hLqzSya9BjibtBsuARoPco=",
"owner": "nixos",
"repo": "nixpkgs",
"rev": "f5892ddac112a1e9b3612c39af1b72987ee5783a",
"type": "github"
},
"original": {
"owner": "nixos",
"ref": "nixos-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"debBundler": "debBundler",
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

38
flake.nix Normal file
View File

@ -0,0 +1,38 @@
# Flake for pvemon: builds the Python exporter and a .deb bundle of it.
# `rec` lets `meta.description` below reuse the flake-level description.
rec {
  description = "PVE prometheus exporter that collects metrics locally rather than use the PVE API";

  inputs = {
    # Flake references are written as quoted strings: bare URL literals are a
    # deprecated Nix syntax and are rejected when `no-url-literals` is enabled.
    nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable";
    debBundler = {
      url = "github:juliosueiras-nix/nix-utils";
      # Make the bundler use the same nixpkgs as this flake.
      inputs.nixpkgs.follows = "nixpkgs";
    };
  };

  outputs = { self, nixpkgs, debBundler }:
    let
      # The only platform this flake builds for. Bound explicitly so that
      # `inherit system` below does not depend on what `with pkgs` exposes.
      system = "x86_64-linux";
      pkgs = nixpkgs.legacyPackages.${system};
    in {
      packages.${system} = with pkgs; rec {
        # The exporter itself, built from ./src (setup.py lives there).
        pvemon = python3Packages.buildPythonApplication {
          pname = "pvemon";
          version = "0.1.0";
          src = ./src;
          propagatedBuildInputs = with python3Packages; [
            pexpect
            prometheus-client
            psutil
          ];
          meta = {
            inherit description;
            license = lib.licenses.mit;
          };
        };

        default = pvemon;

        # Debian package wrapping the default package's executable.
        deb = debBundler.bundlers.deb {
          inherit system;
          program = "${default}/bin/${default.pname}";
        };
      };
    };
}

187
src/pvemon/__init__.py Normal file
View File

@ -0,0 +1,187 @@
from prometheus_client import start_http_server, Gauge, Info
import psutil
import time
import argparse
import re
import itertools
import os
import pexpect
# Default TCP port for the exporter's HTTP server (used as the --port default).
DEFAULT_PORT = 9116
# Default pause in seconds between collection passes (used as the --interval default).
DEFAULT_INTERVAL = 10
# Gauges registered once at import time.
# Each entry is (metric name, help text, label names).
gauge_settings = [
    ('pve_kvm_cpu', 'CPU time for VM', ['id', 'mode']),
    ('pve_kvm_vcores', 'vCores allocated to the VM', ['id']),
    ('pve_kvm_maxmem', 'Maximum memory (bytes) allocated to the VM', ['id']),
    ('pve_kvm_memory_percent', 'Percentage of host memory used by VM', ['id']),
    ('pve_kvm_memory_extended', 'Detailed memory metrics for VM', ['id', 'type']),
    ('pve_kvm_threads', 'Threads used by the KVM process', ['id']),
    ('pve_kvm_io_read_count', 'Number of read system calls made by the KVM process', ['id']),
    ('pve_kvm_io_read_bytes', 'Number of bytes read from disk', ['id']),
    ('pve_kvm_io_read_chars', 'Number of bytes read including buffers', ['id']),
    ('pve_kvm_ctx_switches', 'Context switches', ['id', 'type']),
    ('pve_kvm_io_write_count', 'Number of write system calls made by the KVM process', ['id']),
    ('pve_kvm_io_write_bytes', 'Number of bytes written to disk', ['id']),
    ('pve_kvm_io_write_chars', 'Number of bytes written including buffers', ['id']),
    ('pve_kvm_nic_queues', 'Number of queues in multiqueue config', ['id', 'ifname']),
]

# Metric name -> Gauge object, so collectors can look a gauge up by its name.
gauge_dict = {}
for name, description, labels in gauge_settings:
    gauge_dict[name] = Gauge(name, documentation=description, labelnames=labels)
# Command-line flags of the kvm process whose values become labels of the
# pve_kvm info metric.
label_flags = ["-id", "-name", "-cpu"]


def get_label_name(flag):
    """Return the label name for *flag*: the flag text without its first character."""
    return flag[1:]
# Info metrics registered once at import time.
# Each entry is (metric name, help text).
info_settings = [
    ('pve_kvm', 'information for each KVM process'),
]

# Metric name -> Info object.
info_dict = {}
for name, description in info_settings:
    info_dict[name] = Info(name, documentation=description)
def flag_to_label_value(args, match):
    """Return the value following the first occurrence of flag *match* in *args*.

    Only the part of the value before the first comma is returned, so
    ``"4,sockets=1"`` yields ``"4"``. When the flag is absent, or is the
    very last element and therefore has no value, ``"unknown"`` is returned.
    """
    # The last element is excluded from the scan: it cannot be followed by a value.
    for position, token in enumerate(args[:-1]):
        if token == match:
            return args[position + 1].split(",")[0]
    return "unknown"
# Cache of gauges created on demand, keyed by metric name.
dynamic_gauges = {}


def create_or_get_gauge(metric_name, labels):
    """Return the gauge named *metric_name*, creating and caching it on first use.

    *labels* is only consulted when the gauge is created; later calls with the
    same name return the cached gauge regardless of the labels passed.
    """
    if metric_name in dynamic_gauges:
        return dynamic_gauges[metric_name]
    gauge = Gauge(metric_name, f'{metric_name} for KVM process', labels)
    dynamic_gauges[metric_name] = gauge
    return gauge
# Cache of info metrics created on demand, keyed by (metric name, str(labels)).
dynamic_infos = {}


def create_or_get_info(info_name, labels):
    """Return the info metric for (*info_name*, *labels*), creating it on first use.

    The cache key uses the string form of *labels*, so the same label names
    passed as a different container type produce a different key.
    """
    labels_text = str(labels)
    cache_key = (info_name, labels_text)
    if cache_key in dynamic_infos:
        return dynamic_infos[cache_key]
    info_metric = Info(info_name, f'{info_name} for {labels_text}', labels)
    dynamic_infos[cache_key] = info_metric
    return info_metric
def _parse_info_network(raw_output):
    """Parse the text printed by the monitor's ``info network`` command.

    Every line containing ``netN: key=value,key=value,...`` contributes its
    pairs to the entry for ``netN``. When several lines name the same device,
    a later value for a key replaces the earlier one.

    Returns:
        dict mapping the device name (e.g. ``"net0"``) to a dict of its
        key/value settings, all as strings.
    """
    nics_map = {}
    # Splitting on "\n" (rather than requiring a trailing newline in the
    # pattern) keeps the final line even though the caller strips the output.
    for line in raw_output.split("\n"):
        # The greedy prefix skips anything printed before the device name and
        # selects the last "netN:" on the line.
        found = re.match(r'.*(net\d+:.*)', line)
        if found is None:
            continue
        netdev, separator, cfg = found.group(1).strip().partition(": ")
        if not separator:
            continue
        for cfg_pair in cfg.split(","):
            # partition keeps any further '=' inside the value intact.
            key, equals, value = cfg_pair.partition("=")
            if not equals:
                continue
            nics_map.setdefault(netdev, {})[key] = value
    return nics_map


def extract_nic_info_from_monitor(vm_id):
    """Ask the monitor of VM *vm_id* for its network devices.

    Spawns ``qm monitor <vm_id>``, runs ``info network`` and parses the reply.

    Returns:
        A list with one dict per network device, holding the keys ``netdev``,
        ``queues`` (last reported ``index`` plus one), ``type``, ``model``,
        ``macaddr`` and ``ifname``. A setting the monitor did not report is
        given as ``"unknown"``; devices without an ``index`` are omitted.

    Raises:
        pexpect.ExceptionPexpect: if the monitor prompt does not appear
            (timeout or the child exiting early).
    """
    child = pexpect.spawn(f'qm monitor {vm_id}')
    try:
        # Wait for the QEMU monitor prompt
        child.expect('qm>', timeout=10)
        # Execute 'info network'
        child.sendline('info network')
        # Wait for the prompt again; everything before it is the command output
        child.expect('qm>', timeout=10)
        raw_output = child.before.decode('utf-8', errors='replace').strip()
    finally:
        # Always release the child, including when expect() raises.
        child.close()

    return [
        {
            "netdev": netdev,
            "queues": int(cfg["index"]) + 1,
            "type": cfg.get("type", "unknown"),
            "model": cfg.get("model", "unknown"),
            "macaddr": cfg.get("macaddr", "unknown"),
            "ifname": cfg.get("ifname", "unknown"),
        }
        for netdev, cfg in _parse_info_network(raw_output).items()
        if "index" in cfg
    ]
def read_interface_stats(ifname):
    """Read the integer counters under ``/sys/class/net/<ifname>/statistics/``.

    Returns:
        dict mapping each file name in that directory to its content parsed
        as an int. If the directory or a file is missing, reading stops and
        whatever was collected so far (possibly nothing) is returned.
    """
    stats_dir = f"/sys/class/net/{ifname}/statistics/"
    counters = {}
    try:
        for entry in os.listdir(stats_dir):
            with open(os.path.join(stats_dir, entry), "r") as handle:
                text = handle.read()
            counters[entry] = int(text.strip())
    except FileNotFoundError:
        # The interface is absent or went away while being read.
        pass
    return counters
def _collect_vm_metrics(proc):
    """Publish every gauge and info metric for one ``kvm`` process.

    Args:
        proc: a ``psutil.Process`` yielded by ``psutil.process_iter`` with the
            ``memory_percent`` and ``num_threads`` attributes pre-fetched.

    Raises:
        psutil.NoSuchProcess: if the process exits while it is being sampled.
        ValueError: if the ``-m`` value on the command line is not an integer.
    """
    cmdline = proc.cmdline()
    vm_id = flag_to_label_value(cmdline, "-id")

    # Extract vm labels from cmdline
    info_labels = {get_label_name(flag): flag_to_label_value(cmdline, flag) for flag in label_flags}
    info_labels['pid'] = str(proc.pid)
    # NOTE(review): the pve_kvm Info metric is created without labels, so it
    # appears each VM's call here replaces the previous VM's data - confirm.
    info_dict["pve_kvm"].info(info_labels)

    # qemu's -m takes "[size=]megs": drop the optional "size=" prefix. The
    # number is in MiB, and the gauge is documented as bytes.
    mem_mib = int(flag_to_label_value(cmdline, "-m").split("=")[-1])
    per_vm_values = {
        "pve_kvm_vcores": flag_to_label_value(cmdline, "-smp"),
        "pve_kvm_maxmem": mem_mib * 1024 * 1024,
        "pve_kvm_memory_percent": proc.info['memory_percent'],
        "pve_kvm_threads": proc.info['num_threads'],
    }
    for metric_name, value in per_vm_values.items():
        gauge_dict[metric_name].labels(id=vm_id).set(value)

    cpu_times = proc.cpu_times()
    for mode in ['user', 'system', 'iowait']:
        gauge_dict["pve_kvm_cpu"].labels(id=vm_id, mode=mode).set(getattr(cpu_times, mode))

    io = proc.io_counters()
    for io_type, attr in itertools.product(['read', 'write'], ['count', 'bytes', 'chars']):
        gauge = gauge_dict[f'pve_kvm_io_{io_type}_{attr}']
        gauge.labels(id=vm_id).set(getattr(io, f"{io_type}_{attr}"))

    # Query once so both values come from the same sample.
    ctx_switches = proc.num_ctx_switches()
    for switch_type in ["voluntary", "involuntary"]:
        gauge_dict["pve_kvm_ctx_switches"].labels(id=vm_id, type=switch_type).set(getattr(ctx_switches, switch_type))

    # memory_full_info() is costly; take one snapshot and export each field.
    memory = proc.memory_full_info()
    for field, value in memory._asdict().items():
        gauge_dict["pve_kvm_memory_extended"].labels(id=vm_id, type=field).set(value)

    for nic_info in extract_nic_info_from_monitor(vm_id):
        queues = nic_info.pop("queues")
        nic_labels = {"id": vm_id, "ifname": nic_info["ifname"]}
        prom_nic_info = create_or_get_info("pve_kvm_nic", nic_labels.keys())
        # Settings already used as labels are left out of the info payload.
        prom_nic_info.labels(**nic_labels).info({k: v for k, v in nic_info.items() if k not in nic_labels})
        gauge_dict["pve_kvm_nic_queues"].labels(**nic_labels).set(queues)

        # One gauge per counter file found for the NIC's host interface.
        for filename, value in read_interface_stats(nic_info["ifname"]).items():
            gauge = create_or_get_gauge(f"pve_kvm_nic_{filename}", nic_labels.keys())
            gauge.labels(**nic_labels).set(value)


def collect_kvm_metrics():
    """Sample every running process named ``kvm`` and update the exported metrics.

    A VM whose process disappears while it is being sampled is skipped for
    this pass instead of aborting the whole collection.
    """
    for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cpu_percent', 'memory_percent', 'num_threads']):
        if proc.info['name'] != 'kvm':
            continue
        try:
            _collect_vm_metrics(proc)
        except psutil.NoSuchProcess:
            # The VM stopped between enumeration and sampling; nothing to report.
            continue
def main():
    """Parse the command line, start the metrics HTTP server and collect forever.

    Each pass collects KVM metrics when ``--collect-running-vms`` equals
    ``true`` (case-insensitive), then sleeps for ``--interval`` seconds.
    """
    cli = argparse.ArgumentParser(description='PVE metrics exporter for Prometheus')
    cli.add_argument(
        '--port',
        type=int,
        default=DEFAULT_PORT,
        help='Port for the exporter to listen on',
    )
    cli.add_argument(
        '--interval',
        type=int,
        default=DEFAULT_INTERVAL,
        help='Interval between metric collections in seconds',
    )
    cli.add_argument(
        '--collect-running-vms',
        type=str,
        default='true',
        help='Enable or disable collecting running VMs metric (true/false)',
    )
    args = cli.parse_args()

    start_http_server(args.port)

    # The option never changes after parsing, so evaluate it once.
    collect_vms = args.collect_running_vms.lower() == 'true'
    while True:
        if collect_vms:
            collect_kvm_metrics()
        time.sleep(args.interval)


if __name__ == "__main__":
    main()

12
src/setup.py Normal file
View File

@ -0,0 +1,12 @@
from setuptools import setup, find_packages
# Package metadata for pvemon. The console script installs a `pvemon`
# command that calls main() from the pvemon package.
setup(
    name='pvemon',
    version='0.1',
    packages=find_packages(),
    entry_points=dict(
        console_scripts=['pvemon=pvemon:main'],
    ),
)