optimize mem stats collection, add profiling
This commit is contained in:
parent
511e55d269
commit
5c39259f0b
@ -8,6 +8,9 @@ import os
|
|||||||
|
|
||||||
import pexpect
|
import pexpect
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import cProfile
|
||||||
|
|
||||||
DEFAULT_PORT = 9116
|
DEFAULT_PORT = 9116
|
||||||
DEFAULT_INTERVAL = 10
|
DEFAULT_INTERVAL = 10
|
||||||
DEFAULT_PREFIX = "pve"
|
DEFAULT_PREFIX = "pve"
|
||||||
@ -54,6 +57,17 @@ def create_or_get_info(info_name, labels):
|
|||||||
dynamic_infos[(info_name,str(labels))] = Info(f"{prefix}_{info_name}", f'{info_name} for {str(labels)}', labels)
|
dynamic_infos[(info_name,str(labels))] = Info(f"{prefix}_{info_name}", f'{info_name} for {str(labels)}', labels)
|
||||||
return dynamic_infos[(info_name,str(labels))]
|
return dynamic_infos[(info_name,str(labels))]
|
||||||
|
|
||||||
|
def get_memory_info(pid):
|
||||||
|
metrics = {}
|
||||||
|
with open(f"/proc/{pid}/status") as f:
|
||||||
|
for line in f:
|
||||||
|
if line.startswith(("Vm", "Rss", "Hugetlb")):
|
||||||
|
key, value, unit = line.split()
|
||||||
|
if unit == "kB":
|
||||||
|
metrics[key.lower()] = int(value) * 1024 # convert KB to bytes
|
||||||
|
return metrics
|
||||||
|
|
||||||
|
|
||||||
def extract_nic_info_from_monitor(vm_id):
|
def extract_nic_info_from_monitor(vm_id):
|
||||||
child = pexpect.spawn(f'qm monitor {vm_id}')
|
child = pexpect.spawn(f'qm monitor {vm_id}')
|
||||||
|
|
||||||
@ -108,6 +122,7 @@ def read_interface_stats(ifname):
|
|||||||
|
|
||||||
def collect_kvm_metrics():
|
def collect_kvm_metrics():
|
||||||
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cpu_percent', 'memory_percent', 'num_threads']):
|
for proc in psutil.process_iter(['pid', 'name', 'cmdline', 'cpu_percent', 'memory_percent', 'num_threads']):
|
||||||
|
|
||||||
if 'kvm' == proc.info['name']:
|
if 'kvm' == proc.info['name']:
|
||||||
cmdline = proc.cmdline()
|
cmdline = proc.cmdline()
|
||||||
id = flag_to_label_value(cmdline,"-id")
|
id = flag_to_label_value(cmdline,"-id")
|
||||||
@ -115,6 +130,7 @@ def collect_kvm_metrics():
|
|||||||
# Extract vm labels from cmdline
|
# Extract vm labels from cmdline
|
||||||
info_label_dict = {get_label_name(l): flag_to_label_value(cmdline,l) for l in label_flags}
|
info_label_dict = {get_label_name(l): flag_to_label_value(cmdline,l) for l in label_flags}
|
||||||
info_label_dict['pid']=str(proc.pid)
|
info_label_dict['pid']=str(proc.pid)
|
||||||
|
logging.debug(f"got PID: {proc.pid}")
|
||||||
info_dict["kvm"].info(info_label_dict)
|
info_dict["kvm"].info(info_label_dict)
|
||||||
|
|
||||||
d = {
|
d = {
|
||||||
@ -126,6 +142,7 @@ def collect_kvm_metrics():
|
|||||||
|
|
||||||
for k, v in d.items():
|
for k, v in d.items():
|
||||||
gauge_dict[k].labels(id=id).set(v)
|
gauge_dict[k].labels(id=id).set(v)
|
||||||
|
logging.debug(f"gauge_dict[{k}].labels(id={id}).set({v})")
|
||||||
|
|
||||||
cpu_times = proc.cpu_times()
|
cpu_times = proc.cpu_times()
|
||||||
for mode in ['user', 'system', 'iowait']:
|
for mode in ['user', 'system', 'iowait']:
|
||||||
@ -139,11 +156,9 @@ def collect_kvm_metrics():
|
|||||||
for type in [ "voluntary", "involuntary" ]:
|
for type in [ "voluntary", "involuntary" ]:
|
||||||
gauge_dict["kvm_ctx_switches"].labels(id=id, type=type).set(getattr(proc.num_ctx_switches(),type))
|
gauge_dict["kvm_ctx_switches"].labels(id=id, type=type).set(getattr(proc.num_ctx_switches(),type))
|
||||||
|
|
||||||
for attr in dir(proc.memory_full_info()):
|
memory_metrics = get_memory_info(proc.pid) # Assuming proc.pid gives you the PID of the process
|
||||||
if not attr.startswith('_'):
|
for key, value in memory_metrics.items():
|
||||||
value = getattr(proc.memory_full_info(), attr)
|
gauge_dict["kvm_memory_extended"].labels(id=id, type=key).set(value)
|
||||||
if not callable(value):
|
|
||||||
gauge_dict["kvm_memory_extended"].labels(id=id, type=attr).set(value)
|
|
||||||
|
|
||||||
for nic_info in extract_nic_info_from_monitor(id):
|
for nic_info in extract_nic_info_from_monitor(id):
|
||||||
queues = nic_info["queues"]
|
queues = nic_info["queues"]
|
||||||
@ -168,9 +183,16 @@ def main():
|
|||||||
parser.add_argument('--interval', type=int, default=DEFAULT_INTERVAL, help='Interval between metric collections in seconds')
|
parser.add_argument('--interval', type=int, default=DEFAULT_INTERVAL, help='Interval between metric collections in seconds')
|
||||||
parser.add_argument('--collect-running-vms', type=str, default='true', help='Enable or disable collecting running VMs metric (true/false)')
|
parser.add_argument('--collect-running-vms', type=str, default='true', help='Enable or disable collecting running VMs metric (true/false)')
|
||||||
parser.add_argument('--metrics-prefix', type=str, default=DEFAULT_PREFIX, help='<prefix>_ will be prepended to each metric name')
|
parser.add_argument('--metrics-prefix', type=str, default=DEFAULT_PREFIX, help='<prefix>_ will be prepended to each metric name')
|
||||||
|
parser.add_argument('--loglevel', type=str, default='INFO', help='Set log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)')
|
||||||
|
parser.add_argument('--profile', type=str, default='false', help='collect metrics once, and print profiling stats')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
loglevel = getattr(logging, args.loglevel.upper(), None)
|
||||||
|
if not isinstance(loglevel, int):
|
||||||
|
raise ValueError(f'Invalid log level: {args.loglevel}')
|
||||||
|
logging.basicConfig(level=loglevel,format='%(asctime)s: %(message)s')
|
||||||
|
|
||||||
global prefix
|
global prefix
|
||||||
prefix = args.metrics_prefix
|
prefix = args.metrics_prefix
|
||||||
|
|
||||||
@ -182,6 +204,14 @@ def main():
|
|||||||
|
|
||||||
start_http_server(args.port)
|
start_http_server(args.port)
|
||||||
|
|
||||||
|
if args.profile.lower() == 'true':
|
||||||
|
profiler = cProfile.Profile()
|
||||||
|
profiler.enable()
|
||||||
|
collect_kvm_metrics()
|
||||||
|
profiler.disable()
|
||||||
|
profiler.print_stats(sort='cumulative')
|
||||||
|
return
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
if args.collect_running_vms.lower() == 'true':
|
if args.collect_running_vms.lower() == 'true':
|
||||||
collect_kvm_metrics()
|
collect_kvm_metrics()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user