Compare commits
8 Commits
e9b60a3af6
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c761d8c96
|
||
|
|
6495953dc7
|
||
|
|
9c2b1bf384
|
||
|
|
8d9007632a
|
||
|
|
ac2ac7a27c
|
||
|
|
f15d0a90c8
|
||
|
|
0ba7c190f8
|
||
|
|
3808e3b672
|
61
README.md
61
README.md
@@ -4,6 +4,8 @@ Prometheus exporter for Proxmox VE that collects VM and storage metrics directly
|
|||||||
|
|
||||||
Metrics are gathered by reading `/proc`, `/sys`, `/etc/pve`, and running `qm monitor` commands.
|
Metrics are gathered by reading `/proc`, `/sys`, `/etc/pve`, and running `qm monitor` commands.
|
||||||
|
|
||||||
|
> **Disclaimer:** This is a heavily vibe-coded rewrite of [pvemon](https://github.com/illustris/pvemon) for better maintainability and easier distribution. This disclaimer will remain up until the codebase has been reviewed and validated.
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
|
||||||
Requires [Nix](https://nixos.org/):
|
Requires [Nix](https://nixos.org/):
|
||||||
@@ -53,35 +55,42 @@ All metric names are prefixed with the configured `--metrics-prefix` (default `p
|
|||||||
|
|
||||||
### Per-VM metrics
|
### Per-VM metrics
|
||||||
|
|
||||||
| Metric | Labels | Description |
|
| Metric | Type | Labels | Description |
|
||||||
|--------|--------|-------------|
|
|--------|------|--------|-------------|
|
||||||
| `_kvm` | id, name, cpu, pid, pool, pool_levels, pool1-3 | VM info (value 1) |
|
| `_kvm_info` | gauge | id, name, cpu, pid, pool, pool_levels, pool1, pool2, pool3 | VM info (value 1) |
|
||||||
| `_kvm_cpu` | id, mode | CPU time (user/system/iowait) |
|
| `_kvm_cpu_seconds_total` | counter | id, mode | KVM CPU time (mode: user, system, iowait) |
|
||||||
| `_kvm_vcores` | id | Allocated vCPU count |
|
| `_kvm_vcores` | gauge | id | vCores allocated |
|
||||||
| `_kvm_maxmem` | id | Maximum memory in bytes |
|
| `_kvm_maxmem_bytes` | gauge | id | Maximum memory in bytes |
|
||||||
| `_kvm_memory_percent` | id | RSS as percent of host memory |
|
| `_kvm_memory_percent` | gauge | id | Memory percent of host |
|
||||||
| `_kvm_memory_extended` | id, type | Detailed memory fields from /proc status |
|
| `_kvm_memory_extended` | gauge | id, type | Extended memory info from /proc status (vmrss, vmpeak, etc.) |
|
||||||
| `_kvm_threads` | id | Thread count |
|
| `_kvm_threads` | gauge | id | Threads used |
|
||||||
| `_kvm_ctx_switches` | id, type | Context switches (voluntary/involuntary) |
|
| `_kvm_ctx_switches_total` | counter | id, type | Context switches (type: voluntary, involuntary) |
|
||||||
| `_kvm_io_read_bytes` | id | I/O read bytes |
|
| `_kvm_io_read_count_total` | counter | id | Read system calls by KVM process |
|
||||||
| `_kvm_io_write_bytes` | id | I/O write bytes |
|
| `_kvm_io_read_bytes_total` | counter | id | Bytes read from disk by KVM process |
|
||||||
| `_kvm_io_read_chars` | id | I/O read chars |
|
| `_kvm_io_read_chars_total` | counter | id | Bytes read including buffers by KVM process |
|
||||||
| `_kvm_io_write_chars` | id | I/O write chars |
|
| `_kvm_io_write_count_total` | counter | id | Write system calls by KVM process |
|
||||||
| `_kvm_io_read_count` | id | I/O read syscalls |
|
| `_kvm_io_write_bytes_total` | counter | id | Bytes written to disk by KVM process |
|
||||||
| `_kvm_io_write_count` | id | I/O write syscalls |
|
| `_kvm_io_write_chars_total` | counter | id | Bytes written including buffers by KVM process |
|
||||||
| `_kvm_nic` | id, ifname, netdev, queues, type, model, macaddr | NIC info (value 1) |
|
| `_kvm_nic_info` | gauge | id, ifname, netdev, queues, type, model, macaddr | NIC info (value 1) |
|
||||||
| `_kvm_nic_queues` | id, ifname | NIC queue count |
|
| `_kvm_nic_queues` | gauge | id, ifname | NIC queue count |
|
||||||
| `_kvm_nic_*` | id, ifname | Per-NIC sysfs counters (rx_bytes, tx_bytes, etc.) |
|
| `_kvm_nic_{stat}_total` | counter | id, ifname | Per-NIC sysfs counters (rx_bytes, tx_bytes, rx_packets, etc.) |
|
||||||
| `_kvm_disk` | id, disk_name, block_id, disk_path, disk_type, ... | Disk info (value 1) |
|
| `_kvm_disk_info` | gauge | id, disk_name, block_id, disk_path, disk_type, vol_name, pool, pool_name, cluster_id, vg_name, device, attached_to, cache_mode, detect_zeroes, read_only | Disk info (value 1) |
|
||||||
| `_kvm_disk_size` | id, disk_name | Disk size in bytes |
|
| `_kvm_disk_size_bytes` | gauge | id, disk_name | Disk size in bytes |
|
||||||
|
|
||||||
### Storage metrics
|
### Storage metrics
|
||||||
|
|
||||||
| Metric | Labels | Description |
|
| Metric | Type | Labels | Description |
|
||||||
|--------|--------|-------------|
|
|--------|------|--------|-------------|
|
||||||
| `_node_storage` | name, type, ... | Storage pool info (value 1) |
|
| `_node_storage_info` | gauge | (dynamic, varies by storage config) | Storage pool info (value 1) |
|
||||||
| `_node_storage_size` | name, type | Total storage size in bytes |
|
| `_node_storage_size_bytes` | gauge | name, type | Storage total size in bytes |
|
||||||
| `_node_storage_free` | name, type | Free storage space in bytes |
|
| `_node_storage_free_bytes` | gauge | name, type | Storage free space in bytes |
|
||||||
|
|
||||||
|
### Operational metrics
|
||||||
|
|
||||||
|
| Metric | Type | Labels | Description |
|
||||||
|
|--------|------|--------|-------------|
|
||||||
|
| `_scrape_duration_seconds` | gauge | | Duration of metrics collection |
|
||||||
|
| `_exporter_build_info` | gauge | version | Build information (value 1) |
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import (
|
|||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -15,6 +16,7 @@ import (
|
|||||||
|
|
||||||
"pve_local_exporter/internal/cache"
|
"pve_local_exporter/internal/cache"
|
||||||
"pve_local_exporter/internal/config"
|
"pve_local_exporter/internal/config"
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
"pve_local_exporter/internal/procfs"
|
"pve_local_exporter/internal/procfs"
|
||||||
"pve_local_exporter/internal/pveconfig"
|
"pve_local_exporter/internal/pveconfig"
|
||||||
"pve_local_exporter/internal/qmmonitor"
|
"pve_local_exporter/internal/qmmonitor"
|
||||||
@@ -49,6 +51,7 @@ type PVECollector struct {
|
|||||||
descCtxSwitches *prometheus.Desc
|
descCtxSwitches *prometheus.Desc
|
||||||
descNicInfo *prometheus.Desc
|
descNicInfo *prometheus.Desc
|
||||||
descNicQueues *prometheus.Desc
|
descNicQueues *prometheus.Desc
|
||||||
|
descDiskInfo *prometheus.Desc
|
||||||
descDiskSize *prometheus.Desc
|
descDiskSize *prometheus.Desc
|
||||||
descStorageSize *prometheus.Desc
|
descStorageSize *prometheus.Desc
|
||||||
descStorageFree *prometheus.Desc
|
descStorageFree *prometheus.Desc
|
||||||
@@ -139,9 +142,14 @@ func NewWithDeps(cfg config.Config, proc procfs.ProcReader, sys sysfs.SysReader,
|
|||||||
descMemExt: prometheus.NewDesc(p+"_kvm_memory_extended", "Extended memory info", []string{"id", "type"}, nil),
|
descMemExt: prometheus.NewDesc(p+"_kvm_memory_extended", "Extended memory info", []string{"id", "type"}, nil),
|
||||||
descThreads: prometheus.NewDesc(p+"_kvm_threads", "Threads used", []string{"id"}, nil),
|
descThreads: prometheus.NewDesc(p+"_kvm_threads", "Threads used", []string{"id"}, nil),
|
||||||
descCtxSwitches: prometheus.NewDesc(p+"_kvm_ctx_switches_total", "Context switches", []string{"id", "type"}, nil),
|
descCtxSwitches: prometheus.NewDesc(p+"_kvm_ctx_switches_total", "Context switches", []string{"id", "type"}, nil),
|
||||||
descNicInfo: prometheus.NewDesc(p+"_kvm_nic", "NIC info", []string{"id", "ifname", "netdev", "queues", "type", "model", "macaddr"}, nil),
|
descNicInfo: prometheus.NewDesc(p+"_kvm_nic_info", "NIC info", []string{"id", "ifname", "netdev", "queues", "type", "model", "macaddr"}, nil),
|
||||||
descNicQueues: prometheus.NewDesc(p+"_kvm_nic_queues", "NIC queue count", []string{"id", "ifname"}, nil),
|
descNicQueues: prometheus.NewDesc(p+"_kvm_nic_queues", "NIC queue count", []string{"id", "ifname"}, nil),
|
||||||
descDiskSize: prometheus.NewDesc(p+"_kvm_disk_size_bytes", "Disk size bytes", []string{"id", "disk_name"}, nil),
|
descDiskInfo: prometheus.NewDesc(p+"_kvm_disk_info", "Disk info", []string{
|
||||||
|
"id", "disk_name", "block_id", "disk_path", "disk_type",
|
||||||
|
"vol_name", "pool", "pool_name", "cluster_id", "vg_name",
|
||||||
|
"device", "attached_to", "cache_mode", "detect_zeroes", "read_only",
|
||||||
|
}, nil),
|
||||||
|
descDiskSize: prometheus.NewDesc(p+"_kvm_disk_size_bytes", "Disk size bytes", []string{"id", "disk_name"}, nil),
|
||||||
descStorageSize: prometheus.NewDesc(p+"_node_storage_size_bytes", "Storage total size", []string{"name", "type"}, nil),
|
descStorageSize: prometheus.NewDesc(p+"_node_storage_size_bytes", "Storage total size", []string{"name", "type"}, nil),
|
||||||
descStorageFree: prometheus.NewDesc(p+"_node_storage_free_bytes", "Storage free space", []string{"name", "type"}, nil),
|
descStorageFree: prometheus.NewDesc(p+"_node_storage_free_bytes", "Storage free space", []string{"name", "type"}, nil),
|
||||||
|
|
||||||
@@ -185,9 +193,11 @@ func (c *PVECollector) collectVMs(ch chan<- prometheus.Metric) {
|
|||||||
slog.Error("discover QEMU processes", "err", err)
|
slog.Error("discover QEMU processes", "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
logging.Trace("collectVMs", "vm_count", len(procs))
|
||||||
|
|
||||||
// Load pool info
|
// Load pool info
|
||||||
vmPoolMap, pools := c.getPoolInfo()
|
vmPoolMap, pools := c.getPoolInfo()
|
||||||
|
logging.Trace("pool info loaded", "vm_pool_map_size", len(vmPoolMap), "pools_count", len(pools))
|
||||||
|
|
||||||
for _, proc := range procs {
|
for _, proc := range procs {
|
||||||
c.collectVMMetrics(ch, proc, vmPoolMap, pools)
|
c.collectVMMetrics(ch, proc, vmPoolMap, pools)
|
||||||
@@ -275,7 +285,7 @@ func (c *PVECollector) collectVMMetrics(ch chan<- prometheus.Metric, proc procfs
|
|||||||
poolName := vmPoolMap[id]
|
poolName := vmPoolMap[id]
|
||||||
poolInfo := pools[poolName]
|
poolInfo := pools[poolName]
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
prometheus.NewDesc(c.prefix+"_kvm", "VM info", []string{
|
prometheus.NewDesc(c.prefix+"_kvm_info", "VM info", []string{
|
||||||
"id", "name", "cpu", "pid", "pool", "pool_levels", "pool1", "pool2", "pool3",
|
"id", "name", "cpu", "pid", "pool", "pool_levels", "pool1", "pool2", "pool3",
|
||||||
}, nil),
|
}, nil),
|
||||||
prometheus.GaugeValue, 1,
|
prometheus.GaugeValue, 1,
|
||||||
@@ -293,8 +303,10 @@ func (c *PVECollector) collectNICMetrics(ch chan<- prometheus.Metric, proc procf
|
|||||||
slog.Error("qm info network", "vmid", id, "err", err)
|
slog.Error("qm info network", "vmid", id, "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm info network response", "vmid", id, "raw_len", len(raw))
|
||||||
|
|
||||||
nics := qmmonitor.ParseNetworkInfo(raw)
|
nics := qmmonitor.ParseNetworkInfo(raw)
|
||||||
|
logging.Trace("parsed NICs", "vmid", id, "nic_count", len(nics))
|
||||||
for _, nic := range nics {
|
for _, nic := range nics {
|
||||||
// NIC info metric
|
// NIC info metric
|
||||||
ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1,
|
ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1,
|
||||||
@@ -328,14 +340,16 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
|||||||
slog.Error("qm info block", "vmid", id, "err", err)
|
slog.Error("qm info block", "vmid", id, "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm info block response", "vmid", id, "raw_len", len(raw))
|
||||||
|
|
||||||
disks := qmmonitor.ParseBlockInfo(raw)
|
disks := qmmonitor.ParseBlockInfo(raw)
|
||||||
|
logging.Trace("parsed disks", "vmid", id, "disk_count", len(disks))
|
||||||
for diskName, disk := range disks {
|
for diskName, disk := range disks {
|
||||||
// Try to get device symlink target for zvol/rbd/lvm
|
// Try to get device symlink target for zvol/rbd/lvm
|
||||||
if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" {
|
if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" {
|
||||||
target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath)
|
target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
disk.Labels["device"] = target
|
disk.Labels["device"] = filepath.Base(target)
|
||||||
} else {
|
} else {
|
||||||
slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err)
|
slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err)
|
||||||
// Retry with cache invalidation
|
// Retry with cache invalidation
|
||||||
@@ -359,45 +373,63 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("disk metric", "vmid", id, "disk", diskName, "type", disk.DiskType,
|
||||||
|
"path", disk.DiskPath, "size", diskSize, "device", disk.Labels["device"])
|
||||||
|
|
||||||
if diskSize > 0 {
|
if diskSize > 0 {
|
||||||
ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName)
|
ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disk info metric - collect all labels
|
// Disk info metric with fixed label set
|
||||||
labelNames := []string{"id", "disk_name", "block_id", "disk_path", "disk_type"}
|
ch <- prometheus.MustNewConstMetric(c.descDiskInfo, prometheus.GaugeValue, 1,
|
||||||
labelValues := []string{id, diskName, disk.BlockID, disk.DiskPath, disk.DiskType}
|
id,
|
||||||
|
diskName,
|
||||||
// Add variable labels in sorted-ish order
|
disk.BlockID,
|
||||||
for _, key := range sortedKeys(disk.Labels) {
|
disk.DiskPath,
|
||||||
labelNames = append(labelNames, key)
|
disk.DiskType,
|
||||||
labelValues = append(labelValues, disk.Labels[key])
|
disk.Labels["vol_name"],
|
||||||
}
|
disk.Labels["pool"],
|
||||||
|
disk.Labels["pool_name"],
|
||||||
ch <- prometheus.MustNewConstMetric(
|
disk.Labels["cluster_id"],
|
||||||
prometheus.NewDesc(c.prefix+"_kvm_disk", "Disk info", labelNames, nil),
|
disk.Labels["vg_name"],
|
||||||
prometheus.GaugeValue, 1, labelValues...,
|
disk.Labels["device"],
|
||||||
|
disk.Labels["attached_to"],
|
||||||
|
disk.Labels["cache_mode"],
|
||||||
|
disk.Labels["detect_zeroes"],
|
||||||
|
disk.Labels["read_only"],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
||||||
entries := c.getStorageEntries()
|
entries := c.getStorageEntries()
|
||||||
|
logging.Trace("collectStorage", "entries_count", len(entries))
|
||||||
|
|
||||||
|
// Compute superset of property keys across all entries
|
||||||
|
keySet := make(map[string]struct{})
|
||||||
|
for _, entry := range entries {
|
||||||
|
for k := range entry.Properties {
|
||||||
|
keySet[k] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
allKeys := sortedKeySet(keySet)
|
||||||
|
|
||||||
|
// Create descriptor once with fixed labels for this scrape
|
||||||
|
storageInfoDesc := prometheus.NewDesc(
|
||||||
|
c.prefix+"_node_storage_info", "Storage info", allKeys, nil,
|
||||||
|
)
|
||||||
|
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
storageType := entry.Properties["type"]
|
storageType := entry.Properties["type"]
|
||||||
storageName := entry.Properties["name"]
|
storageName := entry.Properties["name"]
|
||||||
|
logging.Trace("storage entry", "name", storageName, "type", storageType)
|
||||||
|
|
||||||
// Info metric
|
// Info metric with consistent labels
|
||||||
labelNames := make([]string, 0, len(entry.Properties))
|
vals := make([]string, len(allKeys))
|
||||||
labelValues := make([]string, 0, len(entry.Properties))
|
for i, k := range allKeys {
|
||||||
for _, key := range sortedKeys(entry.Properties) {
|
vals[i] = entry.Properties[k] // "" if missing
|
||||||
labelNames = append(labelNames, key)
|
|
||||||
labelValues = append(labelValues, entry.Properties[key])
|
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(storageInfoDesc, prometheus.GaugeValue, 1, vals...)
|
||||||
prometheus.NewDesc(c.prefix+"_node_storage", "Storage info", labelNames, nil),
|
|
||||||
prometheus.GaugeValue, 1, labelValues...,
|
|
||||||
)
|
|
||||||
|
|
||||||
// Size metrics
|
// Size metrics
|
||||||
var size storage.StorageSize
|
var size storage.StorageSize
|
||||||
@@ -419,7 +451,7 @@ func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
|||||||
poolName := strings.Split(pool, "/")[0]
|
poolName := strings.Split(pool, "/")[0]
|
||||||
out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName)
|
out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName)
|
||||||
if runErr != nil {
|
if runErr != nil {
|
||||||
slog.Error("zpool list", "pool", poolName, "err", runErr)
|
slog.Warn("zpool list", "pool", poolName, "err", runErr)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
size, err = storage.GetZPoolSize(out)
|
size, err = storage.GetZPoolSize(out)
|
||||||
@@ -478,3 +510,12 @@ func sortedKeys(m map[string]string) []string {
|
|||||||
return keys
|
return keys
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sortedKeySet returns the members of the string set m as a slice sorted in
// ascending order. The returned slice is never nil, even when m is empty.
func sortedKeySet(m map[string]struct{}) []string {
	names := make([]string, len(m))
	next := 0
	for name := range m {
		names[next] = name
		next++
	}
	slices.Sort(names)
	return names
}
|
||||||
|
|
||||||
|
|||||||
@@ -250,7 +250,7 @@ func TestCollector_BasicVMMetrics(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check VM info metric
|
// Check VM info metric
|
||||||
infoMetrics := metrics["pve_kvm"]
|
infoMetrics := metrics["pve_kvm_info"]
|
||||||
if len(infoMetrics) != 1 {
|
if len(infoMetrics) != 1 {
|
||||||
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
||||||
}
|
}
|
||||||
@@ -307,7 +307,7 @@ func TestCollector_StorageMetrics(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check storage info
|
// Check storage info
|
||||||
infoMetrics := metrics["pve_node_storage"]
|
infoMetrics := metrics["pve_node_storage_info"]
|
||||||
if len(infoMetrics) != 1 {
|
if len(infoMetrics) != 1 {
|
||||||
t.Fatalf("expected 1 storage info metric, got %d", len(infoMetrics))
|
t.Fatalf("expected 1 storage info metric, got %d", len(infoMetrics))
|
||||||
}
|
}
|
||||||
@@ -348,7 +348,7 @@ func TestCollector_NICMetrics(t *testing.T) {
|
|||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// NIC info
|
// NIC info
|
||||||
nicInfo := metrics["pve_kvm_nic"]
|
nicInfo := metrics["pve_kvm_nic_info"]
|
||||||
if len(nicInfo) != 1 {
|
if len(nicInfo) != 1 {
|
||||||
t.Fatalf("expected 1 nic info, got %d", len(nicInfo))
|
t.Fatalf("expected 1 nic info, got %d", len(nicInfo))
|
||||||
}
|
}
|
||||||
@@ -409,7 +409,7 @@ func TestCollector_PoolReadError(t *testing.T) {
|
|||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// Should still produce VM info with empty pool
|
// Should still produce VM info with empty pool
|
||||||
infoMetrics := metrics["pve_kvm"]
|
infoMetrics := metrics["pve_kvm_info"]
|
||||||
if len(infoMetrics) != 1 {
|
if len(infoMetrics) != 1 {
|
||||||
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
||||||
}
|
}
|
||||||
@@ -476,3 +476,89 @@ func TestCollector_BuildInfo(t *testing.T) {
|
|||||||
t.Error("build_info missing version label")
|
t.Error("build_info missing version label")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestCollector_DiskInfoMetrics feeds the collector a mocked "info block"
// reply describing two disks and checks that one pve_kvm_disk_info series is
// emitted per disk with the expected label values, and that a
// pve_kvm_disk_size_bytes sample exists for the zvol-backed disk.
func TestCollector_DiskInfoMetrics(t *testing.T) {
|
||||||
|
cfg := config.Config{
|
||||||
|
CollectRunningVMs: true,
|
||||||
|
CollectStorage: false,
|
||||||
|
MetricsPrefix: "pve",
|
||||||
|
}
|
||||||
|
|
||||||
|
// One fake QEMU process (PID 1, VMID "100") with zero-valued CPU, I/O and
// memory readings; only the disk metrics are asserted on below.
proc := &mockProcReader{
|
||||||
|
procs: []procfs.QEMUProcess{
|
||||||
|
{PID: 1, VMID: "100", Name: "vm", Vcores: 1, MaxMem: 1024},
|
||||||
|
},
|
||||||
|
cpuTimes: map[int]procfs.CPUTimes{1: {}},
|
||||||
|
ioCount: map[int]procfs.IOCounters{1: {}},
|
||||||
|
status: map[int]procfs.StatusInfo{
|
||||||
|
1: {Threads: 1, MemoryExtended: procfs.MemoryExtended{}},
|
||||||
|
},
|
||||||
|
memPct: map[int]float64{1: 0},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Canned monitor reply with two drives: scsi0 (raw, read-write, with cache
// mode and detect-zeroes lines) and scsi1 (qcow2, read-only, without them).
blockOutput := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
Cache mode: writeback, direct
|
||||||
|
Detect zeroes: on
|
||||||
|
drive-scsi1 (#block101): /mnt/storage/images/100/vm-100-disk-1.qcow2 (qcow2, read-only)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
`
|
||||||
|
|
||||||
|
// Only the zvol device path has a size registered in the mock sysfs reader.
sys := &mockSysReader{
|
||||||
|
blockSize: map[string]int64{
|
||||||
|
"/dev/zvol/rpool/data/vm-100-disk-0": 10737418240,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// Monitor responses are keyed as "<vmid>:<command>"; the network reply is
// left empty and the block reply is the two-disk output above.
qm := &mockQMMonitor{responses: map[string]string{
|
||||||
|
"100:info network": "",
|
||||||
|
"100:info block": blockOutput,
|
||||||
|
}}
|
||||||
|
|
||||||
|
fr := &mockFileReader{files: map[string]string{"/etc/pve/user.cfg": ""}}
|
||||||
|
c := NewWithDeps(cfg, proc, sys, qm, &mockStatFS{}, &mockCmdRunner{}, fr)
|
||||||
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
|
// Expect exactly one info series per disk in the mocked reply.
diskInfo := metrics["pve_kvm_disk_info"]
|
||||||
|
if len(diskInfo) != 2 {
|
||||||
|
t.Fatalf("expected 2 disk info metrics, got %d", len(diskInfo))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check zvol disk
|
||||||
|
m := findMetricWithLabels(diskInfo, map[string]string{
|
||||||
|
"id": "100",
|
||||||
|
"disk_name": "scsi0",
|
||||||
|
"disk_type": "zvol",
|
||||||
|
"cache_mode": "writeback, direct",
|
||||||
|
"detect_zeroes": "on",
|
||||||
|
"read_only": "",
|
||||||
|
"vol_name": "vm-100-disk-0",
|
||||||
|
"pool": "rpool/data",
|
||||||
|
})
|
||||||
|
if m == nil {
|
||||||
|
t.Error("zvol disk info metric not found with expected labels")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check qcow2 disk (read-only, no cache_mode)
|
||||||
|
m = findMetricWithLabels(diskInfo, map[string]string{
|
||||||
|
"id": "100",
|
||||||
|
"disk_name": "scsi1",
|
||||||
|
"disk_type": "qcow2",
|
||||||
|
"read_only": "true",
|
||||||
|
"cache_mode": "",
|
||||||
|
"vol_name": "vm-100-disk-1",
|
||||||
|
})
|
||||||
|
if m == nil {
|
||||||
|
t.Error("qcow2 disk info metric not found with expected labels")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify disk size for zvol
|
||||||
|
diskSize := metrics["pve_kvm_disk_size_bytes"]
|
||||||
|
if len(diskSize) < 1 {
|
||||||
|
t.Fatal("expected at least 1 disk size metric")
|
||||||
|
}
|
||||||
|
// The expected value matches the size registered in the mock sysfs reader.
m = findMetricWithLabels(diskSize, map[string]string{"disk_name": "scsi0"})
|
||||||
|
if m == nil || metricValue(m) != 10737418240 {
|
||||||
|
t.Errorf("disk size for scsi0 = %v", m)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
14
src/internal/logging/logging.go
Normal file
14
src/internal/logging/logging.go
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
package logging
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LevelTrace is a custom severity four steps below slog.LevelDebug, used for
// very detailed diagnostic output.
const LevelTrace = slog.LevelDebug - 4

// Trace emits msg with the given key/value args at LevelTrace through the
// process-wide default slog logger.
func Trace(msg string, args ...any) {
	logger := slog.Default()
	logger.Log(context.Background(), LevelTrace, msg, args...)
}
|
||||||
@@ -2,10 +2,13 @@ package procfs
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux
|
const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux
|
||||||
@@ -83,6 +86,7 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
numericPIDs := 0
|
||||||
var procs []QEMUProcess
|
var procs []QEMUProcess
|
||||||
for _, e := range entries {
|
for _, e := range entries {
|
||||||
if !e.IsDir() {
|
if !e.IsDir() {
|
||||||
@@ -92,26 +96,32 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
numericPIDs++
|
||||||
|
|
||||||
exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe"))
|
exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logging.Trace("proc readlink failed", "pid", pid, "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if exe != "/usr/bin/qemu-system-x86_64" {
|
if exe != "/usr/bin/qemu-system-x86_64" && exe != "/usr/bin/qemu-system-x86_64 (deleted)" {
|
||||||
|
logging.Trace("proc exe skip", "pid", pid, "exe", exe)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline"))
|
cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logging.Trace("proc cmdline read failed", "pid", pid, "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
cmdline := ParseCmdline(cmdlineBytes)
|
cmdline := ParseCmdline(cmdlineBytes)
|
||||||
|
|
||||||
vmid := FlagValue(cmdline, "-id")
|
vmid := FlagValue(cmdline, "-id")
|
||||||
if vmid == "" {
|
if vmid == "" {
|
||||||
|
logging.Trace("proc no -id flag", "pid", pid)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if !r.VMConfigExists(vmid) {
|
if !r.VMConfigExists(vmid) {
|
||||||
|
logging.Trace("proc no config", "pid", pid, "vmid", vmid)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,8 +133,14 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
|||||||
}
|
}
|
||||||
proc.Vcores = ParseVcores(cmdline)
|
proc.Vcores = ParseVcores(cmdline)
|
||||||
proc.MaxMem = ParseMem(cmdline)
|
proc.MaxMem = ParseMem(cmdline)
|
||||||
|
logging.Trace("proc discovered VM", "pid", pid, "vmid", vmid, "name", proc.Name)
|
||||||
procs = append(procs, proc)
|
procs = append(procs, proc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("proc scan complete", "numeric_pids", numericPIDs, "qemu_count", len(procs))
|
||||||
|
if len(procs) == 0 {
|
||||||
|
slog.Warn("no QEMU processes discovered", "numeric_pids", numericPIDs, "proc_path", r.ProcPath, "pve_cfg_path", r.PVECfgPath)
|
||||||
|
}
|
||||||
return procs, nil
|
return procs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package procfs
|
package procfs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -251,3 +253,78 @@ func TestParseIO_MalformedLines(t *testing.T) {
|
|||||||
t.Errorf("WriteChars = %d, want 100", io.WriteChars)
|
t.Errorf("WriteChars = %d, want 100", io.WriteChars)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestDiscoverQEMUProcesses_DeletedExe verifies that QEMU processes whose
|
||||||
|
// /proc/{pid}/exe has a " (deleted)" suffix (common after package upgrades)
|
||||||
|
// are still discovered.
|
||||||
|
func TestDiscoverQEMUProcesses_DeletedExe(t *testing.T) {
|
||||||
|
// Build a fake /proc tree with two "QEMU" PIDs:
|
||||||
|
// 1000 -> normal exe
|
||||||
|
// 1001 -> exe with " (deleted)" suffix
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
procDir := filepath.Join(tmpDir, "proc")
|
||||||
|
pveCfgDir := filepath.Join(tmpDir, "pve")
|
||||||
|
|
||||||
|
cmdline100 := "/usr/bin/qemu-system-x86_64\x00-id\x00100\x00-name\x00vm100\x00-cpu\x00host\x00-smp\x004\x00-m\x002048\x00"
|
||||||
|
cmdline101 := "/usr/bin/qemu-system-x86_64\x00-id\x00101\x00-name\x00vm101\x00-cpu\x00host\x00-smp\x002\x00-m\x001024\x00"
|
||||||
|
|
||||||
|
for _, tc := range []struct {
|
||||||
|
pid, vmid, exe, cmdline string
|
||||||
|
}{
|
||||||
|
{"1000", "100", "/usr/bin/qemu-system-x86_64", cmdline100},
|
||||||
|
{"1001", "101", "/usr/bin/qemu-system-x86_64 (deleted)", cmdline101},
|
||||||
|
} {
|
||||||
|
pidDir := filepath.Join(procDir, tc.pid)
|
||||||
|
if err := os.MkdirAll(pidDir, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// Create a real file as the symlink target, then symlink "exe" -> that file.
|
||||||
|
// os.Readlink returns the target path, which is what DiscoverQEMUProcesses reads.
|
||||||
|
target := filepath.Join(tmpDir, "bin-"+tc.pid)
|
||||||
|
if err := os.WriteFile(target, nil, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// We can't make Readlink return an arbitrary string with a real symlink,
|
||||||
|
// so instead we write the exe path to a regular file and override the
|
||||||
|
// readlink behavior. But DiscoverQEMUProcesses uses os.Readlink...
|
||||||
|
// The trick: symlink to the exact path string. On Linux, symlink targets
|
||||||
|
// don't need to exist -- Readlink returns the raw target.
|
||||||
|
if err := os.Symlink(tc.exe, filepath.Join(pidDir, "exe")); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(pidDir, "cmdline"), []byte(tc.cmdline), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// Create VM config so VMConfigExists returns true
|
||||||
|
if err := os.MkdirAll(pveCfgDir, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(pveCfgDir, tc.vmid+".conf"), nil, 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r := &RealProcReader{
|
||||||
|
ProcPath: procDir,
|
||||||
|
PVECfgPath: pveCfgDir,
|
||||||
|
}
|
||||||
|
procs, err := r.DiscoverQEMUProcesses()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if len(procs) != 2 {
|
||||||
|
t.Fatalf("expected 2 procs, got %d", len(procs))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect discovered VMIDs
|
||||||
|
vmids := map[string]bool{}
|
||||||
|
for _, p := range procs {
|
||||||
|
vmids[p.VMID] = true
|
||||||
|
}
|
||||||
|
if !vmids["100"] {
|
||||||
|
t.Error("VM 100 (normal exe) not discovered")
|
||||||
|
}
|
||||||
|
if !vmids["101"] {
|
||||||
|
t.Error("VM 101 (deleted exe) not discovered")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ package pveconfig
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StorageEntry holds a parsed storage definition from storage.cfg.
|
// StorageEntry holds a parsed storage definition from storage.cfg.
|
||||||
@@ -47,6 +49,7 @@ func ParseStorageConfig(data string) []StorageEntry {
|
|||||||
"name": SanitizeKey(sectionName),
|
"name": SanitizeKey(sectionName),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
logging.Trace("storage.cfg section", "type", sectionType, "name", sectionName)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,5 +69,6 @@ func ParseStorageConfig(data string) []StorageEntry {
|
|||||||
result = append(result, *current)
|
result = append(result, *current)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("ParseStorageConfig complete", "entries", len(result))
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,8 +3,11 @@ package qmmonitor
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DiskInfo holds parsed block device info from "info block".
|
// DiskInfo holds parsed block device info from "info block".
|
||||||
@@ -16,8 +19,10 @@ type DiskInfo struct {
|
|||||||
Labels map[string]string // additional labels: vol_name, pool, device, etc.
|
Labels map[string]string // additional labels: vol_name, pool, device, etc.
|
||||||
}
|
}
|
||||||
|
|
||||||
// blockHeaderRe matches: "disk_name (#blockN): /path/to/disk (type, mode)"
|
// blockHeaderRe matches block device headers in both old and new QEMU formats:
|
||||||
var blockHeaderRe = regexp.MustCompile(`^(\w+) \(#block(\d+)\): (.+) \(([\w, -]+)\)$`)
|
// Old: "disk_name (#blockN): /path/to/disk (type, mode)"
|
||||||
|
// New: "disk_name: /path/to/disk (type, mode)"
|
||||||
|
var blockHeaderRe = regexp.MustCompile(`^(\w+)(?:\s+\(#block(\d+)\))?: (.+) \(([\w, -]+)\)$`)
|
||||||
|
|
||||||
// lvmRe matches: /dev/{vg_name}/vm-{N}-disk-{N}
|
// lvmRe matches: /dev/{vg_name}/vm-{N}-disk-{N}
|
||||||
var lvmRe = regexp.MustCompile(`^/dev/([^/]+)/(vm-\d+-disk-\d+)$`)
|
var lvmRe = regexp.MustCompile(`^/dev/([^/]+)/(vm-\d+-disk-\d+)$`)
|
||||||
@@ -41,6 +46,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
|
|
||||||
match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
|
match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
|
||||||
if match == nil {
|
if match == nil {
|
||||||
|
logging.Trace("block header no match", "line", lines[0])
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,8 +72,10 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
if strings.HasPrefix(diskPath, "json:") {
|
if strings.HasPrefix(diskPath, "json:") {
|
||||||
resolved, err := HandleJSONPath(diskPath)
|
resolved, err := HandleJSONPath(diskPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logging.Trace("block json path error", "disk", diskName, "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
logging.Trace("block json resolved", "disk", diskName, "resolved", resolved)
|
||||||
diskPath = resolved
|
diskPath = resolved
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,6 +93,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
|
|
||||||
// Detect disk type from path
|
// Detect disk type from path
|
||||||
classifyDisk(&info)
|
classifyDisk(&info)
|
||||||
|
logging.Trace("block classified", "disk", diskName, "type", info.DiskType, "path", diskPath)
|
||||||
|
|
||||||
// Parse additional info from remaining lines
|
// Parse additional info from remaining lines
|
||||||
for _, line := range lines[1:] {
|
for _, line := range lines[1:] {
|
||||||
@@ -92,24 +101,31 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
if strings.HasPrefix(line, "Attached to:") {
|
if strings.HasPrefix(line, "Attached to:") {
|
||||||
// Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
|
// Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
|
||||||
val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
|
val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
|
||||||
|
// Extract short device name from QOM path
|
||||||
|
if strings.Contains(val, "/") {
|
||||||
|
qomParts := strings.Split(val, "/")
|
||||||
|
if len(qomParts) > 3 {
|
||||||
|
val = qomParts[3]
|
||||||
|
}
|
||||||
|
}
|
||||||
info.Labels["attached_to"] = val
|
info.Labels["attached_to"] = val
|
||||||
} else if strings.HasPrefix(line, "Cache mode:") {
|
} else if strings.HasPrefix(line, "Cache mode:") {
|
||||||
val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
|
val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
|
||||||
for _, mode := range strings.Split(val, ", ") {
|
info.Labels["cache_mode"] = val
|
||||||
mode = strings.TrimSpace(mode)
|
|
||||||
if mode != "" {
|
|
||||||
key := "cache_mode_" + strings.ReplaceAll(mode, " ", "_")
|
|
||||||
info.Labels[key] = "true"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if strings.HasPrefix(line, "Detect zeroes:") {
|
} else if strings.HasPrefix(line, "Detect zeroes:") {
|
||||||
info.Labels["detect_zeroes"] = "on"
|
val := strings.TrimSpace(strings.TrimPrefix(line, "Detect zeroes:"))
|
||||||
|
info.Labels["detect_zeroes"] = val
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("block parsed", "disk", diskName, "labels", info.Labels)
|
||||||
result[diskName] = info
|
result[diskName] = info
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("ParseBlockInfo complete", "disk_count", len(result))
|
||||||
|
if len(result) == 0 && raw != "" {
|
||||||
|
slog.Debug("ParseBlockInfo found no disks", "rawLen", len(raw))
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,11 +27,8 @@ func TestParseBlockInfo_Qcow2(t *testing.T) {
|
|||||||
if d.Labels["detect_zeroes"] != "on" {
|
if d.Labels["detect_zeroes"] != "on" {
|
||||||
t.Errorf("detect_zeroes = %q", d.Labels["detect_zeroes"])
|
t.Errorf("detect_zeroes = %q", d.Labels["detect_zeroes"])
|
||||||
}
|
}
|
||||||
if d.Labels["cache_mode_writeback"] != "true" {
|
if d.Labels["cache_mode"] != "writeback, direct" {
|
||||||
t.Errorf("cache_mode_writeback missing")
|
t.Errorf("cache_mode = %q, want %q", d.Labels["cache_mode"], "writeback, direct")
|
||||||
}
|
|
||||||
if d.Labels["cache_mode_direct"] != "true" {
|
|
||||||
t.Errorf("cache_mode_direct missing")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -175,6 +172,85 @@ func TestParseBlockInfo_JSONError(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_Throttle(t *testing.T) {
|
||||||
|
// PVE 9.x / newer QEMU format: no (#blockN)
|
||||||
|
raw := `drive-scsi0: json:{"driver":"raw","file":{"driver":"host_device","filename":"/dev/zvol/rpool/data/vm-100-disk-0"}} (throttle, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
Cache mode: writeback, direct
|
||||||
|
Detect zeroes: unmap
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
if len(disks) != 1 {
|
||||||
|
t.Fatalf("expected 1 disk, got %d", len(disks))
|
||||||
|
}
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.BlockID != "" {
|
||||||
|
t.Errorf("block_id = %q, want empty", d.BlockID)
|
||||||
|
}
|
||||||
|
if d.DiskType != "zvol" {
|
||||||
|
t.Errorf("type = %q, want zvol", d.DiskType)
|
||||||
|
}
|
||||||
|
if d.DiskPath != "/dev/zvol/rpool/data/vm-100-disk-0" {
|
||||||
|
t.Errorf("path = %q", d.DiskPath)
|
||||||
|
}
|
||||||
|
if d.Labels["pool"] != "rpool/data" {
|
||||||
|
t.Errorf("pool = %q", d.Labels["pool"])
|
||||||
|
}
|
||||||
|
if d.Labels["vol_name"] != "vm-100-disk-0" {
|
||||||
|
t.Errorf("vol_name = %q", d.Labels["vol_name"])
|
||||||
|
}
|
||||||
|
if d.Labels["detect_zeroes"] != "unmap" {
|
||||||
|
t.Errorf("detect_zeroes = %q, want unmap", d.Labels["detect_zeroes"])
|
||||||
|
}
|
||||||
|
if d.Labels["cache_mode"] != "writeback, direct" {
|
||||||
|
t.Errorf("cache_mode = %q", d.Labels["cache_mode"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_DetectZeroesUnmap(t *testing.T) {
|
||||||
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Detect zeroes: unmap
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.Labels["detect_zeroes"] != "unmap" {
|
||||||
|
t.Errorf("detect_zeroes = %q, want unmap", d.Labels["detect_zeroes"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_AttachedToVirtio(t *testing.T) {
|
||||||
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtio0/virtio-backend
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.Labels["attached_to"] != "virtio0" {
|
||||||
|
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtio0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_AttachedToVirtioScsi(t *testing.T) {
|
||||||
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.Labels["attached_to"] != "virtioscsi0" {
|
||||||
|
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtioscsi0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_AttachedToBare(t *testing.T) {
|
||||||
|
raw := `drive-ide2 (#block100): /path/to/disk.iso (raw, read-only)
|
||||||
|
Attached to: ide2
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["ide2"]
|
||||||
|
if d.Labels["attached_to"] != "ide2" {
|
||||||
|
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "ide2")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseBlockInfo_MultiDisk(t *testing.T) {
|
func TestParseBlockInfo_MultiDisk(t *testing.T) {
|
||||||
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ package qmmonitor
|
|||||||
import (
|
import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NICInfo holds parsed network interface info from "info network".
|
// NICInfo holds parsed network interface info from "info network".
|
||||||
@@ -64,14 +66,17 @@ func ParseNetworkInfo(raw string) []NICInfo {
|
|||||||
var result []NICInfo
|
var result []NICInfo
|
||||||
for netdev, cfg := range nicsMap {
|
for netdev, cfg := range nicsMap {
|
||||||
idx, _ := strconv.Atoi(cfg["index"])
|
idx, _ := strconv.Atoi(cfg["index"])
|
||||||
result = append(result, NICInfo{
|
nic := NICInfo{
|
||||||
Netdev: netdev,
|
Netdev: netdev,
|
||||||
Queues: idx + 1,
|
Queues: idx + 1,
|
||||||
Type: cfg["type"],
|
Type: cfg["type"],
|
||||||
Model: cfg["model"],
|
Model: cfg["model"],
|
||||||
Macaddr: cfg["macaddr"],
|
Macaddr: cfg["macaddr"],
|
||||||
Ifname: cfg["ifname"],
|
Ifname: cfg["ifname"],
|
||||||
})
|
}
|
||||||
|
logging.Trace("parsed NIC", "netdev", netdev, "ifname", nic.Ifname, "queues", nic.Queues, "model", nic.Model)
|
||||||
|
result = append(result, nic)
|
||||||
}
|
}
|
||||||
|
logging.Trace("ParseNetworkInfo complete", "nic_count", len(result))
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import (
|
|||||||
|
|
||||||
"github.com/creack/pty"
|
"github.com/creack/pty"
|
||||||
"pve_local_exporter/internal/cache"
|
"pve_local_exporter/internal/cache"
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
var errTimeout = errors.New("timeout waiting for qm monitor")
|
var errTimeout = errors.New("timeout waiting for qm monitor")
|
||||||
@@ -75,6 +76,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
|||||||
slog.Debug("qm monitor exec", "vmid", vmid, "cmd", cmd)
|
slog.Debug("qm monitor exec", "vmid", vmid, "cmd", cmd)
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
|
logging.Trace("qm pty spawn start", "vmid", vmid)
|
||||||
qmCmd := exec.Command("qm", "monitor", vmid)
|
qmCmd := exec.Command("qm", "monitor", vmid)
|
||||||
qmCmd.Env = append(os.Environ(), "TERM=dumb")
|
qmCmd.Env = append(os.Environ(), "TERM=dumb")
|
||||||
|
|
||||||
@@ -82,6 +84,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("start qm monitor: %w", err)
|
return "", fmt.Errorf("start qm monitor: %w", err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm pty spawn success", "vmid", vmid, "pid", qmCmd.Process.Pid)
|
||||||
|
|
||||||
reader := bufio.NewReader(ptmx)
|
reader := bufio.NewReader(ptmx)
|
||||||
|
|
||||||
@@ -93,8 +96,10 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
|||||||
m.killOrDefer(qmCmd, ptmx)
|
m.killOrDefer(qmCmd, ptmx)
|
||||||
return "", fmt.Errorf("initial prompt: %w", err)
|
return "", fmt.Errorf("initial prompt: %w", err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm initial prompt received", "vmid", vmid)
|
||||||
|
|
||||||
// Send command
|
// Send command
|
||||||
|
logging.Trace("qm send command", "vmid", vmid, "cmd", cmd)
|
||||||
fmt.Fprintf(ptmx, "%s\n", cmd)
|
fmt.Fprintf(ptmx, "%s\n", cmd)
|
||||||
|
|
||||||
// Read response until next "qm>" prompt
|
// Read response until next "qm>" prompt
|
||||||
@@ -105,6 +110,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
|||||||
m.killOrDefer(qmCmd, ptmx)
|
m.killOrDefer(qmCmd, ptmx)
|
||||||
return "", fmt.Errorf("read response: %w", err)
|
return "", fmt.Errorf("read response: %w", err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm raw response", "vmid", vmid, "raw_len", len(raw))
|
||||||
|
|
||||||
response := parseQMResponse(raw)
|
response := parseQMResponse(raw)
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StorageSize holds the total and free bytes of a storage pool.
|
// StorageSize holds the total and free bytes of a storage pool.
|
||||||
@@ -51,6 +53,8 @@ func GetZPoolSize(output string) (StorageSize, error) {
|
|||||||
return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1])
|
return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("zpool fields", "name", fields[0], "size", fields[1], "alloc", fields[2], "free", fields[3])
|
||||||
|
|
||||||
total, err := strconv.ParseInt(fields[1], 10, 64)
|
total, err := strconv.ParseInt(fields[1], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return StorageSize{}, fmt.Errorf("parse total: %w", err)
|
return StorageSize{}, fmt.Errorf("parse total: %w", err)
|
||||||
@@ -60,5 +64,6 @@ func GetZPoolSize(output string) (StorageSize, error) {
|
|||||||
return StorageSize{}, fmt.Errorf("parse free: %w", err)
|
return StorageSize{}, fmt.Errorf("parse free: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("zpool parsed", "total", total, "free", free)
|
||||||
return StorageSize{Total: total, Free: free}, nil
|
return StorageSize{Total: total, Free: free}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SysReader abstracts /sys access for testability.
|
// SysReader abstracts /sys access for testability.
|
||||||
@@ -46,6 +48,7 @@ func (r *RealSysReader) ReadInterfaceStats(ifname string) (map[string]int64, err
|
|||||||
}
|
}
|
||||||
stats[e.Name()] = val
|
stats[e.Name()] = val
|
||||||
}
|
}
|
||||||
|
logging.Trace("interface stats", "ifname", ifname, "stat_count", len(stats))
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,6 +61,7 @@ func (r *RealSysReader) GetBlockDeviceSize(devPath string) (int64, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err)
|
return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("block device resolved", "path", devPath, "resolved", resolved)
|
||||||
|
|
||||||
// Extract device name from /dev/XXX
|
// Extract device name from /dev/XXX
|
||||||
devName := filepath.Base(resolved)
|
devName := filepath.Base(resolved)
|
||||||
@@ -84,5 +88,6 @@ func GetDeviceSymlinkTarget(devPath string) (string, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
logging.Trace("device symlink resolved", "path", devPath, "target", resolved)
|
||||||
return resolved, nil
|
return resolved, nil
|
||||||
}
|
}
|
||||||
|
|||||||
18
src/main.go
18
src/main.go
@@ -15,6 +15,7 @@ import (
|
|||||||
|
|
||||||
"pve_local_exporter/internal/collector"
|
"pve_local_exporter/internal/collector"
|
||||||
"pve_local_exporter/internal/config"
|
"pve_local_exporter/internal/config"
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
var version string
|
var version string
|
||||||
@@ -30,6 +31,8 @@ func main() {
|
|||||||
|
|
||||||
level := slog.LevelInfo
|
level := slog.LevelInfo
|
||||||
switch strings.ToUpper(cfg.LogLevel) {
|
switch strings.ToUpper(cfg.LogLevel) {
|
||||||
|
case "TRACE":
|
||||||
|
level = logging.LevelTrace
|
||||||
case "DEBUG":
|
case "DEBUG":
|
||||||
level = slog.LevelDebug
|
level = slog.LevelDebug
|
||||||
case "WARNING", "WARN":
|
case "WARNING", "WARN":
|
||||||
@@ -37,7 +40,18 @@ func main() {
|
|||||||
case "ERROR", "CRITICAL":
|
case "ERROR", "CRITICAL":
|
||||||
level = slog.LevelError
|
level = slog.LevelError
|
||||||
}
|
}
|
||||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})))
|
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||||
|
Level: level,
|
||||||
|
ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr {
|
||||||
|
if a.Key == slog.LevelKey {
|
||||||
|
lvl := a.Value.Any().(slog.Level)
|
||||||
|
if lvl == logging.LevelTrace {
|
||||||
|
a.Value = slog.StringValue("TRACE")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a
|
||||||
|
},
|
||||||
|
})))
|
||||||
|
|
||||||
reg := prometheus.NewRegistry()
|
reg := prometheus.NewRegistry()
|
||||||
c := collector.New(cfg)
|
c := collector.New(cfg)
|
||||||
@@ -60,7 +74,7 @@ func main() {
|
|||||||
server.Close()
|
server.Close()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
log.Printf("listening on %s", addr)
|
slog.Info("listening", "addr", addr)
|
||||||
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user