Compare commits
10 Commits
f332a2f6ac
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c761d8c96
|
||
|
|
6495953dc7
|
||
|
|
9c2b1bf384
|
||
|
|
8d9007632a
|
||
|
|
ac2ac7a27c
|
||
|
|
f15d0a90c8
|
||
|
|
0ba7c190f8
|
||
|
|
3808e3b672
|
||
|
|
e9b60a3af6 | ||
|
|
00404095b9
|
61
README.md
61
README.md
@@ -4,6 +4,8 @@ Prometheus exporter for Proxmox VE that collects VM and storage metrics directly
|
|||||||
|
|
||||||
Metrics are gathered by reading `/proc`, `/sys`, `/etc/pve`, and running `qm monitor` commands.
|
Metrics are gathered by reading `/proc`, `/sys`, `/etc/pve`, and running `qm monitor` commands.
|
||||||
|
|
||||||
|
> **Disclaimer:** This is a heavily vibe-coded rewrite of [pvemon](https://github.com/illustris/pvemon) for better maintainability and easier distribution. This disclaimer will remain up until the codebase has been reviewed and validated.
|
||||||
|
|
||||||
## Building
|
## Building
|
||||||
|
|
||||||
Requires [Nix](https://nixos.org/):
|
Requires [Nix](https://nixos.org/):
|
||||||
@@ -53,35 +55,42 @@ All metric names are prefixed with the configured `--metrics-prefix` (default `p
|
|||||||
|
|
||||||
### Per-VM metrics
|
### Per-VM metrics
|
||||||
|
|
||||||
| Metric | Labels | Description |
|
| Metric | Type | Labels | Description |
|
||||||
|--------|--------|-------------|
|
|--------|------|--------|-------------|
|
||||||
| `_kvm` | id, name, cpu, pid, pool, pool_levels, pool1-3 | VM info (value 1) |
|
| `_kvm_info` | gauge | id, name, cpu, pid, pool, pool_levels, pool1, pool2, pool3 | VM info (value 1) |
|
||||||
| `_kvm_cpu` | id, mode | CPU time (user/system/iowait) |
|
| `_kvm_cpu_seconds_total` | counter | id, mode | KVM CPU time (mode: user, system, iowait) |
|
||||||
| `_kvm_vcores` | id | Allocated vCPU count |
|
| `_kvm_vcores` | gauge | id | vCores allocated |
|
||||||
| `_kvm_maxmem` | id | Maximum memory in bytes |
|
| `_kvm_maxmem_bytes` | gauge | id | Maximum memory in bytes |
|
||||||
| `_kvm_memory_percent` | id | RSS as percent of host memory |
|
| `_kvm_memory_percent` | gauge | id | Memory percent of host |
|
||||||
| `_kvm_memory_extended` | id, type | Detailed memory fields from /proc status |
|
| `_kvm_memory_extended` | gauge | id, type | Extended memory info from /proc status (vmrss, vmpeak, etc.) |
|
||||||
| `_kvm_threads` | id | Thread count |
|
| `_kvm_threads` | gauge | id | Threads used |
|
||||||
| `_kvm_ctx_switches` | id, type | Context switches (voluntary/involuntary) |
|
| `_kvm_ctx_switches_total` | counter | id, type | Context switches (type: voluntary, involuntary) |
|
||||||
| `_kvm_io_read_bytes` | id | I/O read bytes |
|
| `_kvm_io_read_count_total` | counter | id | Read system calls by KVM process |
|
||||||
| `_kvm_io_write_bytes` | id | I/O write bytes |
|
| `_kvm_io_read_bytes_total` | counter | id | Bytes read from disk by KVM process |
|
||||||
| `_kvm_io_read_chars` | id | I/O read chars |
|
| `_kvm_io_read_chars_total` | counter | id | Bytes read including buffers by KVM process |
|
||||||
| `_kvm_io_write_chars` | id | I/O write chars |
|
| `_kvm_io_write_count_total` | counter | id | Write system calls by KVM process |
|
||||||
| `_kvm_io_read_count` | id | I/O read syscalls |
|
| `_kvm_io_write_bytes_total` | counter | id | Bytes written to disk by KVM process |
|
||||||
| `_kvm_io_write_count` | id | I/O write syscalls |
|
| `_kvm_io_write_chars_total` | counter | id | Bytes written including buffers by KVM process |
|
||||||
| `_kvm_nic` | id, ifname, netdev, queues, type, model, macaddr | NIC info (value 1) |
|
| `_kvm_nic_info` | gauge | id, ifname, netdev, queues, type, model, macaddr | NIC info (value 1) |
|
||||||
| `_kvm_nic_queues` | id, ifname | NIC queue count |
|
| `_kvm_nic_queues` | gauge | id, ifname | NIC queue count |
|
||||||
| `_kvm_nic_*` | id, ifname | Per-NIC sysfs counters (rx_bytes, tx_bytes, etc.) |
|
| `_kvm_nic_{stat}_total` | counter | id, ifname | Per-NIC sysfs counters (rx_bytes, tx_bytes, rx_packets, etc.) |
|
||||||
| `_kvm_disk` | id, disk_name, block_id, disk_path, disk_type, ... | Disk info (value 1) |
|
| `_kvm_disk_info` | gauge | id, disk_name, block_id, disk_path, disk_type, vol_name, pool, pool_name, cluster_id, vg_name, device, attached_to, cache_mode, detect_zeroes, read_only | Disk info (value 1) |
|
||||||
| `_kvm_disk_size` | id, disk_name | Disk size in bytes |
|
| `_kvm_disk_size_bytes` | gauge | id, disk_name | Disk size in bytes |
|
||||||
|
|
||||||
### Storage metrics
|
### Storage metrics
|
||||||
|
|
||||||
| Metric | Labels | Description |
|
| Metric | Type | Labels | Description |
|
||||||
|--------|--------|-------------|
|
|--------|------|--------|-------------|
|
||||||
| `_node_storage` | name, type, ... | Storage pool info (value 1) |
|
| `_node_storage_info` | gauge | (dynamic, varies by storage config) | Storage pool info (value 1) |
|
||||||
| `_node_storage_size` | name, type | Total storage size in bytes |
|
| `_node_storage_size_bytes` | gauge | name, type | Storage total size in bytes |
|
||||||
| `_node_storage_free` | name, type | Free storage space in bytes |
|
| `_node_storage_free_bytes` | gauge | name, type | Storage free space in bytes |
|
||||||
|
|
||||||
|
### Operational metrics
|
||||||
|
|
||||||
|
| Metric | Type | Labels | Description |
|
||||||
|
|--------|------|--------|-------------|
|
||||||
|
| `_scrape_duration_seconds` | gauge | | Duration of metrics collection |
|
||||||
|
| `_exporter_build_info` | gauge | version | Build information (value 1) |
|
||||||
|
|
||||||
## Architecture
|
## Architecture
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ buildGoModule rec {
|
|||||||
pname = "pve-local-exporter";
|
pname = "pve-local-exporter";
|
||||||
version = "0.1.0";
|
version = "0.1.0";
|
||||||
src = ./src;
|
src = ./src;
|
||||||
vendorHash = "sha256-f0f8tYmoI6DtuB/K4++gu9b2na/d0ECTaF2zvDijW58=";
|
vendorHash = "sha256-MLB7y7shnOhxW8K2R6+d9E63wGEhlErnv+1MYOJO3Hw=";
|
||||||
ldflags = [
|
ldflags = [
|
||||||
"-X=main.version=${version}"
|
"-X=main.version=${version}"
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ module pve_local_exporter
|
|||||||
go 1.25.7
|
go 1.25.7
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/creack/pty v1.1.24
|
||||||
github.com/prometheus/client_golang v1.22.0
|
github.com/prometheus/client_golang v1.22.0
|
||||||
github.com/prometheus/client_model v0.6.1
|
github.com/prometheus/client_model v0.6.1
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
|||||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s=
|
||||||
|
github.com/creack/pty v1.1.24/go.mod h1:08sCNb52WyoAwi2QDyzUCTgcvVFhUzewun7wtTfvcwE=
|
||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
package collector
|
package collector
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
"path/filepath"
|
||||||
"slices"
|
"slices"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@@ -14,6 +16,7 @@ import (
|
|||||||
|
|
||||||
"pve_local_exporter/internal/cache"
|
"pve_local_exporter/internal/cache"
|
||||||
"pve_local_exporter/internal/config"
|
"pve_local_exporter/internal/config"
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
"pve_local_exporter/internal/procfs"
|
"pve_local_exporter/internal/procfs"
|
||||||
"pve_local_exporter/internal/pveconfig"
|
"pve_local_exporter/internal/pveconfig"
|
||||||
"pve_local_exporter/internal/qmmonitor"
|
"pve_local_exporter/internal/qmmonitor"
|
||||||
@@ -48,9 +51,22 @@ type PVECollector struct {
|
|||||||
descCtxSwitches *prometheus.Desc
|
descCtxSwitches *prometheus.Desc
|
||||||
descNicInfo *prometheus.Desc
|
descNicInfo *prometheus.Desc
|
||||||
descNicQueues *prometheus.Desc
|
descNicQueues *prometheus.Desc
|
||||||
|
descDiskInfo *prometheus.Desc
|
||||||
descDiskSize *prometheus.Desc
|
descDiskSize *prometheus.Desc
|
||||||
descStorageSize *prometheus.Desc
|
descStorageSize *prometheus.Desc
|
||||||
descStorageFree *prometheus.Desc
|
descStorageFree *prometheus.Desc
|
||||||
|
|
||||||
|
// IO counter descriptors (counters).
|
||||||
|
descIOReadCount *prometheus.Desc
|
||||||
|
descIOReadBytes *prometheus.Desc
|
||||||
|
descIOReadChars *prometheus.Desc
|
||||||
|
descIOWriteCount *prometheus.Desc
|
||||||
|
descIOWriteBytes *prometheus.Desc
|
||||||
|
descIOWriteChars *prometheus.Desc
|
||||||
|
|
||||||
|
// Operational metrics.
|
||||||
|
descScrapeDuration *prometheus.Desc
|
||||||
|
descBuildInfo *prometheus.Desc
|
||||||
}
|
}
|
||||||
|
|
||||||
type poolData struct {
|
type poolData struct {
|
||||||
@@ -119,18 +135,33 @@ func NewWithDeps(cfg config.Config, proc procfs.ProcReader, sys sysfs.SysReader,
|
|||||||
fileReader: fr,
|
fileReader: fr,
|
||||||
prefix: p,
|
prefix: p,
|
||||||
|
|
||||||
descCPU: prometheus.NewDesc(p+"_kvm_cpu", "KVM CPU time", []string{"id", "mode"}, nil),
|
descCPU: prometheus.NewDesc(p+"_kvm_cpu_seconds_total", "KVM CPU time", []string{"id", "mode"}, nil),
|
||||||
descVcores: prometheus.NewDesc(p+"_kvm_vcores", "vCores allocated", []string{"id"}, nil),
|
descVcores: prometheus.NewDesc(p+"_kvm_vcores", "vCores allocated", []string{"id"}, nil),
|
||||||
descMaxmem: prometheus.NewDesc(p+"_kvm_maxmem", "Maximum memory bytes", []string{"id"}, nil),
|
descMaxmem: prometheus.NewDesc(p+"_kvm_maxmem_bytes", "Maximum memory bytes", []string{"id"}, nil),
|
||||||
descMemPct: prometheus.NewDesc(p+"_kvm_memory_percent", "Memory percent of host", []string{"id"}, nil),
|
descMemPct: prometheus.NewDesc(p+"_kvm_memory_percent", "Memory percent of host", []string{"id"}, nil),
|
||||||
descMemExt: prometheus.NewDesc(p+"_kvm_memory_extended", "Extended memory info", []string{"id", "type"}, nil),
|
descMemExt: prometheus.NewDesc(p+"_kvm_memory_extended", "Extended memory info", []string{"id", "type"}, nil),
|
||||||
descThreads: prometheus.NewDesc(p+"_kvm_threads", "Threads used", []string{"id"}, nil),
|
descThreads: prometheus.NewDesc(p+"_kvm_threads", "Threads used", []string{"id"}, nil),
|
||||||
descCtxSwitches: prometheus.NewDesc(p+"_kvm_ctx_switches", "Context switches", []string{"id", "type"}, nil),
|
descCtxSwitches: prometheus.NewDesc(p+"_kvm_ctx_switches_total", "Context switches", []string{"id", "type"}, nil),
|
||||||
descNicInfo: prometheus.NewDesc(p+"_kvm_nic", "NIC info", []string{"id", "ifname", "netdev", "queues", "type", "model", "macaddr"}, nil),
|
descNicInfo: prometheus.NewDesc(p+"_kvm_nic_info", "NIC info", []string{"id", "ifname", "netdev", "queues", "type", "model", "macaddr"}, nil),
|
||||||
descNicQueues: prometheus.NewDesc(p+"_kvm_nic_queues", "NIC queue count", []string{"id", "ifname"}, nil),
|
descNicQueues: prometheus.NewDesc(p+"_kvm_nic_queues", "NIC queue count", []string{"id", "ifname"}, nil),
|
||||||
descDiskSize: prometheus.NewDesc(p+"_kvm_disk_size", "Disk size bytes", []string{"id", "disk_name"}, nil),
|
descDiskInfo: prometheus.NewDesc(p+"_kvm_disk_info", "Disk info", []string{
|
||||||
descStorageSize: prometheus.NewDesc(p+"_node_storage_size", "Storage total size", []string{"name", "type"}, nil),
|
"id", "disk_name", "block_id", "disk_path", "disk_type",
|
||||||
descStorageFree: prometheus.NewDesc(p+"_node_storage_free", "Storage free space", []string{"name", "type"}, nil),
|
"vol_name", "pool", "pool_name", "cluster_id", "vg_name",
|
||||||
|
"device", "attached_to", "cache_mode", "detect_zeroes", "read_only",
|
||||||
|
}, nil),
|
||||||
|
descDiskSize: prometheus.NewDesc(p+"_kvm_disk_size_bytes", "Disk size bytes", []string{"id", "disk_name"}, nil),
|
||||||
|
descStorageSize: prometheus.NewDesc(p+"_node_storage_size_bytes", "Storage total size", []string{"name", "type"}, nil),
|
||||||
|
descStorageFree: prometheus.NewDesc(p+"_node_storage_free_bytes", "Storage free space", []string{"name", "type"}, nil),
|
||||||
|
|
||||||
|
descIOReadCount: prometheus.NewDesc(p+"_kvm_io_read_count_total", "Read system calls by KVM process", []string{"id"}, nil),
|
||||||
|
descIOReadBytes: prometheus.NewDesc(p+"_kvm_io_read_bytes_total", "Bytes read from disk by KVM process", []string{"id"}, nil),
|
||||||
|
descIOReadChars: prometheus.NewDesc(p+"_kvm_io_read_chars_total", "Bytes read including buffers by KVM process", []string{"id"}, nil),
|
||||||
|
descIOWriteCount: prometheus.NewDesc(p+"_kvm_io_write_count_total", "Write system calls by KVM process", []string{"id"}, nil),
|
||||||
|
descIOWriteBytes: prometheus.NewDesc(p+"_kvm_io_write_bytes_total", "Bytes written to disk by KVM process", []string{"id"}, nil),
|
||||||
|
descIOWriteChars: prometheus.NewDesc(p+"_kvm_io_write_chars_total", "Bytes written including buffers by KVM process", []string{"id"}, nil),
|
||||||
|
|
||||||
|
descScrapeDuration: prometheus.NewDesc(p+"_scrape_duration_seconds", "Duration of metrics collection", nil, nil),
|
||||||
|
descBuildInfo: prometheus.NewDesc(p+"_exporter_build_info", "Build information", []string{"version"}, nil),
|
||||||
}
|
}
|
||||||
c.poolCache = cache.NewMtimeCache[poolData]("/etc/pve/user.cfg", fileMtime)
|
c.poolCache = cache.NewMtimeCache[poolData]("/etc/pve/user.cfg", fileMtime)
|
||||||
c.storageCache = cache.NewMtimeCache[[]pveconfig.StorageEntry]("/etc/pve/storage.cfg", fileMtime)
|
c.storageCache = cache.NewMtimeCache[[]pveconfig.StorageEntry]("/etc/pve/storage.cfg", fileMtime)
|
||||||
@@ -143,12 +174,17 @@ func (c *PVECollector) Describe(ch chan<- *prometheus.Desc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (c *PVECollector) Collect(ch chan<- prometheus.Metric) {
|
func (c *PVECollector) Collect(ch chan<- prometheus.Metric) {
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
if c.cfg.CollectRunningVMs {
|
if c.cfg.CollectRunningVMs {
|
||||||
c.collectVMs(ch)
|
c.collectVMs(ch)
|
||||||
}
|
}
|
||||||
if c.cfg.CollectStorage {
|
if c.cfg.CollectStorage {
|
||||||
c.collectStorage(ch)
|
c.collectStorage(ch)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.descScrapeDuration, prometheus.GaugeValue, time.Since(start).Seconds())
|
||||||
|
ch <- prometheus.MustNewConstMetric(c.descBuildInfo, prometheus.GaugeValue, 1, c.cfg.Version)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *PVECollector) collectVMs(ch chan<- prometheus.Metric) {
|
func (c *PVECollector) collectVMs(ch chan<- prometheus.Metric) {
|
||||||
@@ -157,9 +193,11 @@ func (c *PVECollector) collectVMs(ch chan<- prometheus.Metric) {
|
|||||||
slog.Error("discover QEMU processes", "err", err)
|
slog.Error("discover QEMU processes", "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
logging.Trace("collectVMs", "vm_count", len(procs))
|
||||||
|
|
||||||
// Load pool info
|
// Load pool info
|
||||||
vmPoolMap, pools := c.getPoolInfo()
|
vmPoolMap, pools := c.getPoolInfo()
|
||||||
|
logging.Trace("pool info loaded", "vm_pool_map_size", len(vmPoolMap), "pools_count", len(pools))
|
||||||
|
|
||||||
for _, proc := range procs {
|
for _, proc := range procs {
|
||||||
c.collectVMMetrics(ch, proc, vmPoolMap, pools)
|
c.collectVMMetrics(ch, proc, vmPoolMap, pools)
|
||||||
@@ -203,7 +241,7 @@ func (c *PVECollector) collectVMMetrics(ch chan<- prometheus.Metric, proc procfs
|
|||||||
{"system", cpu.System},
|
{"system", cpu.System},
|
||||||
{"iowait", cpu.IOWait},
|
{"iowait", cpu.IOWait},
|
||||||
} {
|
} {
|
||||||
ch <- prometheus.MustNewConstMetric(c.descCPU, prometheus.GaugeValue, m.val, id, m.mode)
|
ch <- prometheus.MustNewConstMetric(c.descCPU, prometheus.CounterValue, m.val, id, m.mode)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -229,35 +267,25 @@ func (c *PVECollector) collectVMMetrics(ch chan<- prometheus.Metric, proc procfs
|
|||||||
ch <- prometheus.MustNewConstMetric(c.descThreads, prometheus.GaugeValue, float64(status.Threads), id)
|
ch <- prometheus.MustNewConstMetric(c.descThreads, prometheus.GaugeValue, float64(status.Threads), id)
|
||||||
|
|
||||||
// Context switches
|
// Context switches
|
||||||
ch <- prometheus.MustNewConstMetric(c.descCtxSwitches, prometheus.GaugeValue, float64(status.CtxSwitches.Voluntary), id, "voluntary")
|
ch <- prometheus.MustNewConstMetric(c.descCtxSwitches, prometheus.CounterValue, float64(status.CtxSwitches.Voluntary), id, "voluntary")
|
||||||
ch <- prometheus.MustNewConstMetric(c.descCtxSwitches, prometheus.GaugeValue, float64(status.CtxSwitches.Involuntary), id, "involuntary")
|
ch <- prometheus.MustNewConstMetric(c.descCtxSwitches, prometheus.CounterValue, float64(status.CtxSwitches.Involuntary), id, "involuntary")
|
||||||
}
|
}
|
||||||
|
|
||||||
// IO counters
|
// IO counters
|
||||||
if io, err := c.proc.GetIOCounters(proc.PID); err == nil {
|
if io, err := c.proc.GetIOCounters(proc.PID); err == nil {
|
||||||
for _, m := range []struct {
|
ch <- prometheus.MustNewConstMetric(c.descIOReadCount, prometheus.CounterValue, float64(io.ReadSyscalls), id)
|
||||||
name string
|
ch <- prometheus.MustNewConstMetric(c.descIOReadBytes, prometheus.CounterValue, float64(io.ReadBytes), id)
|
||||||
val uint64
|
ch <- prometheus.MustNewConstMetric(c.descIOReadChars, prometheus.CounterValue, float64(io.ReadChars), id)
|
||||||
}{
|
ch <- prometheus.MustNewConstMetric(c.descIOWriteCount, prometheus.CounterValue, float64(io.WriteSyscalls), id)
|
||||||
{"kvm_io_read_count", io.ReadSyscalls},
|
ch <- prometheus.MustNewConstMetric(c.descIOWriteBytes, prometheus.CounterValue, float64(io.WriteBytes), id)
|
||||||
{"kvm_io_read_bytes", io.ReadBytes},
|
ch <- prometheus.MustNewConstMetric(c.descIOWriteChars, prometheus.CounterValue, float64(io.WriteChars), id)
|
||||||
{"kvm_io_read_chars", io.ReadChars},
|
|
||||||
{"kvm_io_write_count", io.WriteSyscalls},
|
|
||||||
{"kvm_io_write_bytes", io.WriteBytes},
|
|
||||||
{"kvm_io_write_chars", io.WriteChars},
|
|
||||||
} {
|
|
||||||
ch <- prometheus.MustNewConstMetric(
|
|
||||||
prometheus.NewDesc(c.prefix+"_"+m.name, "", []string{"id"}, nil),
|
|
||||||
prometheus.GaugeValue, float64(m.val), id,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// VM info metric
|
// VM info metric
|
||||||
poolName := vmPoolMap[id]
|
poolName := vmPoolMap[id]
|
||||||
poolInfo := pools[poolName]
|
poolInfo := pools[poolName]
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
prometheus.NewDesc(c.prefix+"_kvm", "VM info", []string{
|
prometheus.NewDesc(c.prefix+"_kvm_info", "VM info", []string{
|
||||||
"id", "name", "cpu", "pid", "pool", "pool_levels", "pool1", "pool2", "pool3",
|
"id", "name", "cpu", "pid", "pool", "pool_levels", "pool1", "pool2", "pool3",
|
||||||
}, nil),
|
}, nil),
|
||||||
prometheus.GaugeValue, 1,
|
prometheus.GaugeValue, 1,
|
||||||
@@ -275,8 +303,10 @@ func (c *PVECollector) collectNICMetrics(ch chan<- prometheus.Metric, proc procf
|
|||||||
slog.Error("qm info network", "vmid", id, "err", err)
|
slog.Error("qm info network", "vmid", id, "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm info network response", "vmid", id, "raw_len", len(raw))
|
||||||
|
|
||||||
nics := qmmonitor.ParseNetworkInfo(raw)
|
nics := qmmonitor.ParseNetworkInfo(raw)
|
||||||
|
logging.Trace("parsed NICs", "vmid", id, "nic_count", len(nics))
|
||||||
for _, nic := range nics {
|
for _, nic := range nics {
|
||||||
// NIC info metric
|
// NIC info metric
|
||||||
ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1,
|
ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1,
|
||||||
@@ -295,8 +325,8 @@ func (c *PVECollector) collectNICMetrics(ch chan<- prometheus.Metric, proc procf
|
|||||||
}
|
}
|
||||||
for statName, val := range stats {
|
for statName, val := range stats {
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(
|
||||||
prometheus.NewDesc(c.prefix+"_kvm_nic_"+statName, "", []string{"id", "ifname"}, nil),
|
prometheus.NewDesc(c.prefix+"_kvm_nic_"+statName+"_total", fmt.Sprintf("NIC statistic %s", statName), []string{"id", "ifname"}, nil),
|
||||||
prometheus.GaugeValue, float64(val), id, nic.Ifname,
|
prometheus.CounterValue, float64(val), id, nic.Ifname,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -310,14 +340,16 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
|||||||
slog.Error("qm info block", "vmid", id, "err", err)
|
slog.Error("qm info block", "vmid", id, "err", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm info block response", "vmid", id, "raw_len", len(raw))
|
||||||
|
|
||||||
disks := qmmonitor.ParseBlockInfo(raw)
|
disks := qmmonitor.ParseBlockInfo(raw)
|
||||||
|
logging.Trace("parsed disks", "vmid", id, "disk_count", len(disks))
|
||||||
for diskName, disk := range disks {
|
for diskName, disk := range disks {
|
||||||
// Try to get device symlink target for zvol/rbd/lvm
|
// Try to get device symlink target for zvol/rbd/lvm
|
||||||
if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" {
|
if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" {
|
||||||
target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath)
|
target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
disk.Labels["device"] = target
|
disk.Labels["device"] = filepath.Base(target)
|
||||||
} else {
|
} else {
|
||||||
slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err)
|
slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err)
|
||||||
// Retry with cache invalidation
|
// Retry with cache invalidation
|
||||||
@@ -341,45 +373,63 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("disk metric", "vmid", id, "disk", diskName, "type", disk.DiskType,
|
||||||
|
"path", disk.DiskPath, "size", diskSize, "device", disk.Labels["device"])
|
||||||
|
|
||||||
if diskSize > 0 {
|
if diskSize > 0 {
|
||||||
ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName)
|
ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Disk info metric - collect all labels
|
// Disk info metric with fixed label set
|
||||||
labelNames := []string{"id", "disk_name", "block_id", "disk_path", "disk_type"}
|
ch <- prometheus.MustNewConstMetric(c.descDiskInfo, prometheus.GaugeValue, 1,
|
||||||
labelValues := []string{id, diskName, disk.BlockID, disk.DiskPath, disk.DiskType}
|
id,
|
||||||
|
diskName,
|
||||||
// Add variable labels in sorted-ish order
|
disk.BlockID,
|
||||||
for _, key := range sortedKeys(disk.Labels) {
|
disk.DiskPath,
|
||||||
labelNames = append(labelNames, key)
|
disk.DiskType,
|
||||||
labelValues = append(labelValues, disk.Labels[key])
|
disk.Labels["vol_name"],
|
||||||
}
|
disk.Labels["pool"],
|
||||||
|
disk.Labels["pool_name"],
|
||||||
ch <- prometheus.MustNewConstMetric(
|
disk.Labels["cluster_id"],
|
||||||
prometheus.NewDesc(c.prefix+"_kvm_disk", "Disk info", labelNames, nil),
|
disk.Labels["vg_name"],
|
||||||
prometheus.GaugeValue, 1, labelValues...,
|
disk.Labels["device"],
|
||||||
|
disk.Labels["attached_to"],
|
||||||
|
disk.Labels["cache_mode"],
|
||||||
|
disk.Labels["detect_zeroes"],
|
||||||
|
disk.Labels["read_only"],
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
||||||
entries := c.getStorageEntries()
|
entries := c.getStorageEntries()
|
||||||
|
logging.Trace("collectStorage", "entries_count", len(entries))
|
||||||
|
|
||||||
|
// Compute superset of property keys across all entries
|
||||||
|
keySet := make(map[string]struct{})
|
||||||
|
for _, entry := range entries {
|
||||||
|
for k := range entry.Properties {
|
||||||
|
keySet[k] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
allKeys := sortedKeySet(keySet)
|
||||||
|
|
||||||
|
// Create descriptor once with fixed labels for this scrape
|
||||||
|
storageInfoDesc := prometheus.NewDesc(
|
||||||
|
c.prefix+"_node_storage_info", "Storage info", allKeys, nil,
|
||||||
|
)
|
||||||
|
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
storageType := entry.Properties["type"]
|
storageType := entry.Properties["type"]
|
||||||
storageName := entry.Properties["name"]
|
storageName := entry.Properties["name"]
|
||||||
|
logging.Trace("storage entry", "name", storageName, "type", storageType)
|
||||||
|
|
||||||
// Info metric
|
// Info metric with consistent labels
|
||||||
labelNames := make([]string, 0, len(entry.Properties))
|
vals := make([]string, len(allKeys))
|
||||||
labelValues := make([]string, 0, len(entry.Properties))
|
for i, k := range allKeys {
|
||||||
for _, key := range sortedKeys(entry.Properties) {
|
vals[i] = entry.Properties[k] // "" if missing
|
||||||
labelNames = append(labelNames, key)
|
|
||||||
labelValues = append(labelValues, entry.Properties[key])
|
|
||||||
}
|
}
|
||||||
ch <- prometheus.MustNewConstMetric(
|
ch <- prometheus.MustNewConstMetric(storageInfoDesc, prometheus.GaugeValue, 1, vals...)
|
||||||
prometheus.NewDesc(c.prefix+"_node_storage", "Storage info", labelNames, nil),
|
|
||||||
prometheus.GaugeValue, 1, labelValues...,
|
|
||||||
)
|
|
||||||
|
|
||||||
// Size metrics
|
// Size metrics
|
||||||
var size storage.StorageSize
|
var size storage.StorageSize
|
||||||
@@ -401,7 +451,7 @@ func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
|||||||
poolName := strings.Split(pool, "/")[0]
|
poolName := strings.Split(pool, "/")[0]
|
||||||
out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName)
|
out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName)
|
||||||
if runErr != nil {
|
if runErr != nil {
|
||||||
slog.Error("zpool list", "pool", poolName, "err", runErr)
|
slog.Warn("zpool list", "pool", poolName, "err", runErr)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
size, err = storage.GetZPoolSize(out)
|
size, err = storage.GetZPoolSize(out)
|
||||||
@@ -460,3 +510,12 @@ func sortedKeys(m map[string]string) []string {
|
|||||||
return keys
|
return keys
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// sortedKeySet returns the keys of the set m in ascending order.
//
// Map iteration order is random, so the keys are sorted to give callers a
// stable ordering. The result is always non-nil, and is empty when m is nil
// or has no entries.
func sortedKeySet(m map[string]struct{}) []string {
	keys := make([]string, 0, len(m))
	for k := range m {
		keys = append(keys, k)
	}
	slices.Sort(keys)
	return keys
}
|
||||||
|
|
||||||
|
|||||||
@@ -87,6 +87,17 @@ func (m *mockFileReader) ReadFile(path string) (string, error) {
|
|||||||
return m.files[path], nil
|
return m.files[path], nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// metricValue extracts the numeric value from a dto.Metric, whether it is a Gauge or Counter.
|
||||||
|
func metricValue(m *dto.Metric) float64 {
|
||||||
|
if m.Gauge != nil {
|
||||||
|
return m.Gauge.GetValue()
|
||||||
|
}
|
||||||
|
if m.Counter != nil {
|
||||||
|
return m.Counter.GetValue()
|
||||||
|
}
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
// collectMetrics collects all metrics from a collector into a map keyed by metric name.
|
// collectMetrics collects all metrics from a collector into a map keyed by metric name.
|
||||||
func collectMetrics(c prometheus.Collector) map[string][]*dto.Metric {
|
func collectMetrics(c prometheus.Collector) map[string][]*dto.Metric {
|
||||||
ch := make(chan prometheus.Metric, 200)
|
ch := make(chan prometheus.Metric, 200)
|
||||||
@@ -182,58 +193,64 @@ func TestCollector_BasicVMMetrics(t *testing.T) {
|
|||||||
c := NewWithDeps(cfg, proc, sys, qm, &mockStatFS{}, &mockCmdRunner{}, fr)
|
c := NewWithDeps(cfg, proc, sys, qm, &mockStatFS{}, &mockCmdRunner{}, fr)
|
||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// Check CPU metrics
|
// Check CPU metrics (counter)
|
||||||
cpuMetrics := metrics["pve_kvm_cpu"]
|
cpuMetrics := metrics["pve_kvm_cpu_seconds_total"]
|
||||||
if len(cpuMetrics) != 3 {
|
if len(cpuMetrics) != 3 {
|
||||||
t.Fatalf("expected 3 cpu metrics, got %d", len(cpuMetrics))
|
t.Fatalf("expected 3 cpu metrics, got %d", len(cpuMetrics))
|
||||||
}
|
}
|
||||||
m := findMetricWithLabels(cpuMetrics, map[string]string{"mode": "user"})
|
m := findMetricWithLabels(cpuMetrics, map[string]string{"mode": "user"})
|
||||||
if m == nil || m.Gauge.GetValue() != 5.0 {
|
if m == nil || metricValue(m) != 5.0 {
|
||||||
t.Errorf("cpu user = %v", m)
|
t.Errorf("cpu user = %v", m)
|
||||||
}
|
}
|
||||||
m = findMetricWithLabels(cpuMetrics, map[string]string{"mode": "system"})
|
m = findMetricWithLabels(cpuMetrics, map[string]string{"mode": "system"})
|
||||||
if m == nil || m.Gauge.GetValue() != 2.0 {
|
if m == nil || metricValue(m) != 2.0 {
|
||||||
t.Errorf("cpu system = %v", m)
|
t.Errorf("cpu system = %v", m)
|
||||||
}
|
}
|
||||||
m = findMetricWithLabels(cpuMetrics, map[string]string{"mode": "iowait"})
|
m = findMetricWithLabels(cpuMetrics, map[string]string{"mode": "iowait"})
|
||||||
if m == nil || m.Gauge.GetValue() != 0.5 {
|
if m == nil || metricValue(m) != 0.5 {
|
||||||
t.Errorf("cpu iowait = %v", m)
|
t.Errorf("cpu iowait = %v", m)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check vcores
|
// Check vcores
|
||||||
vcoreMetrics := metrics["pve_kvm_vcores"]
|
vcoreMetrics := metrics["pve_kvm_vcores"]
|
||||||
if len(vcoreMetrics) != 1 || vcoreMetrics[0].Gauge.GetValue() != 4 {
|
if len(vcoreMetrics) != 1 || metricValue(vcoreMetrics[0]) != 4 {
|
||||||
t.Errorf("vcores = %v", vcoreMetrics)
|
t.Errorf("vcores = %v", vcoreMetrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check threads
|
// Check threads
|
||||||
threadMetrics := metrics["pve_kvm_threads"]
|
threadMetrics := metrics["pve_kvm_threads"]
|
||||||
if len(threadMetrics) != 1 || threadMetrics[0].Gauge.GetValue() != 50 {
|
if len(threadMetrics) != 1 || metricValue(threadMetrics[0]) != 50 {
|
||||||
t.Errorf("threads = %v", threadMetrics)
|
t.Errorf("threads = %v", threadMetrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check memory percent
|
// Check memory percent
|
||||||
memPctMetrics := metrics["pve_kvm_memory_percent"]
|
memPctMetrics := metrics["pve_kvm_memory_percent"]
|
||||||
if len(memPctMetrics) != 1 || memPctMetrics[0].Gauge.GetValue() != 25.5 {
|
if len(memPctMetrics) != 1 || metricValue(memPctMetrics[0]) != 25.5 {
|
||||||
t.Errorf("memory_percent = %v", memPctMetrics)
|
t.Errorf("memory_percent = %v", memPctMetrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check IO
|
// Check maxmem (renamed with _bytes)
|
||||||
if m := metrics["pve_kvm_io_read_count"]; len(m) != 1 || m[0].Gauge.GetValue() != 10 {
|
maxmemMetrics := metrics["pve_kvm_maxmem_bytes"]
|
||||||
t.Errorf("io_read_count = %v", m)
|
if len(maxmemMetrics) != 1 || metricValue(maxmemMetrics[0]) != float64(4194304*1024) {
|
||||||
}
|
t.Errorf("maxmem_bytes = %v", maxmemMetrics)
|
||||||
if m := metrics["pve_kvm_io_write_bytes"]; len(m) != 1 || m[0].Gauge.GetValue() != 1000 {
|
|
||||||
t.Errorf("io_write_bytes = %v", m)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check context switches
|
// Check IO (counters, renamed with _total)
|
||||||
csMetrics := metrics["pve_kvm_ctx_switches"]
|
if m := metrics["pve_kvm_io_read_count_total"]; len(m) != 1 || metricValue(m[0]) != 10 {
|
||||||
|
t.Errorf("io_read_count_total = %v", m)
|
||||||
|
}
|
||||||
|
if m := metrics["pve_kvm_io_write_bytes_total"]; len(m) != 1 || metricValue(m[0]) != 1000 {
|
||||||
|
t.Errorf("io_write_bytes_total = %v", m)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check context switches (counter, renamed with _total)
|
||||||
|
csMetrics := metrics["pve_kvm_ctx_switches_total"]
|
||||||
if len(csMetrics) != 2 {
|
if len(csMetrics) != 2 {
|
||||||
t.Fatalf("expected 2 ctx_switches metrics, got %d", len(csMetrics))
|
t.Fatalf("expected 2 ctx_switches_total metrics, got %d", len(csMetrics))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check VM info metric
|
// Check VM info metric
|
||||||
infoMetrics := metrics["pve_kvm"]
|
infoMetrics := metrics["pve_kvm_info"]
|
||||||
if len(infoMetrics) != 1 {
|
if len(infoMetrics) != 1 {
|
||||||
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
||||||
}
|
}
|
||||||
@@ -241,6 +258,16 @@ func TestCollector_BasicVMMetrics(t *testing.T) {
|
|||||||
if m == nil {
|
if m == nil {
|
||||||
t.Error("kvm info metric not found with expected labels")
|
t.Error("kvm info metric not found with expected labels")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check scrape duration exists
|
||||||
|
if sd := metrics["pve_scrape_duration_seconds"]; len(sd) != 1 {
|
||||||
|
t.Errorf("expected 1 scrape_duration_seconds, got %d", len(sd))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check build info exists
|
||||||
|
if bi := metrics["pve_exporter_build_info"]; len(bi) != 1 {
|
||||||
|
t.Errorf("expected 1 build_info, got %d", len(bi))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCollector_StorageMetrics(t *testing.T) {
|
func TestCollector_StorageMetrics(t *testing.T) {
|
||||||
@@ -267,20 +294,20 @@ func TestCollector_StorageMetrics(t *testing.T) {
|
|||||||
|
|
||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// Check storage size
|
// Check storage size (renamed with _bytes)
|
||||||
sizeMetrics := metrics["pve_node_storage_size"]
|
sizeMetrics := metrics["pve_node_storage_size_bytes"]
|
||||||
if len(sizeMetrics) != 1 || sizeMetrics[0].Gauge.GetValue() != 1e9 {
|
if len(sizeMetrics) != 1 || metricValue(sizeMetrics[0]) != 1e9 {
|
||||||
t.Errorf("storage_size = %v", sizeMetrics)
|
t.Errorf("storage_size_bytes = %v", sizeMetrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check storage free
|
// Check storage free (renamed with _bytes)
|
||||||
freeMetrics := metrics["pve_node_storage_free"]
|
freeMetrics := metrics["pve_node_storage_free_bytes"]
|
||||||
if len(freeMetrics) != 1 || freeMetrics[0].Gauge.GetValue() != 5e8 {
|
if len(freeMetrics) != 1 || metricValue(freeMetrics[0]) != 5e8 {
|
||||||
t.Errorf("storage_free = %v", freeMetrics)
|
t.Errorf("storage_free_bytes = %v", freeMetrics)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check storage info
|
// Check storage info
|
||||||
infoMetrics := metrics["pve_node_storage"]
|
infoMetrics := metrics["pve_node_storage_info"]
|
||||||
if len(infoMetrics) != 1 {
|
if len(infoMetrics) != 1 {
|
||||||
t.Fatalf("expected 1 storage info metric, got %d", len(infoMetrics))
|
t.Fatalf("expected 1 storage info metric, got %d", len(infoMetrics))
|
||||||
}
|
}
|
||||||
@@ -321,19 +348,19 @@ func TestCollector_NICMetrics(t *testing.T) {
|
|||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// NIC info
|
// NIC info
|
||||||
nicInfo := metrics["pve_kvm_nic"]
|
nicInfo := metrics["pve_kvm_nic_info"]
|
||||||
if len(nicInfo) != 1 {
|
if len(nicInfo) != 1 {
|
||||||
t.Fatalf("expected 1 nic info, got %d", len(nicInfo))
|
t.Fatalf("expected 1 nic info, got %d", len(nicInfo))
|
||||||
}
|
}
|
||||||
|
|
||||||
// NIC stats
|
// NIC stats (counter, renamed with _total)
|
||||||
rxBytes := metrics["pve_kvm_nic_rx_bytes"]
|
rxBytes := metrics["pve_kvm_nic_rx_bytes_total"]
|
||||||
if len(rxBytes) != 1 || rxBytes[0].Gauge.GetValue() != 1000 {
|
if len(rxBytes) != 1 || metricValue(rxBytes[0]) != 1000 {
|
||||||
t.Errorf("rx_bytes = %v", rxBytes)
|
t.Errorf("rx_bytes_total = %v", rxBytes)
|
||||||
}
|
}
|
||||||
txBytes := metrics["pve_kvm_nic_tx_bytes"]
|
txBytes := metrics["pve_kvm_nic_tx_bytes_total"]
|
||||||
if len(txBytes) != 1 || txBytes[0].Gauge.GetValue() != 2000 {
|
if len(txBytes) != 1 || metricValue(txBytes[0]) != 2000 {
|
||||||
t.Errorf("tx_bytes = %v", txBytes)
|
t.Errorf("tx_bytes_total = %v", txBytes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -382,7 +409,7 @@ func TestCollector_PoolReadError(t *testing.T) {
|
|||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// Should still produce VM info with empty pool
|
// Should still produce VM info with empty pool
|
||||||
infoMetrics := metrics["pve_kvm"]
|
infoMetrics := metrics["pve_kvm_info"]
|
||||||
if len(infoMetrics) != 1 {
|
if len(infoMetrics) != 1 {
|
||||||
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
t.Fatalf("expected 1 kvm info metric, got %d", len(infoMetrics))
|
||||||
}
|
}
|
||||||
@@ -409,8 +436,129 @@ func TestCollector_ProcessDiscoveryError(t *testing.T) {
|
|||||||
|
|
||||||
metrics := collectMetrics(c)
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
// No VM metrics should be emitted
|
// No VM metrics should be emitted, but scrape_duration + build_info are always present
|
||||||
if len(metrics) != 0 {
|
expectedNames := map[string]bool{
|
||||||
t.Errorf("expected 0 metrics on discovery error, got %d metric names", len(metrics))
|
"pve_scrape_duration_seconds": true,
|
||||||
|
"pve_exporter_build_info": true,
|
||||||
|
}
|
||||||
|
for name := range metrics {
|
||||||
|
if !expectedNames[name] {
|
||||||
|
t.Errorf("unexpected metric %q on discovery error", name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(metrics) != 2 {
|
||||||
|
t.Errorf("expected 2 metrics (scrape_duration + build_info) on discovery error, got %d", len(metrics))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCollector_BuildInfo(t *testing.T) {
|
||||||
|
cfg := config.Config{
|
||||||
|
CollectRunningVMs: false,
|
||||||
|
CollectStorage: false,
|
||||||
|
MetricsPrefix: "pve",
|
||||||
|
Version: "1.2.3",
|
||||||
|
}
|
||||||
|
|
||||||
|
c := NewWithDeps(cfg, &mockProcReader{}, &mockSysReader{}, &mockQMMonitor{responses: map[string]string{}},
|
||||||
|
&mockStatFS{}, &mockCmdRunner{}, &mockFileReader{files: map[string]string{}})
|
||||||
|
|
||||||
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
|
bi := metrics["pve_exporter_build_info"]
|
||||||
|
if len(bi) != 1 {
|
||||||
|
t.Fatalf("expected 1 build_info metric, got %d", len(bi))
|
||||||
|
}
|
||||||
|
if metricValue(bi[0]) != 1 {
|
||||||
|
t.Errorf("build_info value = %v, want 1", metricValue(bi[0]))
|
||||||
|
}
|
||||||
|
m := findMetricWithLabels(bi, map[string]string{"version": "1.2.3"})
|
||||||
|
if m == nil {
|
||||||
|
t.Error("build_info missing version label")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCollector_DiskInfoMetrics(t *testing.T) {
|
||||||
|
cfg := config.Config{
|
||||||
|
CollectRunningVMs: true,
|
||||||
|
CollectStorage: false,
|
||||||
|
MetricsPrefix: "pve",
|
||||||
|
}
|
||||||
|
|
||||||
|
proc := &mockProcReader{
|
||||||
|
procs: []procfs.QEMUProcess{
|
||||||
|
{PID: 1, VMID: "100", Name: "vm", Vcores: 1, MaxMem: 1024},
|
||||||
|
},
|
||||||
|
cpuTimes: map[int]procfs.CPUTimes{1: {}},
|
||||||
|
ioCount: map[int]procfs.IOCounters{1: {}},
|
||||||
|
status: map[int]procfs.StatusInfo{
|
||||||
|
1: {Threads: 1, MemoryExtended: procfs.MemoryExtended{}},
|
||||||
|
},
|
||||||
|
memPct: map[int]float64{1: 0},
|
||||||
|
}
|
||||||
|
|
||||||
|
blockOutput := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
Cache mode: writeback, direct
|
||||||
|
Detect zeroes: on
|
||||||
|
drive-scsi1 (#block101): /mnt/storage/images/100/vm-100-disk-1.qcow2 (qcow2, read-only)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
`
|
||||||
|
|
||||||
|
sys := &mockSysReader{
|
||||||
|
blockSize: map[string]int64{
|
||||||
|
"/dev/zvol/rpool/data/vm-100-disk-0": 10737418240,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
qm := &mockQMMonitor{responses: map[string]string{
|
||||||
|
"100:info network": "",
|
||||||
|
"100:info block": blockOutput,
|
||||||
|
}}
|
||||||
|
|
||||||
|
fr := &mockFileReader{files: map[string]string{"/etc/pve/user.cfg": ""}}
|
||||||
|
c := NewWithDeps(cfg, proc, sys, qm, &mockStatFS{}, &mockCmdRunner{}, fr)
|
||||||
|
metrics := collectMetrics(c)
|
||||||
|
|
||||||
|
diskInfo := metrics["pve_kvm_disk_info"]
|
||||||
|
if len(diskInfo) != 2 {
|
||||||
|
t.Fatalf("expected 2 disk info metrics, got %d", len(diskInfo))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check zvol disk
|
||||||
|
m := findMetricWithLabels(diskInfo, map[string]string{
|
||||||
|
"id": "100",
|
||||||
|
"disk_name": "scsi0",
|
||||||
|
"disk_type": "zvol",
|
||||||
|
"cache_mode": "writeback, direct",
|
||||||
|
"detect_zeroes": "on",
|
||||||
|
"read_only": "",
|
||||||
|
"vol_name": "vm-100-disk-0",
|
||||||
|
"pool": "rpool/data",
|
||||||
|
})
|
||||||
|
if m == nil {
|
||||||
|
t.Error("zvol disk info metric not found with expected labels")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check qcow2 disk (read-only, no cache_mode)
|
||||||
|
m = findMetricWithLabels(diskInfo, map[string]string{
|
||||||
|
"id": "100",
|
||||||
|
"disk_name": "scsi1",
|
||||||
|
"disk_type": "qcow2",
|
||||||
|
"read_only": "true",
|
||||||
|
"cache_mode": "",
|
||||||
|
"vol_name": "vm-100-disk-1",
|
||||||
|
})
|
||||||
|
if m == nil {
|
||||||
|
t.Error("qcow2 disk info metric not found with expected labels")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Verify disk size for zvol
|
||||||
|
diskSize := metrics["pve_kvm_disk_size_bytes"]
|
||||||
|
if len(diskSize) < 1 {
|
||||||
|
t.Fatal("expected at least 1 disk size metric")
|
||||||
|
}
|
||||||
|
m = findMetricWithLabels(diskSize, map[string]string{"disk_name": "scsi0"})
|
||||||
|
if m == nil || metricValue(m) != 10737418240 {
|
||||||
|
t.Errorf("disk size for scsi0 = %v", m)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ type Config struct {
|
|||||||
QMRand time.Duration
|
QMRand time.Duration
|
||||||
QMMonitorDeferClose bool
|
QMMonitorDeferClose bool
|
||||||
ShowVersion bool
|
ShowVersion bool
|
||||||
|
Version string
|
||||||
}
|
}
|
||||||
|
|
||||||
func Parse() Config {
|
func Parse() Config {
|
||||||
|
|||||||
14
src/internal/logging/logging.go
Normal file
14
src/internal/logging/logging.go
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
package logging
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"log/slog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// LevelTrace is a custom severity that sits four steps below slog.LevelDebug.
// It is meant for very detailed diagnostic output.
const LevelTrace = slog.LevelDebug - 4

// Trace emits msg, together with the alternating key/value pairs in args, at
// LevelTrace through the process-wide default slog logger.
func Trace(msg string, args ...any) {
	slog.Log(context.Background(), LevelTrace, msg, args...)
}
|
||||||
@@ -2,10 +2,13 @@ package procfs
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux
|
const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux
|
||||||
@@ -83,6 +86,7 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
numericPIDs := 0
|
||||||
var procs []QEMUProcess
|
var procs []QEMUProcess
|
||||||
for _, e := range entries {
|
for _, e := range entries {
|
||||||
if !e.IsDir() {
|
if !e.IsDir() {
|
||||||
@@ -92,26 +96,32 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
numericPIDs++
|
||||||
|
|
||||||
exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe"))
|
exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logging.Trace("proc readlink failed", "pid", pid, "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if exe != "/usr/bin/qemu-system-x86_64" {
|
if exe != "/usr/bin/qemu-system-x86_64" && exe != "/usr/bin/qemu-system-x86_64 (deleted)" {
|
||||||
|
logging.Trace("proc exe skip", "pid", pid, "exe", exe)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline"))
|
cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline"))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logging.Trace("proc cmdline read failed", "pid", pid, "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
cmdline := ParseCmdline(cmdlineBytes)
|
cmdline := ParseCmdline(cmdlineBytes)
|
||||||
|
|
||||||
vmid := FlagValue(cmdline, "-id")
|
vmid := FlagValue(cmdline, "-id")
|
||||||
if vmid == "" {
|
if vmid == "" {
|
||||||
|
logging.Trace("proc no -id flag", "pid", pid)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if !r.VMConfigExists(vmid) {
|
if !r.VMConfigExists(vmid) {
|
||||||
|
logging.Trace("proc no config", "pid", pid, "vmid", vmid)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,8 +133,14 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
|||||||
}
|
}
|
||||||
proc.Vcores = ParseVcores(cmdline)
|
proc.Vcores = ParseVcores(cmdline)
|
||||||
proc.MaxMem = ParseMem(cmdline)
|
proc.MaxMem = ParseMem(cmdline)
|
||||||
|
logging.Trace("proc discovered VM", "pid", pid, "vmid", vmid, "name", proc.Name)
|
||||||
procs = append(procs, proc)
|
procs = append(procs, proc)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("proc scan complete", "numeric_pids", numericPIDs, "qemu_count", len(procs))
|
||||||
|
if len(procs) == 0 {
|
||||||
|
slog.Warn("no QEMU processes discovered", "numeric_pids", numericPIDs, "proc_path", r.ProcPath, "pve_cfg_path", r.PVECfgPath)
|
||||||
|
}
|
||||||
return procs, nil
|
return procs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package procfs
|
package procfs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -251,3 +253,78 @@ func TestParseIO_MalformedLines(t *testing.T) {
|
|||||||
t.Errorf("WriteChars = %d, want 100", io.WriteChars)
|
t.Errorf("WriteChars = %d, want 100", io.WriteChars)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestDiscoverQEMUProcesses_DeletedExe verifies that QEMU processes whose
|
||||||
|
// /proc/{pid}/exe has a " (deleted)" suffix (common after package upgrades)
|
||||||
|
// are still discovered.
|
||||||
|
func TestDiscoverQEMUProcesses_DeletedExe(t *testing.T) {
|
||||||
|
// Build a fake /proc tree with two "QEMU" PIDs:
|
||||||
|
// 1000 -> normal exe
|
||||||
|
// 1001 -> exe with " (deleted)" suffix
|
||||||
|
tmpDir := t.TempDir()
|
||||||
|
procDir := filepath.Join(tmpDir, "proc")
|
||||||
|
pveCfgDir := filepath.Join(tmpDir, "pve")
|
||||||
|
|
||||||
|
cmdline100 := "/usr/bin/qemu-system-x86_64\x00-id\x00100\x00-name\x00vm100\x00-cpu\x00host\x00-smp\x004\x00-m\x002048\x00"
|
||||||
|
cmdline101 := "/usr/bin/qemu-system-x86_64\x00-id\x00101\x00-name\x00vm101\x00-cpu\x00host\x00-smp\x002\x00-m\x001024\x00"
|
||||||
|
|
||||||
|
for _, tc := range []struct {
|
||||||
|
pid, vmid, exe, cmdline string
|
||||||
|
}{
|
||||||
|
{"1000", "100", "/usr/bin/qemu-system-x86_64", cmdline100},
|
||||||
|
{"1001", "101", "/usr/bin/qemu-system-x86_64 (deleted)", cmdline101},
|
||||||
|
} {
|
||||||
|
pidDir := filepath.Join(procDir, tc.pid)
|
||||||
|
if err := os.MkdirAll(pidDir, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// Create a real file as the symlink target, then symlink "exe" -> that file.
|
||||||
|
// os.Readlink returns the target path, which is what DiscoverQEMUProcesses reads.
|
||||||
|
target := filepath.Join(tmpDir, "bin-"+tc.pid)
|
||||||
|
if err := os.WriteFile(target, nil, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// We can't make Readlink return an arbitrary string with a real symlink,
|
||||||
|
// so instead we write the exe path to a regular file and override the
|
||||||
|
// readlink behavior. But DiscoverQEMUProcesses uses os.Readlink...
|
||||||
|
// The trick: symlink to the exact path string. On Linux, symlink targets
|
||||||
|
// don't need to exist -- Readlink returns the raw target.
|
||||||
|
if err := os.Symlink(tc.exe, filepath.Join(pidDir, "exe")); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(pidDir, "cmdline"), []byte(tc.cmdline), 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
// Create VM config so VMConfigExists returns true
|
||||||
|
if err := os.MkdirAll(pveCfgDir, 0o755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if err := os.WriteFile(filepath.Join(pveCfgDir, tc.vmid+".conf"), nil, 0o644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
r := &RealProcReader{
|
||||||
|
ProcPath: procDir,
|
||||||
|
PVECfgPath: pveCfgDir,
|
||||||
|
}
|
||||||
|
procs, err := r.DiscoverQEMUProcesses()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
if len(procs) != 2 {
|
||||||
|
t.Fatalf("expected 2 procs, got %d", len(procs))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect discovered VMIDs
|
||||||
|
vmids := map[string]bool{}
|
||||||
|
for _, p := range procs {
|
||||||
|
vmids[p.VMID] = true
|
||||||
|
}
|
||||||
|
if !vmids["100"] {
|
||||||
|
t.Error("VM 100 (normal exe) not discovered")
|
||||||
|
}
|
||||||
|
if !vmids["101"] {
|
||||||
|
t.Error("VM 101 (deleted exe) not discovered")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ package pveconfig
|
|||||||
import (
|
import (
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StorageEntry holds a parsed storage definition from storage.cfg.
|
// StorageEntry holds a parsed storage definition from storage.cfg.
|
||||||
@@ -47,6 +49,7 @@ func ParseStorageConfig(data string) []StorageEntry {
|
|||||||
"name": SanitizeKey(sectionName),
|
"name": SanitizeKey(sectionName),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
logging.Trace("storage.cfg section", "type", sectionType, "name", sectionName)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,5 +69,6 @@ func ParseStorageConfig(data string) []StorageEntry {
|
|||||||
result = append(result, *current)
|
result = append(result, *current)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("ParseStorageConfig complete", "entries", len(result))
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,8 +3,11 @@ package qmmonitor
|
|||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"log/slog"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// DiskInfo holds parsed block device info from "info block".
|
// DiskInfo holds parsed block device info from "info block".
|
||||||
@@ -16,8 +19,10 @@ type DiskInfo struct {
|
|||||||
Labels map[string]string // additional labels: vol_name, pool, device, etc.
|
Labels map[string]string // additional labels: vol_name, pool, device, etc.
|
||||||
}
|
}
|
||||||
|
|
||||||
// blockHeaderRe matches: "disk_name (#blockN): /path/to/disk (type, mode)"
|
// blockHeaderRe matches block device headers in both old and new QEMU formats:
|
||||||
var blockHeaderRe = regexp.MustCompile(`^(\w+) \(#block(\d+)\): (.+) \(([\w, -]+)\)$`)
|
// Old: "disk_name (#blockN): /path/to/disk (type, mode)"
|
||||||
|
// New: "disk_name: /path/to/disk (type, mode)"
|
||||||
|
var blockHeaderRe = regexp.MustCompile(`^(\w+)(?:\s+\(#block(\d+)\))?: (.+) \(([\w, -]+)\)$`)
|
||||||
|
|
||||||
// lvmRe matches: /dev/{vg_name}/vm-{N}-disk-{N}
|
// lvmRe matches: /dev/{vg_name}/vm-{N}-disk-{N}
|
||||||
var lvmRe = regexp.MustCompile(`^/dev/([^/]+)/(vm-\d+-disk-\d+)$`)
|
var lvmRe = regexp.MustCompile(`^/dev/([^/]+)/(vm-\d+-disk-\d+)$`)
|
||||||
@@ -41,6 +46,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
|
|
||||||
match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
|
match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
|
||||||
if match == nil {
|
if match == nil {
|
||||||
|
logging.Trace("block header no match", "line", lines[0])
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,8 +72,10 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
if strings.HasPrefix(diskPath, "json:") {
|
if strings.HasPrefix(diskPath, "json:") {
|
||||||
resolved, err := HandleJSONPath(diskPath)
|
resolved, err := HandleJSONPath(diskPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
logging.Trace("block json path error", "disk", diskName, "err", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
logging.Trace("block json resolved", "disk", diskName, "resolved", resolved)
|
||||||
diskPath = resolved
|
diskPath = resolved
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -85,6 +93,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
|
|
||||||
// Detect disk type from path
|
// Detect disk type from path
|
||||||
classifyDisk(&info)
|
classifyDisk(&info)
|
||||||
|
logging.Trace("block classified", "disk", diskName, "type", info.DiskType, "path", diskPath)
|
||||||
|
|
||||||
// Parse additional info from remaining lines
|
// Parse additional info from remaining lines
|
||||||
for _, line := range lines[1:] {
|
for _, line := range lines[1:] {
|
||||||
@@ -92,24 +101,31 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
|||||||
if strings.HasPrefix(line, "Attached to:") {
|
if strings.HasPrefix(line, "Attached to:") {
|
||||||
// Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
|
// Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
|
||||||
val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
|
val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
|
||||||
|
// Extract short device name from QOM path
|
||||||
|
if strings.Contains(val, "/") {
|
||||||
|
qomParts := strings.Split(val, "/")
|
||||||
|
if len(qomParts) > 3 {
|
||||||
|
val = qomParts[3]
|
||||||
|
}
|
||||||
|
}
|
||||||
info.Labels["attached_to"] = val
|
info.Labels["attached_to"] = val
|
||||||
} else if strings.HasPrefix(line, "Cache mode:") {
|
} else if strings.HasPrefix(line, "Cache mode:") {
|
||||||
val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
|
val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
|
||||||
for _, mode := range strings.Split(val, ", ") {
|
info.Labels["cache_mode"] = val
|
||||||
mode = strings.TrimSpace(mode)
|
|
||||||
if mode != "" {
|
|
||||||
key := "cache_mode_" + strings.ReplaceAll(mode, " ", "_")
|
|
||||||
info.Labels[key] = "true"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if strings.HasPrefix(line, "Detect zeroes:") {
|
} else if strings.HasPrefix(line, "Detect zeroes:") {
|
||||||
info.Labels["detect_zeroes"] = "on"
|
val := strings.TrimSpace(strings.TrimPrefix(line, "Detect zeroes:"))
|
||||||
|
info.Labels["detect_zeroes"] = val
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("block parsed", "disk", diskName, "labels", info.Labels)
|
||||||
result[diskName] = info
|
result[diskName] = info
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("ParseBlockInfo complete", "disk_count", len(result))
|
||||||
|
if len(result) == 0 && raw != "" {
|
||||||
|
slog.Debug("ParseBlockInfo found no disks", "rawLen", len(raw))
|
||||||
|
}
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -27,11 +27,8 @@ func TestParseBlockInfo_Qcow2(t *testing.T) {
|
|||||||
if d.Labels["detect_zeroes"] != "on" {
|
if d.Labels["detect_zeroes"] != "on" {
|
||||||
t.Errorf("detect_zeroes = %q", d.Labels["detect_zeroes"])
|
t.Errorf("detect_zeroes = %q", d.Labels["detect_zeroes"])
|
||||||
}
|
}
|
||||||
if d.Labels["cache_mode_writeback"] != "true" {
|
if d.Labels["cache_mode"] != "writeback, direct" {
|
||||||
t.Errorf("cache_mode_writeback missing")
|
t.Errorf("cache_mode = %q, want %q", d.Labels["cache_mode"], "writeback, direct")
|
||||||
}
|
|
||||||
if d.Labels["cache_mode_direct"] != "true" {
|
|
||||||
t.Errorf("cache_mode_direct missing")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -175,6 +172,85 @@ func TestParseBlockInfo_JSONError(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_Throttle(t *testing.T) {
|
||||||
|
// PVE 9.x / newer QEMU format: no (#blockN)
|
||||||
|
raw := `drive-scsi0: json:{"driver":"raw","file":{"driver":"host_device","filename":"/dev/zvol/rpool/data/vm-100-disk-0"}} (throttle, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
Cache mode: writeback, direct
|
||||||
|
Detect zeroes: unmap
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
if len(disks) != 1 {
|
||||||
|
t.Fatalf("expected 1 disk, got %d", len(disks))
|
||||||
|
}
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.BlockID != "" {
|
||||||
|
t.Errorf("block_id = %q, want empty", d.BlockID)
|
||||||
|
}
|
||||||
|
if d.DiskType != "zvol" {
|
||||||
|
t.Errorf("type = %q, want zvol", d.DiskType)
|
||||||
|
}
|
||||||
|
if d.DiskPath != "/dev/zvol/rpool/data/vm-100-disk-0" {
|
||||||
|
t.Errorf("path = %q", d.DiskPath)
|
||||||
|
}
|
||||||
|
if d.Labels["pool"] != "rpool/data" {
|
||||||
|
t.Errorf("pool = %q", d.Labels["pool"])
|
||||||
|
}
|
||||||
|
if d.Labels["vol_name"] != "vm-100-disk-0" {
|
||||||
|
t.Errorf("vol_name = %q", d.Labels["vol_name"])
|
||||||
|
}
|
||||||
|
if d.Labels["detect_zeroes"] != "unmap" {
|
||||||
|
t.Errorf("detect_zeroes = %q, want unmap", d.Labels["detect_zeroes"])
|
||||||
|
}
|
||||||
|
if d.Labels["cache_mode"] != "writeback, direct" {
|
||||||
|
t.Errorf("cache_mode = %q", d.Labels["cache_mode"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_DetectZeroesUnmap(t *testing.T) {
|
||||||
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Detect zeroes: unmap
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.Labels["detect_zeroes"] != "unmap" {
|
||||||
|
t.Errorf("detect_zeroes = %q, want unmap", d.Labels["detect_zeroes"])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_AttachedToVirtio(t *testing.T) {
|
||||||
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtio0/virtio-backend
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.Labels["attached_to"] != "virtio0" {
|
||||||
|
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtio0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_AttachedToVirtioScsi(t *testing.T) {
|
||||||
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["scsi0"]
|
||||||
|
if d.Labels["attached_to"] != "virtioscsi0" {
|
||||||
|
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtioscsi0")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseBlockInfo_AttachedToBare(t *testing.T) {
|
||||||
|
raw := `drive-ide2 (#block100): /path/to/disk.iso (raw, read-only)
|
||||||
|
Attached to: ide2
|
||||||
|
`
|
||||||
|
disks := ParseBlockInfo(raw)
|
||||||
|
d := disks["ide2"]
|
||||||
|
if d.Labels["attached_to"] != "ide2" {
|
||||||
|
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "ide2")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseBlockInfo_MultiDisk(t *testing.T) {
|
func TestParseBlockInfo_MultiDisk(t *testing.T) {
|
||||||
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||||
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ package qmmonitor
|
|||||||
import (
|
import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// NICInfo holds parsed network interface info from "info network".
|
// NICInfo holds parsed network interface info from "info network".
|
||||||
@@ -64,14 +66,17 @@ func ParseNetworkInfo(raw string) []NICInfo {
|
|||||||
var result []NICInfo
|
var result []NICInfo
|
||||||
for netdev, cfg := range nicsMap {
|
for netdev, cfg := range nicsMap {
|
||||||
idx, _ := strconv.Atoi(cfg["index"])
|
idx, _ := strconv.Atoi(cfg["index"])
|
||||||
result = append(result, NICInfo{
|
nic := NICInfo{
|
||||||
Netdev: netdev,
|
Netdev: netdev,
|
||||||
Queues: idx + 1,
|
Queues: idx + 1,
|
||||||
Type: cfg["type"],
|
Type: cfg["type"],
|
||||||
Model: cfg["model"],
|
Model: cfg["model"],
|
||||||
Macaddr: cfg["macaddr"],
|
Macaddr: cfg["macaddr"],
|
||||||
Ifname: cfg["ifname"],
|
Ifname: cfg["ifname"],
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
logging.Trace("parsed NIC", "netdev", netdev, "ifname", nic.Ifname, "queues", nic.Queues, "model", nic.Model)
|
||||||
|
result = append(result, nic)
|
||||||
|
}
|
||||||
|
logging.Trace("ParseNetworkInfo complete", "nic_count", len(result))
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,24 +2,30 @@ package qmmonitor
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"log/slog"
|
"log/slog"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/creack/pty"
|
||||||
"pve_local_exporter/internal/cache"
|
"pve_local_exporter/internal/cache"
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var errTimeout = errors.New("timeout waiting for qm monitor")
|
||||||
|
|
||||||
// QMMonitor runs commands against qm monitor and caches results.
|
// QMMonitor runs commands against qm monitor and caches results.
|
||||||
type QMMonitor interface {
|
type QMMonitor interface {
|
||||||
RunCommand(vmid, cmd string) (string, error)
|
RunCommand(vmid, cmd string) (string, error)
|
||||||
InvalidateCache(vmid, cmd string)
|
InvalidateCache(vmid, cmd string)
|
||||||
}
|
}
|
||||||
|
|
||||||
// RealQMMonitor spawns `qm monitor` via os/exec with pipe-based I/O.
|
// RealQMMonitor spawns `qm monitor` on a PTY via creack/pty.
|
||||||
type RealQMMonitor struct {
|
type RealQMMonitor struct {
|
||||||
timeout time.Duration
|
timeout time.Duration
|
||||||
deferClose bool
|
deferClose bool
|
||||||
@@ -53,6 +59,7 @@ func (m *RealQMMonitor) InvalidateCache(vmid, cmd string) {
|
|||||||
func (m *RealQMMonitor) RunCommand(vmid, cmd string) (string, error) {
|
func (m *RealQMMonitor) RunCommand(vmid, cmd string) (string, error) {
|
||||||
key := cacheKey(vmid, cmd)
|
key := cacheKey(vmid, cmd)
|
||||||
if v, ok := m.cache.Get(key); ok {
|
if v, ok := m.cache.Get(key); ok {
|
||||||
|
slog.Debug("qm cache hit", "vmid", vmid, "cmd", cmd)
|
||||||
return v, nil
|
return v, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -66,50 +73,61 @@ func (m *RealQMMonitor) RunCommand(vmid, cmd string) (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
||||||
|
slog.Debug("qm monitor exec", "vmid", vmid, "cmd", cmd)
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
logging.Trace("qm pty spawn start", "vmid", vmid)
|
||||||
qmCmd := exec.Command("qm", "monitor", vmid)
|
qmCmd := exec.Command("qm", "monitor", vmid)
|
||||||
|
qmCmd.Env = append(os.Environ(), "TERM=dumb")
|
||||||
|
|
||||||
stdin, err := qmCmd.StdinPipe()
|
ptmx, err := pty.Start(qmCmd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("stdin pipe: %w", err)
|
|
||||||
}
|
|
||||||
stdout, err := qmCmd.StdoutPipe()
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("stdout pipe: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := qmCmd.Start(); err != nil {
|
|
||||||
return "", fmt.Errorf("start qm monitor: %w", err)
|
return "", fmt.Errorf("start qm monitor: %w", err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm pty spawn success", "vmid", vmid, "pid", qmCmd.Process.Pid)
|
||||||
|
|
||||||
reader := bufio.NewReader(stdout)
|
reader := bufio.NewReader(ptmx)
|
||||||
|
|
||||||
// Wait for initial "qm>" prompt
|
// Wait for initial "qm>" prompt
|
||||||
if err := readUntilPrompt(reader, m.timeout); err != nil {
|
deadline := time.Now().Add(m.timeout)
|
||||||
m.deferCloseProcess(qmCmd, stdin)
|
_, err = readUntilMarker(reader, "qm>", deadline)
|
||||||
|
if err != nil {
|
||||||
|
slog.Debug("qm monitor initial prompt failed", "vmid", vmid, "err", err)
|
||||||
|
m.killOrDefer(qmCmd, ptmx)
|
||||||
return "", fmt.Errorf("initial prompt: %w", err)
|
return "", fmt.Errorf("initial prompt: %w", err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm initial prompt received", "vmid", vmid)
|
||||||
|
|
||||||
// Send command
|
// Send command
|
||||||
fmt.Fprintf(stdin, "%s\n", cmd)
|
logging.Trace("qm send command", "vmid", vmid, "cmd", cmd)
|
||||||
|
fmt.Fprintf(ptmx, "%s\n", cmd)
|
||||||
|
|
||||||
// Read response until next "qm>" prompt
|
// Read response until next "qm>" prompt
|
||||||
response, err := readResponseUntilPrompt(reader, m.timeout)
|
deadline = time.Now().Add(m.timeout)
|
||||||
|
raw, err := readUntilMarker(reader, "qm>", deadline)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
m.deferCloseProcess(qmCmd, stdin)
|
slog.Debug("qm monitor response failed", "vmid", vmid, "cmd", cmd, "err", err)
|
||||||
|
m.killOrDefer(qmCmd, ptmx)
|
||||||
return "", fmt.Errorf("read response: %w", err)
|
return "", fmt.Errorf("read response: %w", err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("qm raw response", "vmid", vmid, "raw_len", len(raw))
|
||||||
|
|
||||||
// Close cleanly
|
response := parseQMResponse(raw)
|
||||||
stdin.Close()
|
|
||||||
|
// Close cleanly: closing ptmx sends SIGHUP to child
|
||||||
|
ptmx.Close()
|
||||||
if err := qmCmd.Wait(); err != nil {
|
if err := qmCmd.Wait(); err != nil {
|
||||||
slog.Debug("qm monitor wait error", "vmid", vmid, "err", err)
|
slog.Debug("qm monitor wait error", "vmid", vmid, "err", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
slog.Debug("qm monitor done", "vmid", vmid, "cmd", cmd,
|
||||||
|
"duration", time.Since(start), "responseLen", len(response))
|
||||||
|
|
||||||
return response, nil
|
return response, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *RealQMMonitor) deferCloseProcess(cmd *exec.Cmd, stdin io.WriteCloser) {
|
func (m *RealQMMonitor) killOrDefer(cmd *exec.Cmd, closer io.Closer) {
|
||||||
stdin.Close()
|
closer.Close()
|
||||||
if m.deferClose {
|
if m.deferClose {
|
||||||
m.mu.Lock()
|
m.mu.Lock()
|
||||||
m.deferredProcs = append(m.deferredProcs, deferredProc{cmd: cmd, timestamp: time.Now()})
|
m.deferredProcs = append(m.deferredProcs, deferredProc{cmd: cmd, timestamp: time.Now()})
|
||||||
@@ -140,50 +158,63 @@ func (m *RealQMMonitor) cleanupDeferred() {
|
|||||||
m.deferredProcs = still
|
m.deferredProcs = still
|
||||||
}
|
}
|
||||||
|
|
||||||
func readUntilPrompt(r *bufio.Reader, timeout time.Duration) error {
|
// readUntilMarker reads from r byte-by-byte until the buffer ends with marker
|
||||||
deadline := time.Now().Add(timeout)
|
// or the deadline expires. Returns everything read before the marker.
|
||||||
|
// Uses a goroutine for reads so the deadline is enforced even when ReadByte blocks.
|
||||||
|
func readUntilMarker(r *bufio.Reader, marker string, deadline time.Time) (string, error) {
|
||||||
|
type result struct {
|
||||||
|
data string
|
||||||
|
err error
|
||||||
|
}
|
||||||
|
ch := make(chan result, 1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
var buf []byte
|
||||||
|
markerBytes := []byte(marker)
|
||||||
for {
|
for {
|
||||||
if time.Now().After(deadline) {
|
b, err := r.ReadByte()
|
||||||
return fmt.Errorf("timeout waiting for qm> prompt")
|
|
||||||
}
|
|
||||||
line, err := r.ReadString('\n')
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Check if we got the prompt without newline
|
ch <- result{"", err}
|
||||||
if strings.Contains(line, "qm>") {
|
return
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
return err
|
buf = append(buf, b)
|
||||||
|
if len(buf) >= len(markerBytes) &&
|
||||||
|
string(buf[len(buf)-len(markerBytes):]) == marker {
|
||||||
|
// Return everything before the marker
|
||||||
|
ch <- result{string(buf[:len(buf)-len(markerBytes)]), nil}
|
||||||
|
return
|
||||||
}
|
}
|
||||||
if strings.Contains(line, "qm>") {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
remaining := time.Until(deadline)
|
||||||
|
if remaining <= 0 {
|
||||||
|
remaining = time.Millisecond
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case res := <-ch:
|
||||||
|
return res.data, res.err
|
||||||
|
case <-time.After(remaining):
|
||||||
|
return "", errTimeout
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func readResponseUntilPrompt(r *bufio.Reader, timeout time.Duration) (string, error) {
|
// parseQMResponse takes the raw output before the "qm>" marker from a command
|
||||||
deadline := time.Now().Add(timeout)
|
// response, skips the command echo (first line), and trims \r characters.
|
||||||
var lines []string
|
func parseQMResponse(raw string) string {
|
||||||
firstLine := true
|
lines := strings.Split(raw, "\n")
|
||||||
for {
|
// Skip the command echo (first line)
|
||||||
if time.Now().After(deadline) {
|
if len(lines) > 0 {
|
||||||
return "", fmt.Errorf("timeout waiting for qm> prompt")
|
lines = lines[1:]
|
||||||
}
|
}
|
||||||
line, err := r.ReadString('\n')
|
var out []string
|
||||||
if err != nil {
|
for _, line := range lines {
|
||||||
if strings.Contains(line, "qm>") {
|
cleaned := strings.TrimRight(line, "\r")
|
||||||
break
|
out = append(out, cleaned)
|
||||||
}
|
}
|
||||||
return "", err
|
// Trim trailing empty lines
|
||||||
|
for len(out) > 0 && out[len(out)-1] == "" {
|
||||||
|
out = out[:len(out)-1]
|
||||||
}
|
}
|
||||||
if strings.Contains(line, "qm>") {
|
return strings.Join(out, "\n")
|
||||||
break
|
|
||||||
}
|
|
||||||
// Skip the echo of the command (first line)
|
|
||||||
if firstLine {
|
|
||||||
firstLine = false
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
lines = append(lines, strings.TrimRight(line, "\r\n"))
|
|
||||||
}
|
|
||||||
return strings.Join(lines, "\n"), nil
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
package qmmonitor
|
package qmmonitor
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
|
"io"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -57,3 +59,85 @@ func TestMockQMMonitor_Invalidate(t *testing.T) {
|
|||||||
t.Errorf("unexpected: %q", r)
|
t.Errorf("unexpected: %q", r)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestReadUntilMarker_Success(t *testing.T) {
|
||||||
|
pr, pw := io.Pipe()
|
||||||
|
defer pr.Close()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
pw.Write([]byte("banner\nqm> "))
|
||||||
|
pw.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
reader := bufio.NewReader(pr)
|
||||||
|
deadline := time.Now().Add(5 * time.Second)
|
||||||
|
got, err := readUntilMarker(reader, "qm>", deadline)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if got != "banner\n" {
|
||||||
|
t.Errorf("got %q, want %q", got, "banner\n")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadUntilMarker_Timeout(t *testing.T) {
|
||||||
|
pr, pw := io.Pipe()
|
||||||
|
defer pr.Close()
|
||||||
|
defer pw.Close()
|
||||||
|
|
||||||
|
// Write nothing -- should timeout
|
||||||
|
reader := bufio.NewReader(pr)
|
||||||
|
deadline := time.Now().Add(50 * time.Millisecond)
|
||||||
|
start := time.Now()
|
||||||
|
_, err := readUntilMarker(reader, "qm>", deadline)
|
||||||
|
elapsed := time.Since(start)
|
||||||
|
|
||||||
|
if err != errTimeout {
|
||||||
|
t.Fatalf("expected errTimeout, got: %v", err)
|
||||||
|
}
|
||||||
|
if elapsed > 500*time.Millisecond {
|
||||||
|
t.Errorf("timeout took too long: %v", elapsed)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestReadUntilMarker_EOF(t *testing.T) {
|
||||||
|
pr, pw := io.Pipe()
|
||||||
|
defer pr.Close()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
pw.Write([]byte("partial data"))
|
||||||
|
pw.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
reader := bufio.NewReader(pr)
|
||||||
|
deadline := time.Now().Add(5 * time.Second)
|
||||||
|
_, err := readUntilMarker(reader, "qm>", deadline)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error on EOF before marker")
|
||||||
|
}
|
||||||
|
if err == errTimeout {
|
||||||
|
t.Fatal("expected EOF error, not timeout")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseQMResponse(t *testing.T) {
|
||||||
|
// Simulate: command echo + response lines with \r + trailing newline
|
||||||
|
raw := "info network\r\n" +
|
||||||
|
"net0: index=0,type=tap,ifname=tap100i0\r\n" +
|
||||||
|
"net1: index=1,type=tap,ifname=tap100i1\r\n"
|
||||||
|
|
||||||
|
got := parseQMResponse(raw)
|
||||||
|
want := "net0: index=0,type=tap,ifname=tap100i0\n" +
|
||||||
|
"net1: index=1,type=tap,ifname=tap100i1"
|
||||||
|
if got != want {
|
||||||
|
t.Errorf("parseQMResponse:\ngot: %q\nwant: %q", got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseQMResponse_Empty(t *testing.T) {
|
||||||
|
// Command echo only, no response data
|
||||||
|
got := parseQMResponse("info version\r\n")
|
||||||
|
if got != "" {
|
||||||
|
t.Errorf("expected empty, got %q", got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// StorageSize holds the total and free bytes of a storage pool.
|
// StorageSize holds the total and free bytes of a storage pool.
|
||||||
@@ -51,6 +53,8 @@ func GetZPoolSize(output string) (StorageSize, error) {
|
|||||||
return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1])
|
return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("zpool fields", "name", fields[0], "size", fields[1], "alloc", fields[2], "free", fields[3])
|
||||||
|
|
||||||
total, err := strconv.ParseInt(fields[1], 10, 64)
|
total, err := strconv.ParseInt(fields[1], 10, 64)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return StorageSize{}, fmt.Errorf("parse total: %w", err)
|
return StorageSize{}, fmt.Errorf("parse total: %w", err)
|
||||||
@@ -60,5 +64,6 @@ func GetZPoolSize(output string) (StorageSize, error) {
|
|||||||
return StorageSize{}, fmt.Errorf("parse free: %w", err)
|
return StorageSize{}, fmt.Errorf("parse free: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logging.Trace("zpool parsed", "total", total, "free", free)
|
||||||
return StorageSize{Total: total, Free: free}, nil
|
return StorageSize{Total: total, Free: free}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
// SysReader abstracts /sys access for testability.
|
// SysReader abstracts /sys access for testability.
|
||||||
@@ -46,6 +48,7 @@ func (r *RealSysReader) ReadInterfaceStats(ifname string) (map[string]int64, err
|
|||||||
}
|
}
|
||||||
stats[e.Name()] = val
|
stats[e.Name()] = val
|
||||||
}
|
}
|
||||||
|
logging.Trace("interface stats", "ifname", ifname, "stat_count", len(stats))
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,6 +61,7 @@ func (r *RealSysReader) GetBlockDeviceSize(devPath string) (int64, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err)
|
return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err)
|
||||||
}
|
}
|
||||||
|
logging.Trace("block device resolved", "path", devPath, "resolved", resolved)
|
||||||
|
|
||||||
// Extract device name from /dev/XXX
|
// Extract device name from /dev/XXX
|
||||||
devName := filepath.Base(resolved)
|
devName := filepath.Base(resolved)
|
||||||
@@ -84,5 +88,6 @@ func GetDeviceSymlinkTarget(devPath string) (string, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
logging.Trace("device symlink resolved", "path", devPath, "target", resolved)
|
||||||
return resolved, nil
|
return resolved, nil
|
||||||
}
|
}
|
||||||
|
|||||||
19
src/main.go
19
src/main.go
@@ -15,12 +15,14 @@ import (
|
|||||||
|
|
||||||
"pve_local_exporter/internal/collector"
|
"pve_local_exporter/internal/collector"
|
||||||
"pve_local_exporter/internal/config"
|
"pve_local_exporter/internal/config"
|
||||||
|
"pve_local_exporter/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
var version string
|
var version string
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
cfg := config.Parse()
|
cfg := config.Parse()
|
||||||
|
cfg.Version = version
|
||||||
|
|
||||||
if cfg.ShowVersion {
|
if cfg.ShowVersion {
|
||||||
fmt.Println(version)
|
fmt.Println(version)
|
||||||
@@ -29,6 +31,8 @@ func main() {
|
|||||||
|
|
||||||
level := slog.LevelInfo
|
level := slog.LevelInfo
|
||||||
switch strings.ToUpper(cfg.LogLevel) {
|
switch strings.ToUpper(cfg.LogLevel) {
|
||||||
|
case "TRACE":
|
||||||
|
level = logging.LevelTrace
|
||||||
case "DEBUG":
|
case "DEBUG":
|
||||||
level = slog.LevelDebug
|
level = slog.LevelDebug
|
||||||
case "WARNING", "WARN":
|
case "WARNING", "WARN":
|
||||||
@@ -36,7 +40,18 @@ func main() {
|
|||||||
case "ERROR", "CRITICAL":
|
case "ERROR", "CRITICAL":
|
||||||
level = slog.LevelError
|
level = slog.LevelError
|
||||||
}
|
}
|
||||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})))
|
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||||
|
Level: level,
|
||||||
|
ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr {
|
||||||
|
if a.Key == slog.LevelKey {
|
||||||
|
lvl := a.Value.Any().(slog.Level)
|
||||||
|
if lvl == logging.LevelTrace {
|
||||||
|
a.Value = slog.StringValue("TRACE")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return a
|
||||||
|
},
|
||||||
|
})))
|
||||||
|
|
||||||
reg := prometheus.NewRegistry()
|
reg := prometheus.NewRegistry()
|
||||||
c := collector.New(cfg)
|
c := collector.New(cfg)
|
||||||
@@ -59,7 +74,7 @@ func main() {
|
|||||||
server.Close()
|
server.Close()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
log.Printf("listening on %s", addr)
|
slog.Info("listening", "addr", addr)
|
||||||
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user