From 0ba7c190f8c175d26005c4ea9467e90979f05dfd Mon Sep 17 00:00:00 2001 From: illustris Date: Thu, 12 Mar 2026 16:03:36 +0530 Subject: [PATCH] fix disk info collection --- src/internal/collector/collector.go | 76 +++++++++++++------- src/internal/collector/collector_test.go | 88 +++++++++++++++++++++++- src/internal/qmmonitor/block.go | 12 ++-- src/internal/qmmonitor/block_test.go | 7 +- 4 files changed, 146 insertions(+), 37 deletions(-) diff --git a/src/internal/collector/collector.go b/src/internal/collector/collector.go index 82db1ea..f4394ff 100644 --- a/src/internal/collector/collector.go +++ b/src/internal/collector/collector.go @@ -49,6 +49,7 @@ type PVECollector struct { descCtxSwitches *prometheus.Desc descNicInfo *prometheus.Desc descNicQueues *prometheus.Desc + descDiskInfo *prometheus.Desc descDiskSize *prometheus.Desc descStorageSize *prometheus.Desc descStorageFree *prometheus.Desc @@ -141,7 +142,12 @@ func NewWithDeps(cfg config.Config, proc procfs.ProcReader, sys sysfs.SysReader, descCtxSwitches: prometheus.NewDesc(p+"_kvm_ctx_switches_total", "Context switches", []string{"id", "type"}, nil), descNicInfo: prometheus.NewDesc(p+"_kvm_nic_info", "NIC info", []string{"id", "ifname", "netdev", "queues", "type", "model", "macaddr"}, nil), descNicQueues: prometheus.NewDesc(p+"_kvm_nic_queues", "NIC queue count", []string{"id", "ifname"}, nil), - descDiskSize: prometheus.NewDesc(p+"_kvm_disk_size_bytes", "Disk size bytes", []string{"id", "disk_name"}, nil), + descDiskInfo: prometheus.NewDesc(p+"_kvm_disk_info", "Disk info", []string{ + "id", "disk_name", "block_id", "disk_path", "disk_type", + "vol_name", "pool", "pool_name", "cluster_id", "vg_name", + "device", "attached_to", "cache_mode", "detect_zeroes", "read_only", + }, nil), + descDiskSize: prometheus.NewDesc(p+"_kvm_disk_size_bytes", "Disk size bytes", []string{"id", "disk_name"}, nil), descStorageSize: prometheus.NewDesc(p+"_node_storage_size_bytes", "Storage total size", []string{"name", "type"}, nil), descStorageFree: prometheus.NewDesc(p+"_node_storage_free_bytes", "Storage free space", []string{"name", "type"}, nil), @@ -363,19 +369,23 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName) } - // Disk info metric - collect all labels - labelNames := []string{"id", "disk_name", "block_id", "disk_path", "disk_type"} - labelValues := []string{id, diskName, disk.BlockID, disk.DiskPath, disk.DiskType} - - // Add variable labels in sorted-ish order - for _, key := range sortedKeys(disk.Labels) { - labelNames = append(labelNames, key) - labelValues = append(labelValues, disk.Labels[key]) - } - - ch <- prometheus.MustNewConstMetric( - prometheus.NewDesc(c.prefix+"_kvm_disk", "Disk info", labelNames, nil), - prometheus.GaugeValue, 1, labelValues..., + // Disk info metric with fixed label set + ch <- prometheus.MustNewConstMetric(c.descDiskInfo, prometheus.GaugeValue, 1, + id, + diskName, + disk.BlockID, + disk.DiskPath, + disk.DiskType, + disk.Labels["vol_name"], + disk.Labels["pool"], + disk.Labels["pool_name"], + disk.Labels["cluster_id"], + disk.Labels["vg_name"], + disk.Labels["device"], + disk.Labels["attached_to"], + disk.Labels["cache_mode"], + disk.Labels["detect_zeroes"], + disk.Labels["read_only"], ) } } @@ -383,21 +393,30 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) { entries := c.getStorageEntries() + // Compute superset of property keys across all entries + keySet := make(map[string]struct{}) + for _, entry := range entries { + for k := range entry.Properties { + keySet[k] = struct{}{} + } + } + allKeys := sortedKeySet(keySet) + + // Create descriptor once with fixed labels for this scrape + storageInfoDesc := prometheus.NewDesc( + c.prefix+"_node_storage_info", "Storage info", allKeys, nil, + ) + for _, entry := range entries { storageType := entry.Properties["type"] storageName := entry.Properties["name"] - // Info metric - labelNames := make([]string, 0, len(entry.Properties)) - labelValues := make([]string, 0, len(entry.Properties)) - for _, key := range sortedKeys(entry.Properties) { - labelNames = append(labelNames, key) - labelValues = append(labelValues, entry.Properties[key]) + // Info metric with consistent labels + vals := make([]string, len(allKeys)) + for i, k := range allKeys { + vals[i] = entry.Properties[k] // "" if missing } - ch <- prometheus.MustNewConstMetric( - prometheus.NewDesc(c.prefix+"_node_storage", "Storage info", labelNames, nil), - prometheus.GaugeValue, 1, labelValues..., - ) + ch <- prometheus.MustNewConstMetric(storageInfoDesc, prometheus.GaugeValue, 1, vals...) // Size metrics var size storage.StorageSize @@ -478,3 +497,12 @@ func sortedKeys(m map[string]string) []string { return keys } +func sortedKeySet(m map[string]struct{}) []string { + keys := make([]string, 0, len(m)) + for k := range m { + keys = append(keys, k) + } + slices.Sort(keys) + return keys +} + diff --git a/src/internal/collector/collector_test.go b/src/internal/collector/collector_test.go index 60023b3..0b52c9a 100644 --- a/src/internal/collector/collector_test.go +++ b/src/internal/collector/collector_test.go @@ -307,7 +307,7 @@ func TestCollector_StorageMetrics(t *testing.T) { } // Check storage info - infoMetrics := metrics["pve_node_storage"] + infoMetrics := metrics["pve_node_storage_info"] if len(infoMetrics) != 1 { t.Fatalf("expected 1 storage info metric, got %d", len(infoMetrics)) } @@ -476,3 +476,89 @@ func TestCollector_BuildInfo(t *testing.T) { t.Error("build_info missing version label") } } + +func TestCollector_DiskInfoMetrics(t *testing.T) { + cfg := config.Config{ + CollectRunningVMs: true, + CollectStorage: false, + MetricsPrefix: "pve", + } + + proc := &mockProcReader{ + procs: []procfs.QEMUProcess{ + {PID: 1, VMID: "100", Name: "vm", Vcores: 1, MaxMem: 1024}, + }, + cpuTimes: map[int]procfs.CPUTimes{1: {}}, + ioCount: map[int]procfs.IOCounters{1: {}}, + status: map[int]procfs.StatusInfo{ + 1: {Threads: 1, MemoryExtended: procfs.MemoryExtended{}}, + }, + memPct: map[int]float64{1: 0}, + } + + blockOutput := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write) + Attached to: /machine/peripheral/virtioscsi0/virtio-backend + Cache mode: writeback, direct + Detect zeroes: on +drive-scsi1 (#block101): /mnt/storage/images/100/vm-100-disk-1.qcow2 (qcow2, read-only) + Attached to: /machine/peripheral/virtioscsi0/virtio-backend +` + + sys := &mockSysReader{ + blockSize: map[string]int64{ + "/dev/zvol/rpool/data/vm-100-disk-0": 10737418240, + }, + } + + qm := &mockQMMonitor{responses: map[string]string{ + "100:info network": "", + "100:info block": blockOutput, + }} + + fr := &mockFileReader{files: map[string]string{"/etc/pve/user.cfg": ""}} + c := NewWithDeps(cfg, proc, sys, qm, &mockStatFS{}, &mockCmdRunner{}, fr) + metrics := collectMetrics(c) + + diskInfo := metrics["pve_kvm_disk_info"] + if len(diskInfo) != 2 { + t.Fatalf("expected 2 disk info metrics, got %d", len(diskInfo)) + } + + // Check zvol disk + m := findMetricWithLabels(diskInfo, map[string]string{ + "id": "100", + "disk_name": "scsi0", + "disk_type": "zvol", + "cache_mode": "writeback, direct", + "detect_zeroes": "on", + "read_only": "", + "vol_name": "vm-100-disk-0", + "pool": "rpool/data", + }) + if m == nil { + t.Error("zvol disk info metric not found with expected labels") + } + + // Check qcow2 disk (read-only, no cache_mode) + m = findMetricWithLabels(diskInfo, map[string]string{ + "id": "100", + "disk_name": "scsi1", + "disk_type": "qcow2", + "read_only": "true", + "cache_mode": "", + "vol_name": "vm-100-disk-1", + }) + if m == nil { + t.Error("qcow2 disk info metric not found with expected labels") + } + + // Verify disk size for zvol + diskSize := metrics["pve_kvm_disk_size_bytes"] + if len(diskSize) < 1 { + t.Fatal("expected at least 1 disk size metric") + } + m = findMetricWithLabels(diskSize, map[string]string{"disk_name": "scsi0"}) + if m == nil || metricValue(m) != 10737418240 { + t.Errorf("disk size for scsi0 = %v", m) + } +} diff --git a/src/internal/qmmonitor/block.go b/src/internal/qmmonitor/block.go index 06ec3af..693f680 100644 --- a/src/internal/qmmonitor/block.go +++ b/src/internal/qmmonitor/block.go @@ -3,6 +3,7 @@ package qmmonitor import ( "encoding/json" "fmt" + "log/slog" "regexp" "strings" ) @@ -95,13 +96,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo { info.Labels["attached_to"] = val } else if strings.HasPrefix(line, "Cache mode:") { val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:")) - for _, mode := range strings.Split(val, ", ") { - mode = strings.TrimSpace(mode) - if mode != "" { - key := "cache_mode_" + strings.ReplaceAll(mode, " ", "_") - info.Labels[key] = "true" - } - } + info.Labels["cache_mode"] = val } else if strings.HasPrefix(line, "Detect zeroes:") { info.Labels["detect_zeroes"] = "on" } @@ -110,6 +105,9 @@ func ParseBlockInfo(raw string) map[string]DiskInfo { result[diskName] = info } + if len(result) == 0 && raw != "" { + slog.Debug("ParseBlockInfo found no disks", "rawLen", len(raw)) + } return result } diff --git a/src/internal/qmmonitor/block_test.go b/src/internal/qmmonitor/block_test.go index 8d2434d..7cb350b 100644 --- a/src/internal/qmmonitor/block_test.go +++ b/src/internal/qmmonitor/block_test.go @@ -27,11 +27,8 @@ func TestParseBlockInfo_Qcow2(t *testing.T) { if d.Labels["detect_zeroes"] != "on" { t.Errorf("detect_zeroes = %q", d.Labels["detect_zeroes"]) } - if d.Labels["cache_mode_writeback"] != "true" { - t.Errorf("cache_mode_writeback missing") - } - if d.Labels["cache_mode_direct"] != "true" { - t.Errorf("cache_mode_direct missing") + if d.Labels["cache_mode"] != "writeback, direct" { + t.Errorf("cache_mode = %q, want %q", d.Labels["cache_mode"], "writeback, direct") } }