This commit is contained in:
illustris
2026-03-11 15:01:20 +05:30
commit d8f4a77657
30 changed files with 3164 additions and 0 deletions

View File

@@ -0,0 +1,174 @@
package qmmonitor
import (
"encoding/json"
"fmt"
"regexp"
"strings"
)
// DiskInfo holds parsed block device info from "info block".
type DiskInfo struct {
	DiskName string // guest disk name, e.g. "scsi0" (text after the "drive-" prefix)
	BlockID  string // numeric id captured from the "(#blockN)" header token
	BlockID string
	DiskPath string // host path backing the disk (json: paths are resolved first)
	DiskType string // format/backend: "qcow2", "raw", or refined to "zvol"/"rbd"/"lvm" by classifyDisk
	Labels map[string]string // additional labels: vol_name, pool, device, etc.
}
// blockHeaderRe matches: "disk_name (#blockN): /path/to/disk (type, mode)"
// Capture groups: 1=disk name, 2=block id, 3=path, 4="type, mode".
var blockHeaderRe = regexp.MustCompile(`^(\w+) \(#block(\d+)\): (.+) \(([\w, -]+)\)$`)
// lvmRe matches: /dev/{vg_name}/vm-{N}-disk-{N}
// Capture groups: 1=volume group name, 2=logical volume name.
var lvmRe = regexp.MustCompile(`^/dev/([^/]+)/(vm-\d+-disk-\d+)$`)
// ParseBlockInfo parses "info block" output from qm monitor.
// Returns map of disk_name -> DiskInfo. Skips efidisk entries.
//
// Each entry in the raw output begins with a "drive-"-prefixed header line
// such as:
//
//	drive-scsi0 (#block100): /path/to/disk.qcow2 (qcow2, read-write)
//
// followed by detail lines ("Attached to:", "Cache mode:", "Detect zeroes:").
func ParseBlockInfo(raw string) map[string]DiskInfo {
	result := make(map[string]DiskInfo)
	// Split by "drive-" prefix to get individual disk blocks.
	parts := strings.Split(raw, "drive-")
	if len(parts) < 2 {
		return result
	}
	for _, part := range parts[1:] {
		// strings.Split never yields an empty slice, so lines[0] is safe.
		lines := strings.Split(strings.TrimSpace(part), "\n")
		match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
		if match == nil {
			continue
		}
		diskName := match[1]
		// Skip EFI disks.
		if strings.Contains(diskName, "efidisk") {
			continue
		}
		blockID := match[2]
		diskPath := match[3]
		// Group 4 is "type, mode"; only the type is of interest.
		diskType, _, _ := strings.Cut(match[4], ", ")
		// Handle "json:{...}" paths by resolving the host_device filename.
		if strings.HasPrefix(diskPath, "json:") {
			resolved, err := HandleJSONPath(diskPath)
			if err != nil {
				continue
			}
			diskPath = resolved
		}
		info := DiskInfo{
			DiskName: diskName,
			BlockID:  blockID,
			DiskPath: diskPath,
			DiskType: diskType,
			Labels:   make(map[string]string),
		}
		// Refine DiskType and labels based on the path (zvol/rbd/lvm/qcow2).
		classifyDisk(&info)
		// Parse additional info from the remaining detail lines.
		for _, line := range lines[1:] {
			line = strings.TrimSpace(line)
			switch {
			case strings.HasPrefix(line, "Attached to:"):
				// e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
				info.Labels["attached_to"] = strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
			case strings.HasPrefix(line, "Cache mode:"):
				// One boolean label per comma-separated cache mode flag.
				val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
				for _, mode := range strings.Split(val, ", ") {
					mode = strings.TrimSpace(mode)
					if mode != "" {
						info.Labels["cache_mode_"+strings.ReplaceAll(mode, " ", "_")] = "true"
					}
				}
			case strings.HasPrefix(line, "Detect zeroes:"):
				// Record the reported value; previously this was hard-coded
				// to "on" even when the monitor reported "off" or "unmap".
				info.Labels["detect_zeroes"] = strings.TrimSpace(strings.TrimPrefix(line, "Detect zeroes:"))
			}
		}
		result[diskName] = info
	}
	return result
}
// classifyDisk sets DiskType and extra labels based on the disk path.
func classifyDisk(info *DiskInfo) {
	p := info.DiskPath
	// qcow2 images: the volume name is the file name up to the first dot.
	if info.DiskType == "qcow2" {
		base := p[strings.LastIndex(p, "/")+1:]
		if name, _, found := strings.Cut(base, "."); found && name != "" {
			info.Labels["vol_name"] = name
		}
	}
	switch {
	case strings.HasPrefix(p, "/dev/zvol/"):
		// ZFS zvol: /dev/zvol/{pool...}/{vol_name}
		info.DiskType = "zvol"
		rest := strings.TrimPrefix(p, "/dev/zvol/")
		if i := strings.LastIndex(rest, "/"); i >= 0 {
			info.Labels["pool"] = rest[:i]
			info.Labels["vol_name"] = rest[i+1:]
		}
	case strings.HasPrefix(p, "/dev/rbd-pve/"):
		// Ceph RBD: /dev/rbd-pve/{cluster_id}/{pool}/{vol_name}
		info.DiskType = "rbd"
		segs := strings.Split(p, "/")
		if n := len(segs); n >= 5 {
			info.Labels["cluster_id"] = segs[n-3]
			info.Labels["pool"] = segs[n-2]
			info.Labels["pool_name"] = segs[n-2]
			info.Labels["vol_name"] = segs[n-1]
		}
	default:
		// LVM: /dev/{vg_name}/vm-{N}-disk-{N}
		if m := lvmRe.FindStringSubmatch(p); m != nil {
			info.DiskType = "lvm"
			info.Labels["vg_name"] = m[1]
			info.Labels["vol_name"] = m[2]
		}
	}
}
// HandleJSONPath resolves a "json:{...}" disk path by searching for
// a driver == "host_device" entry and extracting its filename.
func HandleJSONPath(path string) (string, error) {
	var tree map[string]any
	payload := strings.TrimPrefix(path, "json:")
	if err := json.Unmarshal([]byte(payload), &tree); err != nil {
		return "", fmt.Errorf("parse json path: %w", err)
	}
	filename := searchHostDevice(tree)
	if filename == "" {
		return "", fmt.Errorf("no host_device driver found in json path")
	}
	return filename, nil
}
// searchHostDevice walks a decoded blockdev JSON tree looking for a node
// with driver == "host_device" and returns its "filename" value, or ""
// when no such node exists.
//
// Generalized to also descend into JSON arrays (e.g. a "children" list of
// composite drivers), not only nested objects as before.
//
// NOTE: map iteration order is unspecified, so if multiple host_device
// nodes exist, which one is returned is arbitrary.
func searchHostDevice(data map[string]any) string {
	if driver, _ := data["driver"].(string); driver == "host_device" {
		if filename, ok := data["filename"].(string); ok {
			return filename
		}
	}
	for _, v := range data {
		switch sub := v.(type) {
		case map[string]any:
			if result := searchHostDevice(sub); result != "" {
				return result
			}
		case []any:
			for _, item := range sub {
				if obj, ok := item.(map[string]any); ok {
					if result := searchHostDevice(obj); result != "" {
						return result
					}
				}
			}
		}
	}
	return ""
}

View File

@@ -0,0 +1,149 @@
package qmmonitor
import (
"testing"
)
// TestParseBlockInfo_Qcow2 covers header parsing plus the cache-mode and
// detect-zeroes labels for a file-backed qcow2 disk.
func TestParseBlockInfo_Qcow2(t *testing.T) {
	raw := `drive-scsi0 (#block100): /mnt/storage/images/100/vm-100-disk-0.qcow2 (qcow2, read-write)
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
Cache mode: writeback, direct
Detect zeroes: on
`
	parsed := ParseBlockInfo(raw)
	if len(parsed) != 1 {
		t.Fatalf("expected 1 disk, got %d", len(parsed))
	}
	got := parsed["scsi0"]
	if got.BlockID != "100" {
		t.Errorf("block_id = %q", got.BlockID)
	}
	if got.DiskType != "qcow2" {
		t.Errorf("type = %q", got.DiskType)
	}
	if got.Labels["vol_name"] != "vm-100-disk-0" {
		t.Errorf("vol_name = %q", got.Labels["vol_name"])
	}
	if got.Labels["cache_mode_writeback"] != "true" {
		t.Errorf("cache_mode_writeback missing")
	}
	if got.Labels["cache_mode_direct"] != "true" {
		t.Errorf("cache_mode_direct missing")
	}
	if got.Labels["detect_zeroes"] != "on" {
		t.Errorf("detect_zeroes = %q", got.Labels["detect_zeroes"])
	}
}
// TestParseBlockInfo_Zvol checks pool/vol_name extraction for a ZFS zvol path.
func TestParseBlockInfo_Zvol(t *testing.T) {
	raw := `drive-scsi0 (#block200): /dev/zvol/rpool/data/vm-200-disk-0 (raw, read-write)
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
`
	got := ParseBlockInfo(raw)["scsi0"]
	if got.DiskType != "zvol" {
		t.Errorf("type = %q", got.DiskType)
	}
	if got.Labels["vol_name"] != "vm-200-disk-0" {
		t.Errorf("vol_name = %q", got.Labels["vol_name"])
	}
	if got.Labels["pool"] != "rpool/data" {
		t.Errorf("pool = %q", got.Labels["pool"])
	}
}
// TestParseBlockInfo_RBD checks cluster/pool/vol_name extraction for a Ceph
// RBD device path.
func TestParseBlockInfo_RBD(t *testing.T) {
	raw := `drive-scsi0 (#block300): /dev/rbd-pve/ceph1/pool1/vm-300-disk-0 (raw, read-write)
`
	got := ParseBlockInfo(raw)["scsi0"]
	if got.DiskType != "rbd" {
		t.Errorf("type = %q", got.DiskType)
	}
	if got.Labels["vol_name"] != "vm-300-disk-0" {
		t.Errorf("vol_name = %q", got.Labels["vol_name"])
	}
	if got.Labels["pool"] != "pool1" {
		t.Errorf("pool = %q", got.Labels["pool"])
	}
	if got.Labels["cluster_id"] != "ceph1" {
		t.Errorf("cluster_id = %q", got.Labels["cluster_id"])
	}
}
// TestParseBlockInfo_LVM checks vg_name/vol_name extraction for an LVM path.
func TestParseBlockInfo_LVM(t *testing.T) {
	raw := `drive-scsi0 (#block400): /dev/myvg/vm-400-disk-0 (raw, read-write)
`
	got := ParseBlockInfo(raw)["scsi0"]
	if got.DiskType != "lvm" {
		t.Errorf("type = %q", got.DiskType)
	}
	if got.Labels["vol_name"] != "vm-400-disk-0" {
		t.Errorf("vol_name = %q", got.Labels["vol_name"])
	}
	if got.Labels["vg_name"] != "myvg" {
		t.Errorf("vg_name = %q", got.Labels["vg_name"])
	}
}
// TestParseBlockInfo_SkipsEFI verifies that efidisk entries are dropped
// while regular disks in the same output survive.
func TestParseBlockInfo_SkipsEFI(t *testing.T) {
	raw := `drive-efidisk0 (#block500): /dev/zvol/rpool/data/vm-500-disk-1 (raw, read-write)
drive-scsi0 (#block501): /dev/zvol/rpool/data/vm-500-disk-0 (raw, read-write)
`
	parsed := ParseBlockInfo(raw)
	if _, present := parsed["efidisk0"]; present {
		t.Error("efidisk0 should be skipped")
	}
	if len(parsed) != 1 {
		t.Fatalf("expected 1 disk (efidisk skipped), got %d", len(parsed))
	}
}
// TestHandleJSONPath resolves a one-level json: path to its host_device
// filename.
func TestHandleJSONPath(t *testing.T) {
	const jsonPath = `json:{"driver":"raw","file":{"driver":"host_device","filename":"/dev/zvol/rpool/data/vm-100-disk-0"}}`
	got, err := HandleJSONPath(jsonPath)
	if err != nil {
		t.Fatal(err)
	}
	if got != "/dev/zvol/rpool/data/vm-100-disk-0" {
		t.Errorf("got %q", got)
	}
}
// TestHandleJSONPath_Nested resolves a json: path where host_device sits
// two levels deep.
func TestHandleJSONPath_Nested(t *testing.T) {
	const jsonPath = `json:{"driver":"raw","file":{"driver":"copy-on-read","file":{"driver":"host_device","filename":"/dev/rbd-pve/ceph/pool/vm-200-disk-0"}}}`
	got, err := HandleJSONPath(jsonPath)
	if err != nil {
		t.Fatal(err)
	}
	if got != "/dev/rbd-pve/ceph/pool/vm-200-disk-0" {
		t.Errorf("got %q", got)
	}
}
// TestHandleJSONPath_NoHostDevice expects an error when no host_device
// driver appears anywhere in the json: tree.
func TestHandleJSONPath_NoHostDevice(t *testing.T) {
	const jsonPath = `json:{"driver":"raw","file":{"driver":"file","filename":"/tmp/test.img"}}`
	if _, err := HandleJSONPath(jsonPath); err == nil {
		t.Fatal("expected error for missing host_device")
	}
}
// TestParseBlockInfo_MultiDisk parses two disks of different backends from
// a single "info block" dump.
func TestParseBlockInfo_MultiDisk(t *testing.T) {
	raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
drive-scsi1 (#block101): /mnt/storage/images/100/vm-100-disk-1.qcow2 (qcow2, read-write)
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
`
	parsed := ParseBlockInfo(raw)
	if len(parsed) != 2 {
		t.Fatalf("expected 2 disks, got %d", len(parsed))
	}
	if parsed["scsi0"].DiskType != "zvol" {
		t.Errorf("scsi0 type = %q", parsed["scsi0"].DiskType)
	}
	if parsed["scsi1"].DiskType != "qcow2" {
		t.Errorf("scsi1 type = %q", parsed["scsi1"].DiskType)
	}
}

View File

@@ -0,0 +1,77 @@
package qmmonitor
import (
"strconv"
"strings"
)
// NICInfo holds parsed network interface info from "info network".
type NICInfo struct {
	Netdev  string // logical name, e.g. "net0"
	Queues  int    // queue count: max(index) + 1 across the NIC's lines
	Type    string // backend type, e.g. "tap"
	Model   string // emulated model, e.g. "virtio-net-pci"
	Macaddr string // MAC address as reported
	Ifname  string // host-side interface name, e.g. "tap100i0"
}
// ParseNetworkInfo parses the output of "info network" from qm monitor.
// Format: "net0: index=0,type=tap,ifname=tap100i0,model=virtio-net-pci,macaddr=AA:BB:CC:DD:EE:FF"
// Multiqueue lines: " \ net0: index=1,type=tap,ifname=tap100i0"
// For multiqueue, same netdev appears multiple times with increasing index; queues = max(index)+1.
func ParseNetworkInfo(raw string) []NICInfo {
	nicsMap := make(map[string]map[string]string)
	maxIndex := make(map[string]int)
	var order []string // netdevs in first-seen order, for deterministic output
	for _, line := range strings.Split(raw, "\n") {
		line = strings.TrimSpace(line)
		// Strip leading "\ " from multiqueue continuation lines.
		line = strings.TrimPrefix(line, "\\ ")
		line = strings.TrimSpace(line)
		if line == "" {
			continue
		}
		netdev, cfgStr, found := strings.Cut(line, ": ")
		if !found || !strings.HasPrefix(netdev, "net") {
			continue
		}
		if _, ok := nicsMap[netdev]; !ok {
			nicsMap[netdev] = make(map[string]string)
			order = append(order, netdev)
		}
		for _, pair := range strings.Split(cfgStr, ",") {
			pair = strings.TrimSpace(pair)
			if pair == "" {
				continue
			}
			key, value, ok := strings.Cut(pair, "=")
			if !ok {
				continue
			}
			nicsMap[netdev][key] = value
			if key == "index" {
				// Track the highest index seen. Previously the last line's
				// index won, which miscounted queues if continuation lines
				// were not in strictly increasing order.
				if n, err := strconv.Atoi(value); err == nil && n > maxIndex[netdev] {
					maxIndex[netdev] = n
				}
			}
		}
	}
	result := make([]NICInfo, 0, len(order))
	for _, netdev := range order {
		cfg := nicsMap[netdev]
		result = append(result, NICInfo{
			Netdev:  netdev,
			Queues:  maxIndex[netdev] + 1,
			Type:    cfg["type"],
			Model:   cfg["model"],
			Macaddr: cfg["macaddr"],
			Ifname:  cfg["ifname"],
		})
	}
	return result
}

View File

@@ -0,0 +1,65 @@
package qmmonitor
import (
"testing"
)
// TestParseNetworkInfo_Single parses one NIC line and verifies every field.
func TestParseNetworkInfo_Single(t *testing.T) {
	raw := `net0: index=0,type=tap,ifname=tap100i0,script=/var/lib/qemu-server/pve-bridge,downscript=/var/lib/qemu-server/pve-bridgedown,model=virtio-net-pci,macaddr=AA:BB:CC:DD:EE:FF`
	parsed := ParseNetworkInfo(raw)
	if len(parsed) != 1 {
		t.Fatalf("expected 1 NIC, got %d", len(parsed))
	}
	got := parsed[0]
	if got.Netdev != "net0" {
		t.Errorf("netdev = %q", got.Netdev)
	}
	if got.Ifname != "tap100i0" {
		t.Errorf("ifname = %q", got.Ifname)
	}
	if got.Type != "tap" {
		t.Errorf("type = %q", got.Type)
	}
	if got.Model != "virtio-net-pci" {
		t.Errorf("model = %q", got.Model)
	}
	if got.Macaddr != "AA:BB:CC:DD:EE:FF" {
		t.Errorf("macaddr = %q", got.Macaddr)
	}
	if got.Queues != 1 {
		t.Errorf("queues = %d", got.Queues)
	}
}
// TestParseNetworkInfo_Multiqueue verifies queue counting across
// continuation lines and that a single-queue NIC is unaffected.
func TestParseNetworkInfo_Multiqueue(t *testing.T) {
	raw := `net0: index=0,type=tap,ifname=tap100i0,model=virtio-net-pci,macaddr=AA:BB:CC:DD:EE:FF
\ net0: index=1,type=tap,ifname=tap100i0
\ net0: index=2,type=tap,ifname=tap100i0
\ net0: index=3,type=tap,ifname=tap100i0
net1: index=0,type=tap,ifname=tap100i1,model=virtio-net-pci,macaddr=11:22:33:44:55:66`
	parsed := ParseNetworkInfo(raw)
	if len(parsed) != 2 {
		t.Fatalf("expected 2 NICs, got %d", len(parsed))
	}
	queues := make(map[string]int, len(parsed))
	for _, nic := range parsed {
		queues[nic.Netdev] = nic.Queues
	}
	if queues["net0"] != 4 {
		t.Errorf("net0 queues = %d, want 4", queues["net0"])
	}
	if queues["net1"] != 1 {
		t.Errorf("net1 queues = %d, want 1", queues["net1"])
	}
}
// TestParseNetworkInfo_Empty: empty input must yield no NICs.
func TestParseNetworkInfo_Empty(t *testing.T) {
	if got := ParseNetworkInfo(""); len(got) != 0 {
		t.Fatalf("expected 0 NICs, got %d", len(got))
	}
}

View File

@@ -0,0 +1,190 @@
package qmmonitor
import (
"bufio"
"fmt"
"io"
"log/slog"
"os/exec"
"strings"
"sync"
"time"
"pve_local_exporter/internal/cache"
)
// QMMonitor runs commands against qm monitor and caches results.
type QMMonitor interface {
	// RunCommand executes cmd in VM vmid's monitor and returns its textual
	// output; implementations may serve repeated calls from a cache.
	RunCommand(vmid, cmd string) (string, error)
	// InvalidateCache drops any cached result for the (vmid, cmd) pair.
	InvalidateCache(vmid, cmd string)
}
// RealQMMonitor spawns `qm monitor` via os/exec with pipe-based I/O.
type RealQMMonitor struct {
	timeout       time.Duration                   // deadline for prompt/response reads per interaction
	deferClose    bool                            // park wedged processes for later reaping instead of killing immediately
	cache         *cache.TTLCache[string, string] // caches command output keyed by (vmid, cmd)
	mu            sync.Mutex                      // guards deferredProcs
	deferredProcs []deferredProc                  // wedged qm processes awaiting cleanup
}
// deferredProc tracks a qm monitor process whose shutdown was deferred.
type deferredProc struct {
	cmd       *exec.Cmd      // the running (possibly wedged) qm process
	stdin     io.WriteCloser // its stdin pipe (closed before the process is parked)
	timestamp time.Time      // when it was parked; cleanupDeferred kills it after 10s
}
// NewRealQMMonitor builds a RealQMMonitor. timeout bounds each monitor
// read; maxTTL and randRange configure the result cache; deferClose parks
// wedged qm processes for delayed reaping instead of killing them at once.
func NewRealQMMonitor(timeout, maxTTL, randRange time.Duration, deferClose bool) *RealQMMonitor {
	m := &RealQMMonitor{
		timeout:    timeout,
		deferClose: deferClose,
	}
	m.cache = cache.NewTTLCache[string, string](maxTTL, randRange)
	return m
}
// cacheKey builds the cache key for a (vmid, cmd) pair, joined by a NUL
// byte so the two components cannot collide with ordinary command text.
func cacheKey(vmid, cmd string) string {
	return strings.Join([]string{vmid, cmd}, "\x00")
}
// InvalidateCache removes the cached result for (vmid, cmd), forcing the
// next RunCommand for that pair to query the monitor again.
func (m *RealQMMonitor) InvalidateCache(vmid, cmd string) {
	key := cacheKey(vmid, cmd)
	m.cache.Invalidate(key)
}
// RunCommand returns the output of cmd in VM vmid's monitor, serving from
// the TTL cache when possible and caching fresh results on success.
func (m *RealQMMonitor) RunCommand(vmid, cmd string) (string, error) {
	key := cacheKey(vmid, cmd)
	if v, ok := m.cache.Get(key); ok {
		return v, nil
	}
	// Reap timed-out deferred processes whether or not this invocation
	// succeeds; previously cleanup only ran on success, so wedged
	// processes were never reaped while commands kept failing.
	defer m.cleanupDeferred()
	result, err := m.execQMMonitor(vmid, cmd)
	if err != nil {
		return "", err
	}
	m.cache.Set(key, result)
	return result, nil
}
// execQMMonitor spawns `qm monitor <vmid>`, sends cmd, and returns the
// text between the echoed command and the next "qm>" prompt.
//
// On any prompt/IO failure the process may be wedged mid-conversation, so
// it is handed to deferCloseProcess instead of being waited on here.
func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
	qmCmd := exec.Command("qm", "monitor", vmid)
	stdin, err := qmCmd.StdinPipe()
	if err != nil {
		return "", fmt.Errorf("stdin pipe: %w", err)
	}
	stdout, err := qmCmd.StdoutPipe()
	if err != nil {
		return "", fmt.Errorf("stdout pipe: %w", err)
	}
	if err := qmCmd.Start(); err != nil {
		return "", fmt.Errorf("start qm monitor: %w", err)
	}
	reader := bufio.NewReader(stdout)
	// Wait for the initial "qm>" prompt before sending anything.
	if err := readUntilPrompt(reader, m.timeout); err != nil {
		m.deferCloseProcess(qmCmd, stdin)
		return "", fmt.Errorf("initial prompt: %w", err)
	}
	// Send the command. A failed write means the pipe is broken; this
	// error was previously ignored, leaving the code to block on the read
	// below until the timeout expired.
	if _, err := fmt.Fprintf(stdin, "%s\n", cmd); err != nil {
		m.deferCloseProcess(qmCmd, stdin)
		return "", fmt.Errorf("write command: %w", err)
	}
	// Read the response until the next "qm>" prompt.
	response, err := readResponseUntilPrompt(reader, m.timeout)
	if err != nil {
		m.deferCloseProcess(qmCmd, stdin)
		return "", fmt.Errorf("read response: %w", err)
	}
	// Clean shutdown: closing stdin ends the session; Wait reaps the process.
	stdin.Close()
	if err := qmCmd.Wait(); err != nil {
		slog.Debug("qm monitor wait error", "vmid", vmid, "err", err)
	}
	return response, nil
}
// deferCloseProcess disposes of a qm monitor process that did not finish
// its conversation cleanly. With deferClose set, the process is parked for
// later reaping by cleanupDeferred; otherwise it is killed and reaped
// immediately.
func (m *RealQMMonitor) deferCloseProcess(cmd *exec.Cmd, stdin io.WriteCloser) {
	stdin.Close()
	if !m.deferClose {
		cmd.Process.Kill()
		cmd.Wait()
		return
	}
	entry := deferredProc{
		cmd:       cmd,
		stdin:     stdin,
		timestamp: time.Now(),
	}
	m.mu.Lock()
	m.deferredProcs = append(m.deferredProcs, entry)
	m.mu.Unlock()
	slog.Warn("deferred closing qm monitor process", "pid", cmd.Process.Pid)
}
// cleanupDeferred kills and reaps deferred qm monitor processes older than
// ten seconds. Entries that are still young, or whose Kill failed, stay
// queued for the next pass.
func (m *RealQMMonitor) cleanupDeferred() {
	m.mu.Lock()
	defer m.mu.Unlock()
	var remaining []deferredProc
	for _, dp := range m.deferredProcs {
		if time.Since(dp.timestamp) <= 10*time.Second {
			remaining = append(remaining, dp)
			continue
		}
		if err := dp.cmd.Process.Kill(); err != nil {
			// Kill failed; retry on a later pass.
			remaining = append(remaining, dp)
			continue
		}
		dp.cmd.Wait()
	}
	m.deferredProcs = remaining
}
func readUntilPrompt(r *bufio.Reader, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for {
if time.Now().After(deadline) {
return fmt.Errorf("timeout waiting for qm> prompt")
}
line, err := r.ReadString('\n')
if err != nil {
// Check if we got the prompt without newline
if strings.Contains(line, "qm>") {
return nil
}
return err
}
if strings.Contains(line, "qm>") {
return nil
}
}
}
func readResponseUntilPrompt(r *bufio.Reader, timeout time.Duration) (string, error) {
deadline := time.Now().Add(timeout)
var lines []string
firstLine := true
for {
if time.Now().After(deadline) {
return "", fmt.Errorf("timeout waiting for qm> prompt")
}
line, err := r.ReadString('\n')
if err != nil {
if strings.Contains(line, "qm>") {
break
}
return "", err
}
if strings.Contains(line, "qm>") {
break
}
// Skip the echo of the command (first line)
if firstLine {
firstLine = false
continue
}
lines = append(lines, strings.TrimRight(line, "\r\n"))
}
return strings.Join(lines, "\n"), nil
}

View File

@@ -0,0 +1,59 @@
package qmmonitor
import (
"testing"
"time"
"pve_local_exporter/internal/cache"
)
// mockQMMonitor is a test double for QMMonitor.
type mockQMMonitor struct {
responses map[string]string
cache *cache.TTLCache[string, string]
}
// newMockQMMonitor builds a mock backed by canned responses keyed "vmid:cmd".
func newMockQMMonitor(responses map[string]string) *mockQMMonitor {
	m := &mockQMMonitor{responses: responses}
	m.cache = cache.NewTTLCache[string, string](time.Hour, 0)
	return m
}
// RunCommand returns the canned response for (vmid, cmd), caching it like
// the real implementation. Unknown pairs yield "" with no error.
func (m *mockQMMonitor) RunCommand(vmid, cmd string) (string, error) {
	key := cacheKey(vmid, cmd)
	if cached, ok := m.cache.Get(key); ok {
		return cached, nil
	}
	canned := m.responses[vmid+":"+cmd]
	m.cache.Set(key, canned)
	return canned, nil
}
// InvalidateCache drops the cached entry for (vmid, cmd).
func (m *mockQMMonitor) InvalidateCache(vmid, cmd string) {
	key := cacheKey(vmid, cmd)
	m.cache.Invalidate(key)
}
// TestMockQMMonitor_CacheHit: repeated calls for the same (vmid, cmd) must
// return the same value.
func TestMockQMMonitor_CacheHit(t *testing.T) {
	m := newMockQMMonitor(map[string]string{
		"100:info network": "some output",
	})
	first, _ := m.RunCommand("100", "info network")
	second, _ := m.RunCommand("100", "info network")
	if first != second {
		t.Errorf("cache miss: %q != %q", first, second)
	}
}
// TestMockQMMonitor_Invalidate: after invalidation the mock refetches the
// (identical) canned response.
func TestMockQMMonitor_Invalidate(t *testing.T) {
	m := newMockQMMonitor(map[string]string{
		"100:info network": "some output",
	})
	m.RunCommand("100", "info network")
	m.InvalidateCache("100", "info network")
	// After invalidation, it fetches again (same mock response)
	got, _ := m.RunCommand("100", "info network")
	if got != "some output" {
		t.Errorf("unexpected: %q", got)
	}
}