add trace logging

This commit is contained in:
illustris
2026-03-12 19:39:35 +05:30
parent f15d0a90c8
commit ac2ac7a27c
11 changed files with 136 additions and 6 deletions

View File

@@ -5,6 +5,7 @@ import (
"log/slog" "log/slog"
"os" "os"
"os/exec" "os/exec"
"path/filepath"
"slices" "slices"
"strconv" "strconv"
"strings" "strings"
@@ -15,6 +16,7 @@ import (
"pve_local_exporter/internal/cache" "pve_local_exporter/internal/cache"
"pve_local_exporter/internal/config" "pve_local_exporter/internal/config"
"pve_local_exporter/internal/logging"
"pve_local_exporter/internal/procfs" "pve_local_exporter/internal/procfs"
"pve_local_exporter/internal/pveconfig" "pve_local_exporter/internal/pveconfig"
"pve_local_exporter/internal/qmmonitor" "pve_local_exporter/internal/qmmonitor"
@@ -191,9 +193,11 @@ func (c *PVECollector) collectVMs(ch chan<- prometheus.Metric) {
slog.Error("discover QEMU processes", "err", err) slog.Error("discover QEMU processes", "err", err)
return return
} }
logging.Trace("collectVMs", "vm_count", len(procs))
// Load pool info // Load pool info
vmPoolMap, pools := c.getPoolInfo() vmPoolMap, pools := c.getPoolInfo()
logging.Trace("pool info loaded", "vm_pool_map_size", len(vmPoolMap), "pools_count", len(pools))
for _, proc := range procs { for _, proc := range procs {
c.collectVMMetrics(ch, proc, vmPoolMap, pools) c.collectVMMetrics(ch, proc, vmPoolMap, pools)
@@ -299,8 +303,10 @@ func (c *PVECollector) collectNICMetrics(ch chan<- prometheus.Metric, proc procf
slog.Error("qm info network", "vmid", id, "err", err) slog.Error("qm info network", "vmid", id, "err", err)
return return
} }
logging.Trace("qm info network response", "vmid", id, "raw_len", len(raw))
nics := qmmonitor.ParseNetworkInfo(raw) nics := qmmonitor.ParseNetworkInfo(raw)
logging.Trace("parsed NICs", "vmid", id, "nic_count", len(nics))
for _, nic := range nics { for _, nic := range nics {
// NIC info metric // NIC info metric
ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1, ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1,
@@ -334,14 +340,16 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
slog.Error("qm info block", "vmid", id, "err", err) slog.Error("qm info block", "vmid", id, "err", err)
return return
} }
logging.Trace("qm info block response", "vmid", id, "raw_len", len(raw))
disks := qmmonitor.ParseBlockInfo(raw) disks := qmmonitor.ParseBlockInfo(raw)
logging.Trace("parsed disks", "vmid", id, "disk_count", len(disks))
for diskName, disk := range disks { for diskName, disk := range disks {
// Try to get device symlink target for zvol/rbd/lvm // Try to get device symlink target for zvol/rbd/lvm
if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" { if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" {
target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath) target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath)
if err == nil { if err == nil {
disk.Labels["device"] = target disk.Labels["device"] = filepath.Base(target)
} else { } else {
slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err) slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err)
// Retry with cache invalidation // Retry with cache invalidation
@@ -365,6 +373,9 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
} }
} }
logging.Trace("disk metric", "vmid", id, "disk", diskName, "type", disk.DiskType,
"path", disk.DiskPath, "size", diskSize, "device", disk.Labels["device"])
if diskSize > 0 { if diskSize > 0 {
ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName) ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName)
} }
@@ -392,6 +403,7 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) { func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
entries := c.getStorageEntries() entries := c.getStorageEntries()
logging.Trace("collectStorage", "entries_count", len(entries))
// Compute superset of property keys across all entries // Compute superset of property keys across all entries
keySet := make(map[string]struct{}) keySet := make(map[string]struct{})
@@ -410,6 +422,7 @@ func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
for _, entry := range entries { for _, entry := range entries {
storageType := entry.Properties["type"] storageType := entry.Properties["type"]
storageName := entry.Properties["name"] storageName := entry.Properties["name"]
logging.Trace("storage entry", "name", storageName, "type", storageType)
// Info metric with consistent labels // Info metric with consistent labels
vals := make([]string, len(allKeys)) vals := make([]string, len(allKeys))
@@ -438,7 +451,7 @@ func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
poolName := strings.Split(pool, "/")[0] poolName := strings.Split(pool, "/")[0]
out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName) out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName)
if runErr != nil { if runErr != nil {
slog.Error("zpool list", "pool", poolName, "err", runErr) slog.Warn("zpool list", "pool", poolName, "err", runErr)
continue continue
} }
size, err = storage.GetZPoolSize(out) size, err = storage.GetZPoolSize(out)

View File

@@ -0,0 +1,14 @@
package logging
import (
"context"
"log/slog"
)
// LevelTrace is a custom log level below Debug for very detailed diagnostic output.
const LevelTrace = slog.Level(-8)
// Trace logs a message at TRACE level using the default logger.
func Trace(msg string, args ...any) {
slog.Default().Log(context.Background(), LevelTrace, msg, args...)
}

View File

@@ -2,10 +2,13 @@ package procfs
import ( import (
"fmt" "fmt"
"log/slog"
"os" "os"
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
"pve_local_exporter/internal/logging"
) )
const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux
@@ -83,6 +86,7 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
return nil, err return nil, err
} }
numericPIDs := 0
var procs []QEMUProcess var procs []QEMUProcess
for _, e := range entries { for _, e := range entries {
if !e.IsDir() { if !e.IsDir() {
@@ -92,26 +96,32 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
if err != nil { if err != nil {
continue continue
} }
numericPIDs++
exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe")) exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe"))
if err != nil { if err != nil {
logging.Trace("proc readlink failed", "pid", pid, "err", err)
continue continue
} }
if exe != "/usr/bin/qemu-system-x86_64" { if exe != "/usr/bin/qemu-system-x86_64" {
logging.Trace("proc exe skip", "pid", pid, "exe", exe)
continue continue
} }
cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline")) cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline"))
if err != nil { if err != nil {
logging.Trace("proc cmdline read failed", "pid", pid, "err", err)
continue continue
} }
cmdline := ParseCmdline(cmdlineBytes) cmdline := ParseCmdline(cmdlineBytes)
vmid := FlagValue(cmdline, "-id") vmid := FlagValue(cmdline, "-id")
if vmid == "" { if vmid == "" {
logging.Trace("proc no -id flag", "pid", pid)
continue continue
} }
if !r.VMConfigExists(vmid) { if !r.VMConfigExists(vmid) {
logging.Trace("proc no config", "pid", pid, "vmid", vmid)
continue continue
} }
@@ -123,8 +133,14 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
} }
proc.Vcores = ParseVcores(cmdline) proc.Vcores = ParseVcores(cmdline)
proc.MaxMem = ParseMem(cmdline) proc.MaxMem = ParseMem(cmdline)
logging.Trace("proc discovered VM", "pid", pid, "vmid", vmid, "name", proc.Name)
procs = append(procs, proc) procs = append(procs, proc)
} }
logging.Trace("proc scan complete", "numeric_pids", numericPIDs, "qemu_count", len(procs))
if len(procs) == 0 {
slog.Warn("no QEMU processes discovered", "numeric_pids", numericPIDs, "proc_path", r.ProcPath, "pve_cfg_path", r.PVECfgPath)
}
return procs, nil return procs, nil
} }

View File

@@ -3,6 +3,8 @@ package pveconfig
import ( import (
"regexp" "regexp"
"strings" "strings"
"pve_local_exporter/internal/logging"
) )
// StorageEntry holds a parsed storage definition from storage.cfg. // StorageEntry holds a parsed storage definition from storage.cfg.
@@ -47,6 +49,7 @@ func ParseStorageConfig(data string) []StorageEntry {
"name": SanitizeKey(sectionName), "name": SanitizeKey(sectionName),
}, },
} }
logging.Trace("storage.cfg section", "type", sectionType, "name", sectionName)
continue continue
} }
@@ -66,5 +69,6 @@ func ParseStorageConfig(data string) []StorageEntry {
result = append(result, *current) result = append(result, *current)
} }
logging.Trace("ParseStorageConfig complete", "entries", len(result))
return result return result
} }

View File

@@ -6,6 +6,8 @@ import (
"log/slog" "log/slog"
"regexp" "regexp"
"strings" "strings"
"pve_local_exporter/internal/logging"
) )
// DiskInfo holds parsed block device info from "info block". // DiskInfo holds parsed block device info from "info block".
@@ -44,6 +46,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0])) match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
if match == nil { if match == nil {
logging.Trace("block header no match", "line", lines[0])
continue continue
} }
@@ -69,8 +72,10 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
if strings.HasPrefix(diskPath, "json:") { if strings.HasPrefix(diskPath, "json:") {
resolved, err := HandleJSONPath(diskPath) resolved, err := HandleJSONPath(diskPath)
if err != nil { if err != nil {
logging.Trace("block json path error", "disk", diskName, "err", err)
continue continue
} }
logging.Trace("block json resolved", "disk", diskName, "resolved", resolved)
diskPath = resolved diskPath = resolved
} }
@@ -88,6 +93,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
// Detect disk type from path // Detect disk type from path
classifyDisk(&info) classifyDisk(&info)
logging.Trace("block classified", "disk", diskName, "type", info.DiskType, "path", diskPath)
// Parse additional info from remaining lines // Parse additional info from remaining lines
for _, line := range lines[1:] { for _, line := range lines[1:] {
@@ -95,6 +101,13 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
if strings.HasPrefix(line, "Attached to:") { if strings.HasPrefix(line, "Attached to:") {
// Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend" // Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:")) val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
// Extract short device name from QOM path
if strings.Contains(val, "/") {
qomParts := strings.Split(val, "/")
if len(qomParts) > 3 {
val = qomParts[3]
}
}
info.Labels["attached_to"] = val info.Labels["attached_to"] = val
} else if strings.HasPrefix(line, "Cache mode:") { } else if strings.HasPrefix(line, "Cache mode:") {
val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:")) val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
@@ -105,9 +118,11 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
} }
} }
logging.Trace("block parsed", "disk", diskName, "labels", info.Labels)
result[diskName] = info result[diskName] = info
} }
logging.Trace("ParseBlockInfo complete", "disk_count", len(result))
if len(result) == 0 && raw != "" { if len(result) == 0 && raw != "" {
slog.Debug("ParseBlockInfo found no disks", "rawLen", len(raw)) slog.Debug("ParseBlockInfo found no disks", "rawLen", len(raw))
} }

View File

@@ -218,6 +218,39 @@ func TestParseBlockInfo_DetectZeroesUnmap(t *testing.T) {
} }
} }
func TestParseBlockInfo_AttachedToVirtio(t *testing.T) {
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
Attached to: /machine/peripheral/virtio0/virtio-backend
`
disks := ParseBlockInfo(raw)
d := disks["scsi0"]
if d.Labels["attached_to"] != "virtio0" {
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtio0")
}
}
func TestParseBlockInfo_AttachedToVirtioScsi(t *testing.T) {
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
`
disks := ParseBlockInfo(raw)
d := disks["scsi0"]
if d.Labels["attached_to"] != "virtioscsi0" {
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtioscsi0")
}
}
func TestParseBlockInfo_AttachedToBare(t *testing.T) {
raw := `drive-ide2 (#block100): /path/to/disk.iso (raw, read-only)
Attached to: ide2
`
disks := ParseBlockInfo(raw)
d := disks["ide2"]
if d.Labels["attached_to"] != "ide2" {
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "ide2")
}
}
func TestParseBlockInfo_MultiDisk(t *testing.T) { func TestParseBlockInfo_MultiDisk(t *testing.T) {
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write) raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
Attached to: /machine/peripheral/virtioscsi0/virtio-backend Attached to: /machine/peripheral/virtioscsi0/virtio-backend

View File

@@ -3,6 +3,8 @@ package qmmonitor
import ( import (
"strconv" "strconv"
"strings" "strings"
"pve_local_exporter/internal/logging"
) )
// NICInfo holds parsed network interface info from "info network". // NICInfo holds parsed network interface info from "info network".
@@ -64,14 +66,17 @@ func ParseNetworkInfo(raw string) []NICInfo {
var result []NICInfo var result []NICInfo
for netdev, cfg := range nicsMap { for netdev, cfg := range nicsMap {
idx, _ := strconv.Atoi(cfg["index"]) idx, _ := strconv.Atoi(cfg["index"])
result = append(result, NICInfo{ nic := NICInfo{
Netdev: netdev, Netdev: netdev,
Queues: idx + 1, Queues: idx + 1,
Type: cfg["type"], Type: cfg["type"],
Model: cfg["model"], Model: cfg["model"],
Macaddr: cfg["macaddr"], Macaddr: cfg["macaddr"],
Ifname: cfg["ifname"], Ifname: cfg["ifname"],
}) }
logging.Trace("parsed NIC", "netdev", netdev, "ifname", nic.Ifname, "queues", nic.Queues, "model", nic.Model)
result = append(result, nic)
} }
logging.Trace("ParseNetworkInfo complete", "nic_count", len(result))
return result return result
} }

View File

@@ -14,6 +14,7 @@ import (
"github.com/creack/pty" "github.com/creack/pty"
"pve_local_exporter/internal/cache" "pve_local_exporter/internal/cache"
"pve_local_exporter/internal/logging"
) )
var errTimeout = errors.New("timeout waiting for qm monitor") var errTimeout = errors.New("timeout waiting for qm monitor")
@@ -75,6 +76,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
slog.Debug("qm monitor exec", "vmid", vmid, "cmd", cmd) slog.Debug("qm monitor exec", "vmid", vmid, "cmd", cmd)
start := time.Now() start := time.Now()
logging.Trace("qm pty spawn start", "vmid", vmid)
qmCmd := exec.Command("qm", "monitor", vmid) qmCmd := exec.Command("qm", "monitor", vmid)
qmCmd.Env = append(os.Environ(), "TERM=dumb") qmCmd.Env = append(os.Environ(), "TERM=dumb")
@@ -82,6 +84,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
if err != nil { if err != nil {
return "", fmt.Errorf("start qm monitor: %w", err) return "", fmt.Errorf("start qm monitor: %w", err)
} }
logging.Trace("qm pty spawn success", "vmid", vmid, "pid", qmCmd.Process.Pid)
reader := bufio.NewReader(ptmx) reader := bufio.NewReader(ptmx)
@@ -93,8 +96,10 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
m.killOrDefer(qmCmd, ptmx) m.killOrDefer(qmCmd, ptmx)
return "", fmt.Errorf("initial prompt: %w", err) return "", fmt.Errorf("initial prompt: %w", err)
} }
logging.Trace("qm initial prompt received", "vmid", vmid)
// Send command // Send command
logging.Trace("qm send command", "vmid", vmid, "cmd", cmd)
fmt.Fprintf(ptmx, "%s\n", cmd) fmt.Fprintf(ptmx, "%s\n", cmd)
// Read response until next "qm>" prompt // Read response until next "qm>" prompt
@@ -105,6 +110,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
m.killOrDefer(qmCmd, ptmx) m.killOrDefer(qmCmd, ptmx)
return "", fmt.Errorf("read response: %w", err) return "", fmt.Errorf("read response: %w", err)
} }
logging.Trace("qm raw response", "vmid", vmid, "raw_len", len(raw))
response := parseQMResponse(raw) response := parseQMResponse(raw)

View File

@@ -5,6 +5,8 @@ import (
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
"pve_local_exporter/internal/logging"
) )
// StorageSize holds the total and free bytes of a storage pool. // StorageSize holds the total and free bytes of a storage pool.
@@ -51,6 +53,8 @@ func GetZPoolSize(output string) (StorageSize, error) {
return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1]) return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1])
} }
logging.Trace("zpool fields", "name", fields[0], "size", fields[1], "alloc", fields[2], "free", fields[3])
total, err := strconv.ParseInt(fields[1], 10, 64) total, err := strconv.ParseInt(fields[1], 10, 64)
if err != nil { if err != nil {
return StorageSize{}, fmt.Errorf("parse total: %w", err) return StorageSize{}, fmt.Errorf("parse total: %w", err)
@@ -60,5 +64,6 @@ func GetZPoolSize(output string) (StorageSize, error) {
return StorageSize{}, fmt.Errorf("parse free: %w", err) return StorageSize{}, fmt.Errorf("parse free: %w", err)
} }
logging.Trace("zpool parsed", "total", total, "free", free)
return StorageSize{Total: total, Free: free}, nil return StorageSize{Total: total, Free: free}, nil
} }

View File

@@ -6,6 +6,8 @@ import (
"path/filepath" "path/filepath"
"strconv" "strconv"
"strings" "strings"
"pve_local_exporter/internal/logging"
) )
// SysReader abstracts /sys access for testability. // SysReader abstracts /sys access for testability.
@@ -46,6 +48,7 @@ func (r *RealSysReader) ReadInterfaceStats(ifname string) (map[string]int64, err
} }
stats[e.Name()] = val stats[e.Name()] = val
} }
logging.Trace("interface stats", "ifname", ifname, "stat_count", len(stats))
return stats, nil return stats, nil
} }
@@ -58,6 +61,7 @@ func (r *RealSysReader) GetBlockDeviceSize(devPath string) (int64, error) {
if err != nil { if err != nil {
return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err) return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err)
} }
logging.Trace("block device resolved", "path", devPath, "resolved", resolved)
// Extract device name from /dev/XXX // Extract device name from /dev/XXX
devName := filepath.Base(resolved) devName := filepath.Base(resolved)
@@ -84,5 +88,6 @@ func GetDeviceSymlinkTarget(devPath string) (string, error) {
if err != nil { if err != nil {
return "", err return "", err
} }
logging.Trace("device symlink resolved", "path", devPath, "target", resolved)
return resolved, nil return resolved, nil
} }

View File

@@ -15,6 +15,7 @@ import (
"pve_local_exporter/internal/collector" "pve_local_exporter/internal/collector"
"pve_local_exporter/internal/config" "pve_local_exporter/internal/config"
"pve_local_exporter/internal/logging"
) )
var version string var version string
@@ -30,6 +31,8 @@ func main() {
level := slog.LevelInfo level := slog.LevelInfo
switch strings.ToUpper(cfg.LogLevel) { switch strings.ToUpper(cfg.LogLevel) {
case "TRACE":
level = logging.LevelTrace
case "DEBUG": case "DEBUG":
level = slog.LevelDebug level = slog.LevelDebug
case "WARNING", "WARN": case "WARNING", "WARN":
@@ -37,7 +40,18 @@ func main() {
case "ERROR", "CRITICAL": case "ERROR", "CRITICAL":
level = slog.LevelError level = slog.LevelError
} }
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level}))) slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
Level: level,
ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr {
if a.Key == slog.LevelKey {
lvl := a.Value.Any().(slog.Level)
if lvl == logging.LevelTrace {
a.Value = slog.StringValue("TRACE")
}
}
return a
},
})))
reg := prometheus.NewRegistry() reg := prometheus.NewRegistry()
c := collector.New(cfg) c := collector.New(cfg)
@@ -60,7 +74,7 @@ func main() {
server.Close() server.Close()
}() }()
log.Printf("listening on %s", addr) slog.Info("listening", "addr", addr)
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
log.Fatal(err) log.Fatal(err)
} }