add trace logging
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -15,6 +16,7 @@ import (
|
||||
|
||||
"pve_local_exporter/internal/cache"
|
||||
"pve_local_exporter/internal/config"
|
||||
"pve_local_exporter/internal/logging"
|
||||
"pve_local_exporter/internal/procfs"
|
||||
"pve_local_exporter/internal/pveconfig"
|
||||
"pve_local_exporter/internal/qmmonitor"
|
||||
@@ -191,9 +193,11 @@ func (c *PVECollector) collectVMs(ch chan<- prometheus.Metric) {
|
||||
slog.Error("discover QEMU processes", "err", err)
|
||||
return
|
||||
}
|
||||
logging.Trace("collectVMs", "vm_count", len(procs))
|
||||
|
||||
// Load pool info
|
||||
vmPoolMap, pools := c.getPoolInfo()
|
||||
logging.Trace("pool info loaded", "vm_pool_map_size", len(vmPoolMap), "pools_count", len(pools))
|
||||
|
||||
for _, proc := range procs {
|
||||
c.collectVMMetrics(ch, proc, vmPoolMap, pools)
|
||||
@@ -299,8 +303,10 @@ func (c *PVECollector) collectNICMetrics(ch chan<- prometheus.Metric, proc procf
|
||||
slog.Error("qm info network", "vmid", id, "err", err)
|
||||
return
|
||||
}
|
||||
logging.Trace("qm info network response", "vmid", id, "raw_len", len(raw))
|
||||
|
||||
nics := qmmonitor.ParseNetworkInfo(raw)
|
||||
logging.Trace("parsed NICs", "vmid", id, "nic_count", len(nics))
|
||||
for _, nic := range nics {
|
||||
// NIC info metric
|
||||
ch <- prometheus.MustNewConstMetric(c.descNicInfo, prometheus.GaugeValue, 1,
|
||||
@@ -334,14 +340,16 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
||||
slog.Error("qm info block", "vmid", id, "err", err)
|
||||
return
|
||||
}
|
||||
logging.Trace("qm info block response", "vmid", id, "raw_len", len(raw))
|
||||
|
||||
disks := qmmonitor.ParseBlockInfo(raw)
|
||||
logging.Trace("parsed disks", "vmid", id, "disk_count", len(disks))
|
||||
for diskName, disk := range disks {
|
||||
// Try to get device symlink target for zvol/rbd/lvm
|
||||
if disk.DiskType == "zvol" || disk.DiskType == "rbd" || disk.DiskType == "lvm" {
|
||||
target, err := sysfs.GetDeviceSymlinkTarget(disk.DiskPath)
|
||||
if err == nil {
|
||||
disk.Labels["device"] = target
|
||||
disk.Labels["device"] = filepath.Base(target)
|
||||
} else {
|
||||
slog.Debug("resolve device symlink", "path", disk.DiskPath, "err", err)
|
||||
// Retry with cache invalidation
|
||||
@@ -365,6 +373,9 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
||||
}
|
||||
}
|
||||
|
||||
logging.Trace("disk metric", "vmid", id, "disk", diskName, "type", disk.DiskType,
|
||||
"path", disk.DiskPath, "size", diskSize, "device", disk.Labels["device"])
|
||||
|
||||
if diskSize > 0 {
|
||||
ch <- prometheus.MustNewConstMetric(c.descDiskSize, prometheus.GaugeValue, float64(diskSize), id, diskName)
|
||||
}
|
||||
@@ -392,6 +403,7 @@ func (c *PVECollector) collectDiskMetrics(ch chan<- prometheus.Metric, proc proc
|
||||
|
||||
func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
||||
entries := c.getStorageEntries()
|
||||
logging.Trace("collectStorage", "entries_count", len(entries))
|
||||
|
||||
// Compute superset of property keys across all entries
|
||||
keySet := make(map[string]struct{})
|
||||
@@ -410,6 +422,7 @@ func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
||||
for _, entry := range entries {
|
||||
storageType := entry.Properties["type"]
|
||||
storageName := entry.Properties["name"]
|
||||
logging.Trace("storage entry", "name", storageName, "type", storageType)
|
||||
|
||||
// Info metric with consistent labels
|
||||
vals := make([]string, len(allKeys))
|
||||
@@ -438,7 +451,7 @@ func (c *PVECollector) collectStorage(ch chan<- prometheus.Metric) {
|
||||
poolName := strings.Split(pool, "/")[0]
|
||||
out, runErr := c.cmdRunner.Run("zpool", "list", "-p", poolName)
|
||||
if runErr != nil {
|
||||
slog.Error("zpool list", "pool", poolName, "err", runErr)
|
||||
slog.Warn("zpool list", "pool", poolName, "err", runErr)
|
||||
continue
|
||||
}
|
||||
size, err = storage.GetZPoolSize(out)
|
||||
|
||||
14
src/internal/logging/logging.go
Normal file
14
src/internal/logging/logging.go
Normal file
@@ -0,0 +1,14 @@
|
||||
package logging
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log/slog"
|
||||
)
|
||||
|
||||
// LevelTrace is a custom log level below Debug for very detailed diagnostic output.
|
||||
const LevelTrace = slog.Level(-8)
|
||||
|
||||
// Trace logs a message at TRACE level using the default logger.
|
||||
func Trace(msg string, args ...any) {
|
||||
slog.Default().Log(context.Background(), LevelTrace, msg, args...)
|
||||
}
|
||||
@@ -2,10 +2,13 @@ package procfs
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
const clkTck = 100 // sysconf(_SC_CLK_TCK) on Linux
|
||||
@@ -83,6 +86,7 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
numericPIDs := 0
|
||||
var procs []QEMUProcess
|
||||
for _, e := range entries {
|
||||
if !e.IsDir() {
|
||||
@@ -92,26 +96,32 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
numericPIDs++
|
||||
|
||||
exe, err := os.Readlink(filepath.Join(r.ProcPath, e.Name(), "exe"))
|
||||
if err != nil {
|
||||
logging.Trace("proc readlink failed", "pid", pid, "err", err)
|
||||
continue
|
||||
}
|
||||
if exe != "/usr/bin/qemu-system-x86_64" {
|
||||
logging.Trace("proc exe skip", "pid", pid, "exe", exe)
|
||||
continue
|
||||
}
|
||||
|
||||
cmdlineBytes, err := os.ReadFile(filepath.Join(r.ProcPath, e.Name(), "cmdline"))
|
||||
if err != nil {
|
||||
logging.Trace("proc cmdline read failed", "pid", pid, "err", err)
|
||||
continue
|
||||
}
|
||||
cmdline := ParseCmdline(cmdlineBytes)
|
||||
|
||||
vmid := FlagValue(cmdline, "-id")
|
||||
if vmid == "" {
|
||||
logging.Trace("proc no -id flag", "pid", pid)
|
||||
continue
|
||||
}
|
||||
if !r.VMConfigExists(vmid) {
|
||||
logging.Trace("proc no config", "pid", pid, "vmid", vmid)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -123,8 +133,14 @@ func (r *RealProcReader) DiscoverQEMUProcesses() ([]QEMUProcess, error) {
|
||||
}
|
||||
proc.Vcores = ParseVcores(cmdline)
|
||||
proc.MaxMem = ParseMem(cmdline)
|
||||
logging.Trace("proc discovered VM", "pid", pid, "vmid", vmid, "name", proc.Name)
|
||||
procs = append(procs, proc)
|
||||
}
|
||||
|
||||
logging.Trace("proc scan complete", "numeric_pids", numericPIDs, "qemu_count", len(procs))
|
||||
if len(procs) == 0 {
|
||||
slog.Warn("no QEMU processes discovered", "numeric_pids", numericPIDs, "proc_path", r.ProcPath, "pve_cfg_path", r.PVECfgPath)
|
||||
}
|
||||
return procs, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@ package pveconfig
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
// StorageEntry holds a parsed storage definition from storage.cfg.
|
||||
@@ -47,6 +49,7 @@ func ParseStorageConfig(data string) []StorageEntry {
|
||||
"name": SanitizeKey(sectionName),
|
||||
},
|
||||
}
|
||||
logging.Trace("storage.cfg section", "type", sectionType, "name", sectionName)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -66,5 +69,6 @@ func ParseStorageConfig(data string) []StorageEntry {
|
||||
result = append(result, *current)
|
||||
}
|
||||
|
||||
logging.Trace("ParseStorageConfig complete", "entries", len(result))
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"log/slog"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
// DiskInfo holds parsed block device info from "info block".
|
||||
@@ -44,6 +46,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
||||
|
||||
match := blockHeaderRe.FindStringSubmatch(strings.TrimSpace(lines[0]))
|
||||
if match == nil {
|
||||
logging.Trace("block header no match", "line", lines[0])
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -69,8 +72,10 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
||||
if strings.HasPrefix(diskPath, "json:") {
|
||||
resolved, err := HandleJSONPath(diskPath)
|
||||
if err != nil {
|
||||
logging.Trace("block json path error", "disk", diskName, "err", err)
|
||||
continue
|
||||
}
|
||||
logging.Trace("block json resolved", "disk", diskName, "resolved", resolved)
|
||||
diskPath = resolved
|
||||
}
|
||||
|
||||
@@ -88,6 +93,7 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
||||
|
||||
// Detect disk type from path
|
||||
classifyDisk(&info)
|
||||
logging.Trace("block classified", "disk", diskName, "type", info.DiskType, "path", diskPath)
|
||||
|
||||
// Parse additional info from remaining lines
|
||||
for _, line := range lines[1:] {
|
||||
@@ -95,6 +101,13 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
||||
if strings.HasPrefix(line, "Attached to:") {
|
||||
// Extract device ID, e.g. "Attached to: /machine/peripheral/virtio0/virtio-backend"
|
||||
val := strings.TrimSpace(strings.TrimPrefix(line, "Attached to:"))
|
||||
// Extract short device name from QOM path
|
||||
if strings.Contains(val, "/") {
|
||||
qomParts := strings.Split(val, "/")
|
||||
if len(qomParts) > 3 {
|
||||
val = qomParts[3]
|
||||
}
|
||||
}
|
||||
info.Labels["attached_to"] = val
|
||||
} else if strings.HasPrefix(line, "Cache mode:") {
|
||||
val := strings.TrimSpace(strings.TrimPrefix(line, "Cache mode:"))
|
||||
@@ -105,9 +118,11 @@ func ParseBlockInfo(raw string) map[string]DiskInfo {
|
||||
}
|
||||
}
|
||||
|
||||
logging.Trace("block parsed", "disk", diskName, "labels", info.Labels)
|
||||
result[diskName] = info
|
||||
}
|
||||
|
||||
logging.Trace("ParseBlockInfo complete", "disk_count", len(result))
|
||||
if len(result) == 0 && raw != "" {
|
||||
slog.Debug("ParseBlockInfo found no disks", "rawLen", len(raw))
|
||||
}
|
||||
|
||||
@@ -218,6 +218,39 @@ func TestParseBlockInfo_DetectZeroesUnmap(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBlockInfo_AttachedToVirtio(t *testing.T) {
|
||||
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||
Attached to: /machine/peripheral/virtio0/virtio-backend
|
||||
`
|
||||
disks := ParseBlockInfo(raw)
|
||||
d := disks["scsi0"]
|
||||
if d.Labels["attached_to"] != "virtio0" {
|
||||
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtio0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBlockInfo_AttachedToVirtioScsi(t *testing.T) {
|
||||
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||
`
|
||||
disks := ParseBlockInfo(raw)
|
||||
d := disks["scsi0"]
|
||||
if d.Labels["attached_to"] != "virtioscsi0" {
|
||||
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "virtioscsi0")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBlockInfo_AttachedToBare(t *testing.T) {
|
||||
raw := `drive-ide2 (#block100): /path/to/disk.iso (raw, read-only)
|
||||
Attached to: ide2
|
||||
`
|
||||
disks := ParseBlockInfo(raw)
|
||||
d := disks["ide2"]
|
||||
if d.Labels["attached_to"] != "ide2" {
|
||||
t.Errorf("attached_to = %q, want %q", d.Labels["attached_to"], "ide2")
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseBlockInfo_MultiDisk(t *testing.T) {
|
||||
raw := `drive-scsi0 (#block100): /dev/zvol/rpool/data/vm-100-disk-0 (raw, read-write)
|
||||
Attached to: /machine/peripheral/virtioscsi0/virtio-backend
|
||||
|
||||
@@ -3,6 +3,8 @@ package qmmonitor
|
||||
import (
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
// NICInfo holds parsed network interface info from "info network".
|
||||
@@ -64,14 +66,17 @@ func ParseNetworkInfo(raw string) []NICInfo {
|
||||
var result []NICInfo
|
||||
for netdev, cfg := range nicsMap {
|
||||
idx, _ := strconv.Atoi(cfg["index"])
|
||||
result = append(result, NICInfo{
|
||||
nic := NICInfo{
|
||||
Netdev: netdev,
|
||||
Queues: idx + 1,
|
||||
Type: cfg["type"],
|
||||
Model: cfg["model"],
|
||||
Macaddr: cfg["macaddr"],
|
||||
Ifname: cfg["ifname"],
|
||||
})
|
||||
}
|
||||
logging.Trace("parsed NIC", "netdev", netdev, "ifname", nic.Ifname, "queues", nic.Queues, "model", nic.Model)
|
||||
result = append(result, nic)
|
||||
}
|
||||
logging.Trace("ParseNetworkInfo complete", "nic_count", len(result))
|
||||
return result
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
|
||||
"github.com/creack/pty"
|
||||
"pve_local_exporter/internal/cache"
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
var errTimeout = errors.New("timeout waiting for qm monitor")
|
||||
@@ -75,6 +76,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
||||
slog.Debug("qm monitor exec", "vmid", vmid, "cmd", cmd)
|
||||
start := time.Now()
|
||||
|
||||
logging.Trace("qm pty spawn start", "vmid", vmid)
|
||||
qmCmd := exec.Command("qm", "monitor", vmid)
|
||||
qmCmd.Env = append(os.Environ(), "TERM=dumb")
|
||||
|
||||
@@ -82,6 +84,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("start qm monitor: %w", err)
|
||||
}
|
||||
logging.Trace("qm pty spawn success", "vmid", vmid, "pid", qmCmd.Process.Pid)
|
||||
|
||||
reader := bufio.NewReader(ptmx)
|
||||
|
||||
@@ -93,8 +96,10 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
||||
m.killOrDefer(qmCmd, ptmx)
|
||||
return "", fmt.Errorf("initial prompt: %w", err)
|
||||
}
|
||||
logging.Trace("qm initial prompt received", "vmid", vmid)
|
||||
|
||||
// Send command
|
||||
logging.Trace("qm send command", "vmid", vmid, "cmd", cmd)
|
||||
fmt.Fprintf(ptmx, "%s\n", cmd)
|
||||
|
||||
// Read response until next "qm>" prompt
|
||||
@@ -105,6 +110,7 @@ func (m *RealQMMonitor) execQMMonitor(vmid, cmd string) (string, error) {
|
||||
m.killOrDefer(qmCmd, ptmx)
|
||||
return "", fmt.Errorf("read response: %w", err)
|
||||
}
|
||||
logging.Trace("qm raw response", "vmid", vmid, "raw_len", len(raw))
|
||||
|
||||
response := parseQMResponse(raw)
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
// StorageSize holds the total and free bytes of a storage pool.
|
||||
@@ -51,6 +53,8 @@ func GetZPoolSize(output string) (StorageSize, error) {
|
||||
return StorageSize{}, fmt.Errorf("not enough fields in zpool output: %q", lines[1])
|
||||
}
|
||||
|
||||
logging.Trace("zpool fields", "name", fields[0], "size", fields[1], "alloc", fields[2], "free", fields[3])
|
||||
|
||||
total, err := strconv.ParseInt(fields[1], 10, 64)
|
||||
if err != nil {
|
||||
return StorageSize{}, fmt.Errorf("parse total: %w", err)
|
||||
@@ -60,5 +64,6 @@ func GetZPoolSize(output string) (StorageSize, error) {
|
||||
return StorageSize{}, fmt.Errorf("parse free: %w", err)
|
||||
}
|
||||
|
||||
logging.Trace("zpool parsed", "total", total, "free", free)
|
||||
return StorageSize{Total: total, Free: free}, nil
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
// SysReader abstracts /sys access for testability.
|
||||
@@ -46,6 +48,7 @@ func (r *RealSysReader) ReadInterfaceStats(ifname string) (map[string]int64, err
|
||||
}
|
||||
stats[e.Name()] = val
|
||||
}
|
||||
logging.Trace("interface stats", "ifname", ifname, "stat_count", len(stats))
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
@@ -58,6 +61,7 @@ func (r *RealSysReader) GetBlockDeviceSize(devPath string) (int64, error) {
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("resolve symlink %s: %w", devPath, err)
|
||||
}
|
||||
logging.Trace("block device resolved", "path", devPath, "resolved", resolved)
|
||||
|
||||
// Extract device name from /dev/XXX
|
||||
devName := filepath.Base(resolved)
|
||||
@@ -84,5 +88,6 @@ func GetDeviceSymlinkTarget(devPath string) (string, error) {
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
logging.Trace("device symlink resolved", "path", devPath, "target", resolved)
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
18
src/main.go
18
src/main.go
@@ -15,6 +15,7 @@ import (
|
||||
|
||||
"pve_local_exporter/internal/collector"
|
||||
"pve_local_exporter/internal/config"
|
||||
"pve_local_exporter/internal/logging"
|
||||
)
|
||||
|
||||
var version string
|
||||
@@ -30,6 +31,8 @@ func main() {
|
||||
|
||||
level := slog.LevelInfo
|
||||
switch strings.ToUpper(cfg.LogLevel) {
|
||||
case "TRACE":
|
||||
level = logging.LevelTrace
|
||||
case "DEBUG":
|
||||
level = slog.LevelDebug
|
||||
case "WARNING", "WARN":
|
||||
@@ -37,7 +40,18 @@ func main() {
|
||||
case "ERROR", "CRITICAL":
|
||||
level = slog.LevelError
|
||||
}
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{Level: level})))
|
||||
slog.SetDefault(slog.New(slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
|
||||
Level: level,
|
||||
ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr {
|
||||
if a.Key == slog.LevelKey {
|
||||
lvl := a.Value.Any().(slog.Level)
|
||||
if lvl == logging.LevelTrace {
|
||||
a.Value = slog.StringValue("TRACE")
|
||||
}
|
||||
}
|
||||
return a
|
||||
},
|
||||
})))
|
||||
|
||||
reg := prometheus.NewRegistry()
|
||||
c := collector.New(cfg)
|
||||
@@ -60,7 +74,7 @@ func main() {
|
||||
server.Close()
|
||||
}()
|
||||
|
||||
log.Printf("listening on %s", addr)
|
||||
slog.Info("listening", "addr", addr)
|
||||
if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user