forked from Minki/linux
be03ea3b77
Improve consistency in the interactive dialogue for pid filtering by removing any filters on empty input (in addition to entering 0). Signed-off-by: Stefan Raspl <raspl@linux.vnet.ibm.com> Reviewed-by: Janosch Frank <frankja@linux.vnet.ibm.com> Reviewed-by: Marc Hartmayer <mhartmay@linux.vnet.ibm.com> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
1221 lines
38 KiB
Python
Executable File
1221 lines
38 KiB
Python
Executable File
#!/usr/bin/python
|
|
#
|
|
# top-like utility for displaying kvm statistics
|
|
#
|
|
# Copyright 2006-2008 Qumranet Technologies
|
|
# Copyright 2008-2011 Red Hat, Inc.
|
|
#
|
|
# Authors:
|
|
# Avi Kivity <avi@redhat.com>
|
|
#
|
|
# This work is licensed under the terms of the GNU GPL, version 2. See
|
|
# the COPYING file in the top-level directory.
|
|
"""The kvm_stat module outputs statistics about running KVM VMs
|
|
|
|
Three different ways of output formatting are available:
|
|
- as a top-like text ui
|
|
- in a key -> value format
|
|
- in an all keys, all values format
|
|
|
|
The data is sampled from the KVM's debugfs entries and its perf events.
|
|
"""
|
|
|
|
import curses
|
|
import sys
|
|
import os
|
|
import time
|
|
import optparse
|
|
import ctypes
|
|
import fcntl
|
|
import resource
|
|
import struct
|
|
import re
|
|
from collections import defaultdict
|
|
|
|
# Exit reason name -> number table that Arch.get_arch() hands to ArchX86 when
# /proc/cpuinfo lists the 'vmx' flag.  The numbers end up as the value part
# of the 'exit_reason==<n>' filter set on the kvm_exit tracepoint.
VMX_EXIT_REASONS = {
    'EXCEPTION_NMI': 0,
    'EXTERNAL_INTERRUPT': 1,
    'TRIPLE_FAULT': 2,
    'PENDING_INTERRUPT': 7,
    'NMI_WINDOW': 8,
    'TASK_SWITCH': 9,
    'CPUID': 10,
    'HLT': 12,
    'INVLPG': 14,
    'RDPMC': 15,
    'RDTSC': 16,
    'VMCALL': 18,
    'VMCLEAR': 19,
    'VMLAUNCH': 20,
    'VMPTRLD': 21,
    'VMPTRST': 22,
    'VMREAD': 23,
    'VMRESUME': 24,
    'VMWRITE': 25,
    'VMOFF': 26,
    'VMON': 27,
    'CR_ACCESS': 28,
    'DR_ACCESS': 29,
    'IO_INSTRUCTION': 30,
    'MSR_READ': 31,
    'MSR_WRITE': 32,
    'INVALID_STATE': 33,
    'MWAIT_INSTRUCTION': 36,
    'MONITOR_INSTRUCTION': 39,
    'PAUSE_INSTRUCTION': 40,
    'MCE_DURING_VMENTRY': 41,
    'TPR_BELOW_THRESHOLD': 43,
    'APIC_ACCESS': 44,
    'EPT_VIOLATION': 48,
    'EPT_MISCONFIG': 49,
    'WBINVD': 54,
    'XSETBV': 55,
    'APIC_WRITE': 56,
    'INVPCID': 58,
}
|
|
|
|
# Exit reason name -> number table that Arch.get_arch() hands to ArchX86 when
# /proc/cpuinfo lists the 'svm' flag.  Used the same way as the VMX table:
# the numbers become kvm_exit 'exit_reason==<n>' filter values.
SVM_EXIT_REASONS = {
    'READ_CR0': 0x000,
    'READ_CR3': 0x003,
    'READ_CR4': 0x004,
    'READ_CR8': 0x008,
    'WRITE_CR0': 0x010,
    'WRITE_CR3': 0x013,
    'WRITE_CR4': 0x014,
    'WRITE_CR8': 0x018,
    'READ_DR0': 0x020,
    'READ_DR1': 0x021,
    'READ_DR2': 0x022,
    'READ_DR3': 0x023,
    'READ_DR4': 0x024,
    'READ_DR5': 0x025,
    'READ_DR6': 0x026,
    'READ_DR7': 0x027,
    'WRITE_DR0': 0x030,
    'WRITE_DR1': 0x031,
    'WRITE_DR2': 0x032,
    'WRITE_DR3': 0x033,
    'WRITE_DR4': 0x034,
    'WRITE_DR5': 0x035,
    'WRITE_DR6': 0x036,
    'WRITE_DR7': 0x037,
    'EXCP_BASE': 0x040,
    'INTR': 0x060,
    'NMI': 0x061,
    'SMI': 0x062,
    'INIT': 0x063,
    'VINTR': 0x064,
    'CR0_SEL_WRITE': 0x065,
    'IDTR_READ': 0x066,
    'GDTR_READ': 0x067,
    'LDTR_READ': 0x068,
    'TR_READ': 0x069,
    'IDTR_WRITE': 0x06a,
    'GDTR_WRITE': 0x06b,
    'LDTR_WRITE': 0x06c,
    'TR_WRITE': 0x06d,
    'RDTSC': 0x06e,
    'RDPMC': 0x06f,
    'PUSHF': 0x070,
    'POPF': 0x071,
    'CPUID': 0x072,
    'RSM': 0x073,
    'IRET': 0x074,
    'SWINT': 0x075,
    'INVD': 0x076,
    'PAUSE': 0x077,
    'HLT': 0x078,
    'INVLPG': 0x079,
    'INVLPGA': 0x07a,
    'IOIO': 0x07b,
    'MSR': 0x07c,
    'TASK_SWITCH': 0x07d,
    'FERR_FREEZE': 0x07e,
    'SHUTDOWN': 0x07f,
    'VMRUN': 0x080,
    'VMMCALL': 0x081,
    'VMLOAD': 0x082,
    'VMSAVE': 0x083,
    'STGI': 0x084,
    'CLGI': 0x085,
    'SKINIT': 0x086,
    'RDTSCP': 0x087,
    'ICEBP': 0x088,
    'WBINVD': 0x089,
    'MONITOR': 0x08a,
    'MWAIT': 0x08b,
    'MWAIT_COND': 0x08c,
    'XSETBV': 0x08d,
    'NPF': 0x400,
}
|
|
|
|
# EC definition of HSR (from arch/arm64/include/asm/kvm_arm.h).
# ArchA64 exposes this table as its exit_reasons, i.e. as the value set for
# the kvm_exit 'exit_reason==<n>' filter.
AARCH64_EXIT_REASONS = {
    'UNKNOWN': 0x00,
    'WFI': 0x01,
    'CP15_32': 0x03,
    'CP15_64': 0x04,
    'CP14_MR': 0x05,
    'CP14_LS': 0x06,
    'FP_ASIMD': 0x07,
    'CP10_ID': 0x08,
    'CP14_64': 0x0C,
    'ILL_ISS': 0x0E,
    'SVC32': 0x11,
    'HVC32': 0x12,
    'SMC32': 0x13,
    'SVC64': 0x15,
    'HVC64': 0x16,
    'SMC64': 0x17,
    'SYS64': 0x18,
    'IABT': 0x20,
    'IABT_HYP': 0x21,
    'PC_ALIGN': 0x22,
    'DABT': 0x24,
    'DABT_HYP': 0x25,
    'SP_ALIGN': 0x26,
    'FP_EXC32': 0x28,
    'FP_EXC64': 0x2C,
    'SERROR': 0x2F,
    'BREAKPT': 0x30,
    'BREAKPT_HYP': 0x31,
    'SOFTSTP': 0x32,
    'SOFTSTP_HYP': 0x33,
    'WATCHPT': 0x34,
    'WATCHPT_HYP': 0x35,
    'BKPT32': 0x38,
    'VECTOR32': 0x3A,
    'BRK64': 0x3C,
}
|
|
|
|
# From include/uapi/linux/kvm.h, KVM_EXIT_xxx.
# get_filters() offers these as the values of the 'reason' filter on the
# kvm_userspace_exit tracepoint, independent of the architecture.
USERSPACE_EXIT_REASONS = {
    'UNKNOWN': 0,
    'EXCEPTION': 1,
    'IO': 2,
    'HYPERCALL': 3,
    'DEBUG': 4,
    'HLT': 5,
    'MMIO': 6,
    'IRQ_WINDOW_OPEN': 7,
    'SHUTDOWN': 8,
    'FAIL_ENTRY': 9,
    'INTR': 10,
    'SET_TPR': 11,
    'TPR_ACCESS': 12,
    'S390_SIEIC': 13,
    'S390_RESET': 14,
    'DCR': 15,
    'NMI': 16,
    'INTERNAL_ERROR': 17,
    'OSI': 18,
    'PAPR_HCALL': 19,
    'S390_UCONTROL': 20,
    'WATCHDOG': 21,
    'S390_TSCH': 22,
    'EPR': 23,
    'SYSTEM_EVENT': 24,
}
|
|
|
|
# Default ioctl request numbers issued on perf event file descriptors (see
# Event.enable/disable/reset and the SET_FILTER call in Event.setup_event).
# ArchPPC replaces all four values; the other Arch classes use them as is.
# NOTE(review): presumably the kernel's PERF_EVENT_IOC_* encodings -- confirm
# against the perf uapi header before changing a value.
IOCTL_NUMBERS = {
    'SET_FILTER': 0x40082406,
    'ENABLE': 0x00002400,
    'DISABLE': 0x00002401,
    'RESET': 0x00002403,
}
|
|
|
|
|
|
class Arch(object):
    """Encapsulates global architecture specific data.

    Subclasses carry the perf_event_open syscall number
    (sc_perf_evt_open), the perf ioctl numbers (ioctl_numbers) and the VM
    exit reasons (exit_reasons) for the architecture the script runs on.

    """
    @staticmethod
    def get_arch():
        """Returns the Arch subclass instance for the running machine.

        The machine name reported by os.uname() selects ArchPPC, ArchA64
        or ArchS390.  Every other machine is treated as x86: the first
        'flags' line of /proc/cpuinfo decides between the VMX and the SVM
        exit reason table.

        Returns None when that line names neither 'vmx' nor 'svm', or when
        /proc/cpuinfo has no 'flags' line at all.

        """
        machine = os.uname()[4]

        if machine.startswith('ppc'):
            return ArchPPC()
        if machine.startswith('aarch64'):
            return ArchA64()
        if machine.startswith('s390'):
            return ArchS390()

        # X86_64
        # The context manager closes /proc/cpuinfo on every exit path; the
        # file used to be left for the garbage collector.
        with open('/proc/cpuinfo') as cpuinfo:
            for line in cpuinfo:
                if not line.startswith('flags'):
                    continue

                flags = line.split()
                if 'vmx' in flags:
                    return ArchX86(VMX_EXIT_REASONS)
                if 'svm' in flags:
                    return ArchX86(SVM_EXIT_REASONS)
                # Only the first 'flags' line is examined.
                return None
        return None
|
|
|
|
|
|
class ArchX86(Arch):
    """x86 data; the caller supplies the matching exit reason table."""
    def __init__(self, exit_reasons):
        # exit_reasons is VMX_EXIT_REASONS or SVM_EXIT_REASONS, as chosen
        # by Arch.get_arch().
        self.exit_reasons = exit_reasons
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 298
|
|
|
|
|
|
class ArchPPC(Arch):
    """PowerPC data: own ioctl numbers, no exit reason table."""
    def __init__(self):
        self.sc_perf_evt_open = 319

        # Work on a private copy.  Assigning IOCTL_NUMBERS directly and then
        # overwriting its entries rewrote the module-level table that the
        # other Arch classes hand out unchanged.
        self.ioctl_numbers = dict(IOCTL_NUMBERS)
        self.ioctl_numbers['ENABLE'] = 0x20002400
        self.ioctl_numbers['DISABLE'] = 0x20002401
        self.ioctl_numbers['RESET'] = 0x20002403

        # PPC comes in 32 and 64 bit and some generated ioctl
        # numbers depend on the wordsize.
        char_ptr_size = ctypes.sizeof(ctypes.c_char_p)
        # '<<' binds tighter than '|'; the parentheses only spell that out.
        self.ioctl_numbers['SET_FILTER'] = 0x80002406 | (char_ptr_size << 16)

        # Empty table: get_filters() then offers no kvm_exit sub-filters.
        self.exit_reasons = {}
|
|
|
|
|
|
class ArchA64(Arch):
    """arm64 data: default ioctl numbers plus the AArch64 exit reasons."""
    def __init__(self):
        self.exit_reasons = AARCH64_EXIT_REASONS
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 241
|
|
|
|
|
|
class ArchS390(Arch):
    """s390 data: default ioctl numbers, no exit reason table."""
    def __init__(self):
        # None is falsy, so get_filters() adds no kvm_exit sub-filters.
        self.exit_reasons = None
        self.ioctl_numbers = IOCTL_NUMBERS
        self.sc_perf_evt_open = 331
|
|
|
|
# Architecture data for the running machine, resolved once at import time.
# Arch.get_arch() yields None on a non-ppc/aarch64/s390 machine whose first
# /proc/cpuinfo 'flags' line lists neither 'vmx' nor 'svm'.
ARCH = Arch.get_arch()
|
|
|
|
|
|
def walkdir(path):
    """Returns the first os.walk() entry for the given directory.

    The result is the usual 3-tuple of (dirpath, dirnames, filenames) and
    covers the top level of path only.

    """
    walker = os.walk(path)
    return next(walker)
|
|
|
|
|
|
def parse_int_list(list_string):
    """Returns the list of ints described by a comma separated string.

    Each member is either a single integer or an inclusive range written
    as 'first-last', e.g. '0-2,5' yields [0, 1, 2, 5].

    """
    result = []
    for token in list_string.split(','):
        if '-' in token:
            bounds = token.split('-')
            first = int(bounds[0])
            last = int(bounds[1])
            result += range(first, last + 1)
        else:
            result.append(int(token))
    return result
|
|
|
|
|
|
def get_gname_from_pid(pid):
    """Returns the guest name for a QEMU process pid.

    Extracts the guest name from the QEMU command line by processing the
    '-name' option.  Will also handle names specified out of sequence.

    Returns '' when /proc/<pid>/cmdline cannot be read, when there is no
    '-name' option, or when its argument cannot be parsed.

    """
    name = ''
    try:
        # 'with' closes the file right away instead of leaking the handle.
        with open('/proc/{}/cmdline'.format(pid), 'rb') as cmdline:
            raw = cmdline.read()
        if not isinstance(raw, str):
            # Python 3 returns bytes for a binary read; splitting bytes by a
            # str separator would raise TypeError.
            raw = raw.decode('utf-8', 'replace')
        line = raw.split('\0')
        parms = line[line.index('-name') + 1].split(',')
        while '' in parms:
            # commas are escaped (i.e. ',,'), hence e.g. 'foo,bar' results in
            # ['foo', '', 'bar'], which we revert here
            idx = parms.index('')
            parms[idx - 1] += ',' + parms[idx + 1]
            del parms[idx:idx+2]
        # the '-name' switch allows for two ways to specify the guest name,
        # where the plain name overrides the name specified via 'guest='
        for arg in parms:
            if '=' not in arg:
                name = arg
                break
            if arg[:6] == 'guest=':
                name = arg[6:]
    except (ValueError, IOError, IndexError):
        # Best effort only: an unreadable or unparsable command line simply
        # means no guest name is shown.
        pass

    return name
|
|
|
|
|
|
def get_online_cpus():
    """Returns the online cpu ids as a list of integers.

    The ids come from the first line of /sys/devices/system/cpu/online,
    expanded by parse_int_list().

    """
    with open('/sys/devices/system/cpu/online') as online:
        return parse_int_list(online.readline())
|
|
|
|
|
|
def get_filters():
    """Returns a dict of trace events, their filter ids and
    the values that can be filtered.

    Trace events can be filtered for special values by setting a
    filter string via an ioctl. The string normally has the format
    identifier==value. For each filter a new event will be created, to
    be able to distinguish the events.

    Each entry maps a trace event name to a (filter identifier,
    {value name: number}) tuple.  'kvm_exit' is only present when the
    architecture provides a non-empty exit reason table.

    """
    filters = {'kvm_userspace_exit': ('reason', USERSPACE_EXIT_REASONS)}
    if ARCH.exit_reasons:
        filters['kvm_exit'] = ('exit_reason', ARCH.exit_reasons)
    return filters
|
|
|
|
# Handle on the C library.  use_errno=True lets ctypes.get_errno() report
# the errno left behind by a call made through this handle.
libc = ctypes.CDLL('libc.so.6', use_errno=True)
# Generic syscall entry point; perf_event_open() goes through it.
syscall = libc.syscall
|
|
|
|
|
|
class perf_event_attr(ctypes.Structure):
    """Struct that holds the necessary data to set up a trace event.

    For an extensive explanation see perf_event_open(2) and
    include/uapi/linux/perf_event.h, struct perf_event_attr

    All fields that are not initialized in the constructor are 0.

    """
    _fields_ = [('type', ctypes.c_uint32),
                ('size', ctypes.c_uint32),
                ('config', ctypes.c_uint64),
                ('sample_freq', ctypes.c_uint64),
                ('sample_type', ctypes.c_uint64),
                ('read_format', ctypes.c_uint64),
                ('flags', ctypes.c_uint64),
                ('wakeup_events', ctypes.c_uint32),
                ('bp_type', ctypes.c_uint32),
                ('bp_addr', ctypes.c_uint64),
                ('bp_len', ctypes.c_uint64),
                ]

    def __init__(self):
        """Presets type, size and read_format for a grouped tracepoint."""
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the subclass again and recurses without end once this struct is
        # subclassed.
        super(perf_event_attr, self).__init__()
        self.type = PERF_TYPE_TRACEPOINT
        self.size = ctypes.sizeof(self)
        self.read_format = PERF_FORMAT_GROUP
|
|
|
|
|
|
def perf_event_open(attr, pid, cpu, group_fd, flags):
    """Wrapper for the sys_perf_evt_open() syscall.

    Used to set up performance events, returns a file descriptor or -1
    on error.

    The syscall number is taken from ARCH.sc_perf_evt_open.  Arguments:
    - attr: struct perf_event_attr, passed by pointer
    - pid: pid or -1 to monitor all pids
    - cpu: cpu number or -1 to monitor all cpus
    - group_fd: The file descriptor of the group leader or -1 to create a
      group.
    - flags

    """
    syscall_nr = ARCH.sc_perf_evt_open
    attr_ptr = ctypes.pointer(attr)
    c_pid = ctypes.c_int(pid)
    c_cpu = ctypes.c_int(cpu)
    c_group_fd = ctypes.c_int(group_fd)
    c_flags = ctypes.c_long(flags)
    return syscall(syscall_nr, attr_ptr, c_pid, c_cpu, c_group_fd, c_flags)
|
|
|
|
# Values stored into perf_event_attr.type and perf_event_attr.read_format.
# NOTE(review): taken to mirror the kernel's perf ABI constants (see
# perf_event_open(2)) -- confirm against the uapi header when changing.
PERF_TYPE_TRACEPOINT = 2
PERF_FORMAT_GROUP = 1 << 3

# debugfs locations of the tracing event definitions and of the KVM
# statistics files.
PATH_DEBUGFS_TRACING = '/sys/kernel/debug/tracing'
PATH_DEBUGFS_KVM = '/sys/kernel/debug/kvm'
|
|
|
|
|
|
class Group(object):
    """Represents a perf event group."""

    def __init__(self):
        # Events in insertion order; events[0] acts as the group leader.
        self.events = []

    def add_event(self, event):
        """Appends an event to the group."""
        self.events.append(event)

    def read(self):
        """Returns a dict with 'event name: value' for all events in the
        group.

        Values are read by reading from the file descriptor of the
        event that is the group leader. See perf_event_open(2) for
        details.

        Read format for the used event configuration is:
        struct read_format {
            u64 nr; /* The number of events */
            struct {
                u64 value; /* The value of the event */
            } values[nr];
        };

        """
        count = len(self.events)
        names = [event.name for event in self.events]
        # Skip the 8 byte 'nr' header, then one u64 per event.
        layout = '8x%dQ' % count
        raw = os.read(self.events[0].fd, 8 * (count + 1))
        return dict(zip(names, struct.unpack(layout, raw)))
|
|
|
|
|
|
class Event(object):
    """Represents a performance event and manages its life cycle."""
    def __init__(self, name, group, trace_cpu, trace_pid, trace_point,
                 trace_filter, trace_set='kvm'):
        """Opens the perf event; see setup_event() for the arguments."""
        self.name = name
        self.fd = None
        self.setup_event(group, trace_cpu, trace_pid, trace_point,
                         trace_filter, trace_set)

    def __del__(self):
        """Closes the event's file descriptor.

        As no python file object was created for the file descriptor,
        python will not reference count the descriptor and will not
        close it itself automatically, so we do it.

        """
        fd = getattr(self, 'fd', None)
        # Compare against None: a plain truth test would skip descriptor 0.
        if fd is not None:
            # Forget the descriptor first so it is never closed twice.
            self.fd = None
            os.close(fd)

    def setup_event_attribute(self, trace_set, trace_point):
        """Returns an initialized ctype perf_event_attr struct.

        The tracepoint id is read from
        <tracing dir>/events/<trace_set>/<trace_point>/id and stored in
        the struct's config field.

        """
        id_path = os.path.join(PATH_DEBUGFS_TRACING, 'events', trace_set,
                               trace_point, 'id')

        event_attr = perf_event_attr()
        with open(id_path) as id_file:
            event_attr.config = int(id_file.read())
        return event_attr

    def setup_event(self, group, trace_cpu, trace_pid, trace_point,
                    trace_filter, trace_set):
        """Sets up the perf event in Linux.

        Issues the syscall to register the event in the kernel and
        then sets the optional filter.

        Raises OSError when the perf_event_open syscall returns -1.

        """
        event_attr = self.setup_event_attribute(trace_set, trace_point)

        # First event will be group leader.
        group_leader = -1

        # All others have to pass the leader's descriptor instead.
        if group.events:
            group_leader = group.events[0].fd

        fd = perf_event_open(event_attr, trace_pid,
                             trace_cpu, group_leader, 0)
        if fd == -1:
            err = ctypes.get_errno()
            raise OSError(err, os.strerror(err),
                          'while calling sys_perf_event_open().')

        # Remember the descriptor before applying the filter, so __del__
        # still closes it if the ioctl below raises.
        self.fd = fd

        if trace_filter:
            fcntl.ioctl(fd, ARCH.ioctl_numbers['SET_FILTER'],
                        trace_filter)

    def enable(self):
        """Enables the trace event in the kernel.

        Enabling the group leader makes reading counters from it and the
        events under it possible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['ENABLE'], 0)

    def disable(self):
        """Disables the trace event in the kernel.

        Disabling the group leader makes reading all counters under it
        impossible.

        """
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['DISABLE'], 0)

    def reset(self):
        """Resets the count of the trace event in the kernel."""
        fcntl.ioctl(self.fd, ARCH.ioctl_numbers['RESET'], 0)
|
|
|
|
|
|
class TracepointProvider(object):
    """Data provider for the stats class.

    Manages the events/groups from which it acquires its data.

    """
    def __init__(self):
        self.group_leaders = []
        self.filters = get_filters()
        self._fields = self.get_available_fields()
        self._pid = 0

    def get_available_fields(self):
        """Returns a list of available event's of format 'event name(filter
        name)'.

        All available events have directories under
        /sys/kernel/debug/tracing/events/ which export information
        about the specific event. Therefore, listing the dirs gives us
        a list of all available events.

        Some events like the vm exit reasons can be filtered for
        specific values. To take account for that, the routine below
        creates special fields with the following format:
        event name(filter name)

        """
        path = os.path.join(PATH_DEBUGFS_TRACING, 'events', 'kvm')
        fields = walkdir(path)[1]
        extra = []
        for field in fields:
            if field in self.filters:
                filter_name_, filter_dicts = self.filters[field]
                for name in filter_dicts:
                    extra.append(field + '(' + name + ')')
        fields += extra
        return fields

    @staticmethod
    def _ensure_nofile_limit(needed):
        """Makes sure at least 'needed' file descriptors may be opened.

        Limits are only ever raised.  An unlimited hard limit is left
        alone; comparing it numerically made it look too small and
        replaced it with a finite one that could later be in the way.
        Exits the script if the limit cannot be raised.

        """
        unlimited = resource.RLIM_INFINITY
        try:
            softlim, hardlim = resource.getrlimit(resource.RLIMIT_NOFILE)

            if softlim == unlimited or softlim >= needed:
                # Already sufficient, do not lower anything.
                return

            if hardlim == unlimited or hardlim >= needed:
                # Raising the soft limit is sufficient.
                resource.setrlimit(resource.RLIMIT_NOFILE, (needed, hardlim))
            else:
                # Now we need CAP_SYS_RESOURCE, to increase the hard limit.
                resource.setrlimit(resource.RLIMIT_NOFILE, (needed, needed))

        except ValueError:
            sys.exit("NOFILE rlimit could not be raised to {0}".format(needed))

    def setup_traces(self):
        """Creates all event and group objects needed to be able to retrieve
        data."""
        fields = self.get_available_fields()
        if self._pid > 0:
            # Fetch list of all threads of the monitored pid, as qemu
            # starts a thread for each vcpu.
            path = os.path.join('/proc', str(self._pid), 'task')
            groupids = walkdir(path)[1]
        else:
            groupids = get_online_cpus()

        # The constant is needed as a buffer for python libs, std
        # streams and other files that the script opens.
        self._ensure_nofile_limit(len(groupids) * len(fields) + 50)

        # Splits 'event(sub)' into the tracepoint and its filter value name.
        filtered_field = re.compile(r'(.*)\((.*)\)')

        for groupid in groupids:
            # From perf_event_open(2):
            # pid > 0 and cpu == -1
            # This measures the specified process/thread on any CPU.
            #
            # pid == -1 and cpu >= 0
            # This measures all processes/threads on the specified CPU.
            trace_cpu = groupid if self._pid == 0 else -1
            trace_pid = int(groupid) if self._pid != 0 else -1

            group = Group()
            for name in fields:
                tracepoint = name
                tracefilter = None
                match = filtered_field.match(name)
                if match:
                    tracepoint, sub = match.groups()
                    tracefilter = ('%s==%d\0' %
                                   (self.filters[tracepoint][0],
                                    self.filters[tracepoint][1][sub]))

                group.add_event(Event(name=name,
                                      group=group,
                                      trace_cpu=trace_cpu,
                                      trace_pid=trace_pid,
                                      trace_point=tracepoint,
                                      trace_filter=tracefilter))

            self.group_leaders.append(group)

    def available_fields(self):
        """Returns the same list as get_available_fields()."""
        return self.get_available_fields()

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        """Enables/disables the (un)wanted events"""
        self._fields = fields
        for group in self.group_leaders:
            for index, event in enumerate(group.events):
                if event.name in fields:
                    event.reset()
                    event.enable()
                else:
                    # Do not disable the group leader.
                    # It would disable all of its events.
                    if index != 0:
                        event.disable()

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Changes the monitored pid by setting new traces."""
        self._pid = pid
        # The garbage collector will get rid of all Event/Group
        # objects and open files after removing the references.
        self.group_leaders = []
        self.setup_traces()
        self.fields = self._fields

    def read(self):
        """Returns 'event name: current value' for all enabled events.

        Values of the same event are summed up over all groups.

        """
        ret = defaultdict(int)
        wanted = set(self._fields)
        for group in self.group_leaders:
            # items() exists on Python 2 and 3, iteritems() only on 2.
            for name, val in group.read().items():
                if name in wanted:
                    ret[name] += val
        return ret
|
|
|
|
|
|
class DebugfsProvider(object):
    """Provides data from the files that KVM creates in the kvm debugfs
    folder."""
    def __init__(self):
        self._fields = self.get_available_fields()
        self._pid = 0
        self.do_read = True
        self.paths = []

    def get_available_fields(self):
        """Returns a list of available fields.

        The fields are all available KVM debugfs files

        """
        return walkdir(PATH_DEBUGFS_KVM)[2]

    @property
    def fields(self):
        return self._fields

    @fields.setter
    def fields(self, fields):
        self._fields = fields

    @property
    def pid(self):
        return self._pid

    @pid.setter
    def pid(self, pid):
        """Selects the debugfs directories to read for the given pid.

        A pid of 0 selects the global statistics files in the kvm debugfs
        folder itself.

        """
        # Record the pid in both branches; it used to keep the previous
        # value when the filter was reset to 0.
        self._pid = pid
        if pid != 0:
            vms = walkdir(PATH_DEBUGFS_KVM)[1]
            # Without any per-VM directories there is nothing to filter on.
            # Recomputed on every change so reading resumes once
            # directories show up.
            self.do_read = bool(vms)

            # NOTE(review): assumes per-VM directories are named
            # '<pid>-<fd>'.  Matching the prefix keeps pid 12 from also
            # selecting a directory such as '112-20'.  The result is a real
            # list because read() walks it on every call.
            prefix = '{}-'.format(pid)
            self.paths = [vm for vm in vms if vm.startswith(prefix)]

        else:
            self.paths = ['']
            self.do_read = True

    def read(self):
        """Returns a dict with format:'file name / field -> current value'.

        Values of the same field are summed up over all selected paths.

        """
        results = {}

        # If no debugfs filtering support is available, then don't read.
        if not self.do_read:
            return results

        for path in self.paths:
            for field in self._fields:
                results[field] = results.get(field, 0) \
                    + self.read_field(field, path)

        return results

    def read_field(self, field, path):
        """Returns the value of a single field from a specific VM.

        A file that cannot be opened or read counts as 0.

        """
        try:
            with open(os.path.join(PATH_DEBUGFS_KVM, path, field)) as stat:
                return int(stat.read())
        except IOError:
            return 0
|
|
|
|
|
|
class Stats(object):
    """Manages the data providers and the data they provide.

    It is used to set filters on the provider's data and collect all
    provider data.

    """
    def __init__(self, providers, pid, fields=None):
        self.providers = providers
        self._pid_filter = pid
        self._fields_filter = fields
        # field -> (last value, delta to the value before)
        self.values = {}
        self.update_provider_pid()
        self.update_provider_filters()

    def update_provider_filters(self):
        """Propagates fields filters to providers."""
        pattern = self._fields_filter

        # As we reset the counters when updating the fields we can
        # also clear the cache of old values.
        self.values = {}
        for provider in self.providers:
            selected = []
            for key in provider.get_available_fields():
                # An unset or empty filter lets every field through.
                if not pattern or re.match(pattern, key) is not None:
                    selected.append(key)
            provider.fields = selected

    def update_provider_pid(self):
        """Propagates pid filters to providers."""
        for provider in self.providers:
            provider.pid = self._pid_filter

    @property
    def fields_filter(self):
        return self._fields_filter

    @fields_filter.setter
    def fields_filter(self, fields_filter):
        self._fields_filter = fields_filter
        self.update_provider_filters()

    @property
    def pid_filter(self):
        return self._pid_filter

    @pid_filter.setter
    def pid_filter(self, pid):
        self._pid_filter = pid
        self.values = {}
        self.update_provider_pid()

    def get(self):
        """Returns a dict with field -> (value, delta to last value) of all
        provider data."""
        for provider in self.providers:
            fresh = provider.read()
            for key in provider.fields:
                previous = self.values.get(key, (0, 0))
                current = fresh.get(key, 0)
                if previous is None:
                    delta = None
                else:
                    delta = current - previous[0]
                self.values[key] = (current, delta)
        return self.values
|
|
|
|
# Column widths of the text ui table: event name, then the total count.
LABEL_WIDTH = 40
NUMBER_WIDTH = 10
# Seconds to wait for a key press: short right after a (re)start or a
# filter change, longer between regular refreshes.
DELAY_INITIAL = 0.25
DELAY_REGULAR = 3.0
# Longer guest names are cut off and marked with '...' in the header.
MAX_GUEST_NAME_LEN = 48
|
|
|
|
|
|
class Tui(object):
    """Instruments curses to draw a nice text ui."""
    def __init__(self, stats):
        self.stats = stats
        self.screen = None
        self.update_drilldown()

    def __enter__(self):
        """Initialises curses for later use.  Based on curses.wrapper
        implementation from the Python standard library."""
        self.screen = curses.initscr()
        curses.noecho()
        curses.cbreak()

        # The try/catch works around a minor bit of
        # over-conscientiousness in the curses module, the error
        # return from C start_color() is ignorable.
        try:
            curses.start_color()
        except curses.error:
            pass

        # Hide cursor in extra statement as some monochrome terminals
        # might support hiding but not colors.
        try:
            curses.curs_set(0)
        except curses.error:
            pass

        curses.use_default_colors()
        return self

    def __exit__(self, *exception):
        """Resets the terminal to its normal state.  Based on curses.wrapper
        implementation from the Python standard library."""
        if not self.screen:
            return
        self.screen.keypad(0)
        curses.echo()
        curses.nocbreak()
        curses.endwin()

    def update_drilldown(self):
        """Sets or removes a filter that only allows fields without braces."""
        no_braces = r'^[^\(]*$'
        current = self.stats.fields_filter
        if not current:
            self.stats.fields_filter = no_braces
        elif current == no_braces:
            self.stats.fields_filter = None

    def update_pid(self, pid):
        """Propagates pid selection to stats object."""
        self.stats.pid_filter = pid

    def refresh_header(self, pid=None):
        """Refreshes the header.

        Uses the currently filtered pid when no pid is given.

        """
        if pid is None:
            pid = self.stats.pid_filter
        self.screen.erase()

        gname = get_gname_from_pid(pid)
        if gname:
            if len(gname) > MAX_GUEST_NAME_LEN:
                shown = gname[:MAX_GUEST_NAME_LEN] + '...'
            else:
                shown = gname
            gname = '({})'.format(shown)

        if pid > 0:
            title = 'kvm statistics - pid {0} {1}'.format(pid, gname)
        else:
            title = 'kvm statistics - summary'
        self.screen.addstr(0, 0, title, curses.A_BOLD)

        # Column headings are right-aligned to their number columns.
        total_end = 1 + LABEL_WIDTH + NUMBER_WIDTH
        self.screen.addstr(2, 1, 'Event')
        self.screen.addstr(2, total_end - len('Total'), 'Total')
        self.screen.addstr(2, total_end + 8 - len('Current'), 'Current')
        self.screen.addstr(4, 1, 'Collecting data...')
        self.screen.refresh()

    def refresh_body(self, sleeptime):
        """Redraws the table, busiest events first.

        The 'Current' column shows the delta divided by sleeptime.

        """
        row = 3
        self.screen.move(row, 0)
        self.screen.clrtobot()
        stats = self.stats.get()

        def by_activity(name):
            # Highest delta first, then highest total.
            entry = stats[name]
            return (-entry[1] if entry[1] else 0, -entry[0])

        for key in sorted(stats, key=by_activity):
            if row >= self.screen.getmaxyx()[0]:
                break
            values = stats[key]
            total, delta = values[0], values[1]
            if not (total or delta):
                # Stop at the first event without any counts.
                break
            self.screen.addstr(row, 1, key)
            self.screen.addstr(row, 1 + LABEL_WIDTH, '%10d' % (total,))
            if delta is not None:
                self.screen.addstr(row, 1 + LABEL_WIDTH + NUMBER_WIDTH,
                                   '%8d' % (delta / sleeptime,))
            row += 1
        self.screen.refresh()

    def show_filter_selection(self):
        """Draws filter selection mask.

        Asks for a valid regex and sets the fields filter accordingly.
        Empty input leaves the current filter untouched.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               "Show statistics for events matching a regex.",
                               curses.A_BOLD)
            current = "Current regex: {0}".format(self.stats.fields_filter)
            self.screen.addstr(2, 0, current)
            self.screen.addstr(3, 0, "New regex: ")
            curses.echo()
            regex = self.screen.getstr()
            curses.noecho()
            if len(regex) == 0:
                self.refresh_header()
                return
            try:
                re.compile(regex)
                self.stats.fields_filter = regex
                self.refresh_header()
                return
            except re.error:
                # Not a valid regex, ask again.
                continue

    def show_vm_selection(self):
        """Draws PID selection mask.

        Asks for a pid until a valid pid or 0 has been entered.  Empty
        input is treated like 0.

        """
        while True:
            self.screen.erase()
            self.screen.addstr(0, 0,
                               'Show statistics for specific pid.',
                               curses.A_BOLD)
            self.screen.addstr(1, 0,
                               'This might limit the shown data to the trace '
                               'statistics.')

            curses.echo()
            self.screen.addstr(3, 0, "Pid [0 or pid]: ")
            answer = self.screen.getstr()
            curses.noecho()

            try:
                if len(answer) > 0:
                    pid = int(answer)
                    if pid != 0:
                        proc_dir = os.path.join('/proc/', str(pid))
                        if not os.path.isdir(proc_dir):
                            continue
                else:
                    pid = 0
                self.refresh_header(pid)
                self.update_pid(pid)
                break

            except ValueError:
                continue

    def show_stats(self):
        """Refreshes the screen and processes user input."""
        sleeptime = DELAY_INITIAL
        self.refresh_header()
        while True:
            self.refresh_body(sleeptime)
            curses.halfdelay(int(sleeptime * 10))
            sleeptime = DELAY_REGULAR
            try:
                char = self.screen.getkey()
                if char == 'q':
                    break
                elif char == 'x':
                    self.refresh_header()
                    self.update_drilldown()
                    sleeptime = DELAY_INITIAL
                elif char == 'f':
                    self.show_filter_selection()
                    sleeptime = DELAY_INITIAL
                elif char == 'p':
                    self.show_vm_selection()
                    sleeptime = DELAY_INITIAL
            except KeyboardInterrupt:
                break
            except curses.error:
                continue
|
|
|
|
|
|
def batch(stats):
    """Prints statistics in a key, value format.

    Samples twice, one second apart, and prints each field together with
    its value and the delta between the two samples.

    """
    try:
        # The first sample only sets the baseline for the deltas.
        stats.get()
        time.sleep(1)
        s = stats.get()
        for key in sorted(s.keys()):
            values = s[key]
            # Parenthesised single-argument form: the same output on
            # Python 2, and no longer a syntax error on Python 3.
            print('%-42s%10d%10d' % (key, values[0], values[1]))
    except KeyboardInterrupt:
        pass
|
|
|
|
|
|
def log(stats):
    """Prints statistics as reiterating key block, multiple value blocks.

    Runs until interrupted with Ctrl-C: one line of deltas per second,
    with the list of keys repeated every 20 lines.

    """
    # sorted() over the dict yields its keys on Python 2 and 3 alike;
    # iterkeys() only exists on Python 2.
    keys = sorted(stats.get())

    # Both helpers build the complete line and print it in one go.  The
    # output equals that of the former "print item," statements, without
    # relying on Python 2 only syntax.
    def banner():
        print(' '.join('%s' % k for k in keys))

    def statline():
        s = stats.get()
        print(' '.join(' %9d' % s[k][1] for k in keys))

    line = 0
    banner_repeat = 20
    while True:
        try:
            time.sleep(1)
            if line % banner_repeat == 0:
                banner()
            statline()
            line += 1
        except KeyboardInterrupt:
            break
|
|
|
|
|
|
def get_options():
    """Returns processed program arguments.

    The result is the optparse values object with the attributes once,
    log, tracepoints, debugfs, fields and pid.

    """
    # The tracing entry below names the directory the script really reads
    # (/sys/kernel/debug/tracing); it used to say '.../debug/trace/...'.
    description_text = """
This script displays various statistics about VMs running under KVM.
The statistics are gathered from the KVM debugfs entries and / or the
currently available perf traces.

The monitoring takes additional cpu cycles and might affect the VM's
performance.

Requirements:
- Access to:
    /sys/kernel/debug/kvm
    /sys/kernel/debug/tracing/events/*
    /proc/pid/task
- /proc/sys/kernel/perf_event_paranoid < 1 if user has no
  CAP_SYS_ADMIN and perf events are used.
- CAP_SYS_RESOURCE if the hard limit is not high enough to allow
  the large number of files that are possibly opened.

Interactive Commands:
   f     filter by regular expression
   p     filter by PID
   q     quit
   x     toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
"""

    class PlainHelpFormatter(optparse.IndentedHelpFormatter):
        """Prints the description as written instead of re-wrapping it."""
        def format_description(self, description):
            if description:
                return description + "\n"
            else:
                return ""

    optparser = optparse.OptionParser(description=description_text,
                                      formatter=PlainHelpFormatter())
    optparser.add_option('-1', '--once', '--batch',
                         action='store_true',
                         default=False,
                         dest='once',
                         help='run in batch mode for one second',
                         )
    optparser.add_option('-l', '--log',
                         action='store_true',
                         default=False,
                         dest='log',
                         help='run in logging mode (like vmstat)',
                         )
    optparser.add_option('-t', '--tracepoints',
                         action='store_true',
                         default=False,
                         dest='tracepoints',
                         help='retrieve statistics from tracepoints',
                         )
    optparser.add_option('-d', '--debugfs',
                         action='store_true',
                         default=False,
                         dest='debugfs',
                         help='retrieve statistics from debugfs',
                         )
    optparser.add_option('-f', '--fields',
                         action='store',
                         default=None,
                         dest='fields',
                         help='fields to display (regex)',
                         )
    optparser.add_option('-p', '--pid',
                         action='store',
                         default=0,
                         type='int',
                         dest='pid',
                         help='restrict statistics to pid',
                         )
    # sys.argv[0] ends up among the ignored positional arguments.
    (options, _) = optparser.parse_args(sys.argv)
    return options
|
|
|
|
|
|
def get_providers(options):
    """Returns a list of data providers depending on the passed options.

    Without an explicit choice the tracepoint provider is used.

    """
    selected = []

    if options.tracepoints:
        selected.append(TracepointProvider())
    if options.debugfs:
        selected.append(DebugfsProvider())
    if not selected:
        selected.append(TracepointProvider())

    return selected
|
|
|
|
|
|
def check_access(options):
    """Exits if the current user can't access all needed directories.

    When only the tracing directory is missing and tracepoints were not
    requested explicitly, falls back to debugfs statistics by setting
    options.debugfs.  Returns the (possibly modified) options.

    """
    if not os.path.exists('/sys/kernel/debug'):
        # Terminated with a newline like the other messages, so the shell
        # prompt does not end up on the same line.
        sys.stderr.write('Please enable CONFIG_DEBUG_FS in your kernel.\n')
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_KVM):
        sys.stderr.write("Please make sure, that debugfs is mounted and "
                         "readable by the current user:\n"
                         "('mount -t debugfs debugfs /sys/kernel/debug')\n"
                         "Also ensure, that the kvm modules are loaded.\n")
        sys.exit(1)

    if not os.path.exists(PATH_DEBUGFS_TRACING) and (options.tracepoints or
                                                     not options.debugfs):
        sys.stderr.write("Please enable CONFIG_TRACING in your kernel "
                         "when using the option -t (default).\n"
                         "If it is enabled, make {0} readable by the "
                         "current user.\n"
                         .format(PATH_DEBUGFS_TRACING))
        if options.tracepoints:
            sys.exit(1)

        sys.stderr.write("Falling back to debugfs statistics!\n")
        options.debugfs = True
        # Leave the message on screen for a moment before continuing.
        time.sleep(5)

    return options
|
|
|
|
|
|
def main():
    """Parses the options, sets up the providers and runs the chosen mode.

    Logging mode wins over batch mode; the text ui is the default.

    """
    options = check_access(get_options())

    pid = options.pid
    if pid > 0 and not os.path.isdir(os.path.join('/proc/', str(pid))):
        sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
        sys.exit('Specified pid does not exist.')

    stats = Stats(get_providers(options), options.pid, fields=options.fields)

    if options.log:
        log(stats)
    elif options.once:
        batch(stats)
    else:
        with Tui(stats) as tui:
            tui.show_stats()


if __name__ == "__main__":
    main()
|