Skip to content

Commit

Permalink
KVM: PPC: Book3S HV: Accumulate timing information for real-mode code
Browse files Browse the repository at this point in the history
This reads the timebase at various points in the real-mode guest
entry/exit code and uses that to accumulate total, minimum and
maximum time spent in those parts of the code.  Currently these
times are accumulated per vcpu in 5 parts of the code:

* rm_entry - time taken from the start of kvmppc_hv_entry() until
  just before entering the guest.
* rm_intr - time from when we take a hypervisor interrupt in the
  guest until we either re-enter the guest or decide to exit to the
  host.  This includes time spent handling hcalls in real mode.
* rm_exit - time from when we decide to exit the guest until the
  return from kvmppc_hv_entry().
* guest - time spend in the guest
* cede - time spent napping in real mode due to an H_CEDE hcall
  while other threads in the same vcore are active.

These times are exposed in debugfs in a directory per vcpu that
contains a file called "timings".  This file contains one line for
each of the 5 timings above, with the name followed by a colon and
4 numbers, which are the count (number of times the code has been
executed), the total time, the minimum time, and the maximum time,
all in nanoseconds.

The overhead of the extra code amounts to about 30ns for an hcall that
is handled in real mode (e.g. H_SET_DABR), which is about 25%.  Since
production environments may not wish to incur this overhead, the new
code is conditional on a new config symbol,
CONFIG_KVM_BOOK3S_HV_EXIT_TIMING.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
  • Loading branch information
Paul Mackerras authored and Alexander Graf committed Apr 21, 2015
1 parent e23a808 commit b6c295d
Show file tree
Hide file tree
Showing 7 changed files with 346 additions and 2 deletions.
21 changes: 21 additions & 0 deletions arch/powerpc/include/asm/kvm_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,14 @@ struct kvmppc_slb {
u8 base_page_size; /* MMU_PAGE_xxx */
};

/* Struct used to accumulate timing information in HV real mode code */
struct kvmhv_tb_accumulator {
u64 seqcount; /* used to synchronize access, also count * 2 */
u64 tb_total; /* total time in timebase ticks */
u64 tb_min; /* min time */
u64 tb_max; /* max time */
};

# ifdef CONFIG_PPC_FSL_BOOK3E
#define KVMPPC_BOOKE_IAC_NUM 2
#define KVMPPC_BOOKE_DAC_NUM 2
Expand Down Expand Up @@ -657,6 +665,19 @@ struct kvm_vcpu_arch {

u32 emul_inst;
#endif

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
struct kvmhv_tb_accumulator *cur_activity; /* What we're timing */
u64 cur_tb_start; /* when it started */
struct kvmhv_tb_accumulator rm_entry; /* real-mode entry code */
struct kvmhv_tb_accumulator rm_intr; /* real-mode intr handling */
struct kvmhv_tb_accumulator rm_exit; /* real-mode exit code */
struct kvmhv_tb_accumulator guest_time; /* guest execution */
struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */

struct dentry *debugfs_dir;
struct dentry *debugfs_timings;
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
};

#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
Expand Down
3 changes: 3 additions & 0 deletions arch/powerpc/include/asm/time.h
Original file line number Diff line number Diff line change
Expand Up @@ -211,5 +211,8 @@ extern void secondary_cpu_time_init(void);

DECLARE_PER_CPU(u64, decrementers_next_tb);

/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);

#endif /* __KERNEL__ */
#endif /* __POWERPC_TIME_H */
13 changes: 13 additions & 0 deletions arch/powerpc/kernel/asm-offsets.c
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,19 @@ int main(void)
DEFINE(VCPU_SPRG1, offsetof(struct kvm_vcpu, arch.shregs.sprg1));
DEFINE(VCPU_SPRG2, offsetof(struct kvm_vcpu, arch.shregs.sprg2));
DEFINE(VCPU_SPRG3, offsetof(struct kvm_vcpu, arch.shregs.sprg3));
#endif
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
DEFINE(VCPU_TB_RMENTRY, offsetof(struct kvm_vcpu, arch.rm_entry));
DEFINE(VCPU_TB_RMINTR, offsetof(struct kvm_vcpu, arch.rm_intr));
DEFINE(VCPU_TB_RMEXIT, offsetof(struct kvm_vcpu, arch.rm_exit));
DEFINE(VCPU_TB_GUEST, offsetof(struct kvm_vcpu, arch.guest_time));
DEFINE(VCPU_TB_CEDE, offsetof(struct kvm_vcpu, arch.cede_time));
DEFINE(VCPU_CUR_ACTIVITY, offsetof(struct kvm_vcpu, arch.cur_activity));
DEFINE(VCPU_ACTIVITY_START, offsetof(struct kvm_vcpu, arch.cur_tb_start));
DEFINE(TAS_SEQCOUNT, offsetof(struct kvmhv_tb_accumulator, seqcount));
DEFINE(TAS_TOTAL, offsetof(struct kvmhv_tb_accumulator, tb_total));
DEFINE(TAS_MIN, offsetof(struct kvmhv_tb_accumulator, tb_min));
DEFINE(TAS_MAX, offsetof(struct kvmhv_tb_accumulator, tb_max));
#endif
DEFINE(VCPU_SHARED_SPRG3, offsetof(struct kvm_vcpu_arch_shared, sprg3));
DEFINE(VCPU_SHARED_SPRG4, offsetof(struct kvm_vcpu_arch_shared, sprg4));
Expand Down
6 changes: 6 additions & 0 deletions arch/powerpc/kernel/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,12 @@ void arch_suspend_enable_irqs(void)
}
#endif

unsigned long long tb_to_ns(unsigned long long ticks)
{
return mulhdu(ticks, tb_to_ns_scale) << tb_to_ns_shift;
}
EXPORT_SYMBOL_GPL(tb_to_ns);

/*
* Scheduler clock - returns current time in nanosec units.
*
Expand Down
14 changes: 14 additions & 0 deletions arch/powerpc/kvm/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,20 @@ config KVM_BOOK3S_64_PR
processor, including emulating 32-bit processors on a 64-bit
host.

config KVM_BOOK3S_HV_EXIT_TIMING
bool "Detailed timing for hypervisor real-mode code"
depends on KVM_BOOK3S_HV_POSSIBLE && DEBUG_FS
---help---
Calculate time taken for each vcpu in the real-mode guest entry,
exit, and interrupt handling code, plus time spent in the guest
and in nap mode due to idle (cede) while other threads are still
in the guest. The total, minimum and maximum times in nanoseconds
together with the number of executions are reported in debugfs in
kvm/vm#/vcpu#/timings. The overhead is of the order of 30 - 40
ns per exit on POWER8.

If unsure, say N.

config KVM_BOOKE_HV
bool

Expand Down
150 changes: 150 additions & 0 deletions arch/powerpc/kvm/book3s_hv.c
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,154 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
return vcore;
}

#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
static struct debugfs_timings_element {
const char *name;
size_t offset;
} timings[] = {
{"rm_entry", offsetof(struct kvm_vcpu, arch.rm_entry)},
{"rm_intr", offsetof(struct kvm_vcpu, arch.rm_intr)},
{"rm_exit", offsetof(struct kvm_vcpu, arch.rm_exit)},
{"guest", offsetof(struct kvm_vcpu, arch.guest_time)},
{"cede", offsetof(struct kvm_vcpu, arch.cede_time)},
};

#define N_TIMINGS (sizeof(timings) / sizeof(timings[0]))

struct debugfs_timings_state {
struct kvm_vcpu *vcpu;
unsigned int buflen;
char buf[N_TIMINGS * 100];
};

static int debugfs_timings_open(struct inode *inode, struct file *file)
{
struct kvm_vcpu *vcpu = inode->i_private;
struct debugfs_timings_state *p;

p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return -ENOMEM;

kvm_get_kvm(vcpu->kvm);
p->vcpu = vcpu;
file->private_data = p;

return nonseekable_open(inode, file);
}

static int debugfs_timings_release(struct inode *inode, struct file *file)
{
struct debugfs_timings_state *p = file->private_data;

kvm_put_kvm(p->vcpu->kvm);
kfree(p);
return 0;
}

static ssize_t debugfs_timings_read(struct file *file, char __user *buf,
size_t len, loff_t *ppos)
{
struct debugfs_timings_state *p = file->private_data;
struct kvm_vcpu *vcpu = p->vcpu;
char *s, *buf_end;
struct kvmhv_tb_accumulator tb;
u64 count;
loff_t pos;
ssize_t n;
int i, loops;
bool ok;

if (!p->buflen) {
s = p->buf;
buf_end = s + sizeof(p->buf);
for (i = 0; i < N_TIMINGS; ++i) {
struct kvmhv_tb_accumulator *acc;

acc = (struct kvmhv_tb_accumulator *)
((unsigned long)vcpu + timings[i].offset);
ok = false;
for (loops = 0; loops < 1000; ++loops) {
count = acc->seqcount;
if (!(count & 1)) {
smp_rmb();
tb = *acc;
smp_rmb();
if (count == acc->seqcount) {
ok = true;
break;
}
}
udelay(1);
}
if (!ok)
snprintf(s, buf_end - s, "%s: stuck\n",
timings[i].name);
else
snprintf(s, buf_end - s,
"%s: %llu %llu %llu %llu\n",
timings[i].name, count / 2,
tb_to_ns(tb.tb_total),
tb_to_ns(tb.tb_min),
tb_to_ns(tb.tb_max));
s += strlen(s);
}
p->buflen = s - p->buf;
}

pos = *ppos;
if (pos >= p->buflen)
return 0;
if (len > p->buflen - pos)
len = p->buflen - pos;
n = copy_to_user(buf, p->buf + pos, len);
if (n) {
if (n == len)
return -EFAULT;
len -= n;
}
*ppos = pos + len;
return len;
}

static ssize_t debugfs_timings_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
return -EACCES;
}

static const struct file_operations debugfs_timings_ops = {
.owner = THIS_MODULE,
.open = debugfs_timings_open,
.release = debugfs_timings_release,
.read = debugfs_timings_read,
.write = debugfs_timings_write,
.llseek = generic_file_llseek,
};

/* Create a debugfs directory for the vcpu */
static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
{
char buf[16];
struct kvm *kvm = vcpu->kvm;

snprintf(buf, sizeof(buf), "vcpu%u", id);
if (IS_ERR_OR_NULL(kvm->arch.debugfs_dir))
return;
vcpu->arch.debugfs_dir = debugfs_create_dir(buf, kvm->arch.debugfs_dir);
if (IS_ERR_OR_NULL(vcpu->arch.debugfs_dir))
return;
vcpu->arch.debugfs_timings =
debugfs_create_file("timings", 0444, vcpu->arch.debugfs_dir,
vcpu, &debugfs_timings_ops);
}

#else /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */
static void debugfs_vcpu_init(struct kvm_vcpu *vcpu, unsigned int id)
{
}
#endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */

static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
unsigned int id)
{
Expand Down Expand Up @@ -1492,6 +1640,8 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
vcpu->arch.cpu_type = KVM_CPU_3S_64;
kvmppc_sanity_check(vcpu);

debugfs_vcpu_init(vcpu, id);

return vcpu;

free_vcpu:
Expand Down
Loading

0 comments on commit b6c295d

Please sign in to comment.