From 9b3a7fd0f5fb583a8fdda678e8a87dff1717f7f3 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Sep 2017 16:39:33 -0700 Subject: [PATCH 01/12] x86/intel_rdt: Add framework for better RDT UI diagnostics Commands are given to the resctrl file system by making/removing directories, or by writing to files. When something goes wrong the user is generally left wondering why they got: bash: echo: write error: Invalid argument Add a new file "last_cmd_status" to the "info" directory that will give the user some better clues on what went wrong. Provide functions to clear and update last_cmd_status which check that we hold the rdtgroup_mutex. [ tglx: Made last_cmd_status static and folded back the hunk from patch 3 which replaces the open coded access to last_cmd_status with the accessor function ] Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Steven Rostedt Cc: Vikas Shivappa Cc: Boris Petkov Cc: Reinette Chatre Link: https://lkml.kernel.org/r/edc4e0e9741eee89bba569f0021b1b2662fd9508.1506382469.git.tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt.h | 7 +++ arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 56 ++++++++++++++++++++++++ 2 files changed, 63 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index ebaddaeef023f..9d3148d88ec8b 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -126,12 +126,15 @@ struct rdtgroup { #define RFTYPE_BASE BIT(1) #define RF_CTRLSHIFT 4 #define RF_MONSHIFT 5 +#define RF_TOPSHIFT 6 #define RFTYPE_CTRL BIT(RF_CTRLSHIFT) #define RFTYPE_MON BIT(RF_MONSHIFT) +#define RFTYPE_TOP BIT(RF_TOPSHIFT) #define RFTYPE_RES_CACHE BIT(8) #define RFTYPE_RES_MB BIT(9) #define RF_CTRL_INFO (RFTYPE_INFO | RFTYPE_CTRL) #define RF_MON_INFO (RFTYPE_INFO | RFTYPE_MON) +#define RF_TOP_INFO (RFTYPE_INFO | RFTYPE_TOP) #define RF_CTRL_BASE (RFTYPE_BASE | RFTYPE_CTRL) /* List of all resource groups */ @@ -408,6 +411,10 @@ union cpuid_0x10_x_edx { unsigned int 
full; }; +void rdt_last_cmd_clear(void); +void rdt_last_cmd_puts(const char *s); +void rdt_last_cmd_printf(const char *fmt, ...); + void rdt_ctrl_update(void *arg); struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn); void rdtgroup_kn_unlock(struct kernfs_node *kn); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index a869d4a073c5c..68103513130b7 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,31 @@ static struct kernfs_node *kn_mongrp; /* Kernel fs node for "mon_data" directory under root */ static struct kernfs_node *kn_mondata; +static struct seq_buf last_cmd_status; +static char last_cmd_status_buf[512]; + +void rdt_last_cmd_clear(void) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_clear(&last_cmd_status); +} + +void rdt_last_cmd_puts(const char *s) +{ + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_puts(&last_cmd_status, s); +} + +void rdt_last_cmd_printf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + lockdep_assert_held(&rdtgroup_mutex); + seq_buf_vprintf(&last_cmd_status, fmt, ap); + va_end(ap); +} + /* * Trivial allocator for CLOSIDs. Since h/w only supports a small number, * we can keep a bitmap of free CLOSIDs in a single integer. 
@@ -569,6 +595,21 @@ static int rdtgroup_tasks_show(struct kernfs_open_file *of, return ret; } +static int rdt_last_cmd_status_show(struct kernfs_open_file *of, + struct seq_file *seq, void *v) +{ + int len; + + mutex_lock(&rdtgroup_mutex); + len = seq_buf_used(&last_cmd_status); + if (len) + seq_printf(seq, "%.*s", len, last_cmd_status_buf); + else + seq_puts(seq, "ok\n"); + mutex_unlock(&rdtgroup_mutex); + return 0; +} + static int rdt_num_closids_show(struct kernfs_open_file *of, struct seq_file *seq, void *v) { @@ -685,6 +726,13 @@ static ssize_t max_threshold_occ_write(struct kernfs_open_file *of, /* rdtgroup information files for one cache resource. */ static struct rftype res_common_files[] = { + { + .name = "last_cmd_status", + .mode = 0444, + .kf_ops = &rdtgroup_kf_single_ops, + .seq_show = rdt_last_cmd_status_show, + .fflags = RF_TOP_INFO, + }, { .name = "num_closids", .mode = 0444, @@ -855,6 +903,10 @@ static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn) return PTR_ERR(kn_info); kernfs_get(kn_info); + ret = rdtgroup_add_files(kn_info, RF_TOP_INFO); + if (ret) + goto out_destroy; + for_each_alloc_enabled_rdt_resource(r) { fflags = r->fflags | RF_CTRL_INFO; ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags); @@ -1156,6 +1208,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, out_cdp: cdp_disable(); out: + rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); return dentry; @@ -1902,6 +1955,9 @@ int __init rdtgroup_init(void) { int ret = 0; + seq_buf_init(&last_cmd_status, last_cmd_status_buf, + sizeof(last_cmd_status_buf)); + ret = rdtgroup_setup_root(); if (ret) return ret; From c377dcfbee808efdb66cf1bb6b9f06fa26b2ad0a Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Sep 2017 16:39:34 -0700 Subject: [PATCH 02/12] x86/intel_rdt: Add diagnostics when writing the schemata file Save helpful descriptions of what went wrong when writing a schemata file. 
Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Steven Rostedt Cc: Vikas Shivappa Cc: Boris Petkov Cc: Reinette Chatre Link: https://lkml.kernel.org/r/9d6cef757dc88639c8ab47f1e7bc1b081a84bb88.1506382469.git.tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 49 ++++++++++++++++----- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index f6ea94f8954a7..f29b4c21e7d4f 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -42,15 +42,22 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) /* * Only linear delay values is supported for current Intel SKUs. */ - if (!r->membw.delay_linear) + if (!r->membw.delay_linear) { + rdt_last_cmd_puts("No support for non-linear MB domains\n"); return false; + } ret = kstrtoul(buf, 10, &bw); - if (ret) + if (ret) { + rdt_last_cmd_printf("Non-decimal digit in MB value %s\n", buf); return false; + } - if (bw < r->membw.min_bw || bw > r->default_ctrl) + if (bw < r->membw.min_bw || bw > r->default_ctrl) { + rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, + r->membw.min_bw, r->default_ctrl); return false; + } *data = roundup(bw, (unsigned long)r->membw.bw_gran); return true; @@ -60,8 +67,10 @@ int parse_bw(char *buf, struct rdt_resource *r, struct rdt_domain *d) { unsigned long data; - if (d->have_new_ctrl) + if (d->have_new_ctrl) { + rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; + } if (!bw_validate(buf, &data, r)) return -EINVAL; @@ -84,20 +93,29 @@ static bool cbm_validate(char *buf, unsigned long *data, struct rdt_resource *r) int ret; ret = kstrtoul(buf, 16, &val); - if (ret) + if (ret) { + rdt_last_cmd_printf("non-hex character in mask %s\n", buf); return false; + } - if (val == 0 || val > r->default_ctrl) + if (val == 0 || val > r->default_ctrl) { + 
rdt_last_cmd_puts("mask out of range\n"); return false; + } first_bit = find_first_bit(&val, cbm_len); zero_bit = find_next_zero_bit(&val, cbm_len, first_bit); - if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) + if (find_next_bit(&val, cbm_len, zero_bit) < cbm_len) { + rdt_last_cmd_printf("mask %lx has non-consecutive 1-bits\n", val); return false; + } - if ((zero_bit - first_bit) < r->cache.min_cbm_bits) + if ((zero_bit - first_bit) < r->cache.min_cbm_bits) { + rdt_last_cmd_printf("Need at least %d bits in mask\n", + r->cache.min_cbm_bits); return false; + } *data = val; return true; @@ -111,8 +129,10 @@ int parse_cbm(char *buf, struct rdt_resource *r, struct rdt_domain *d) { unsigned long data; - if (d->have_new_ctrl) + if (d->have_new_ctrl) { + rdt_last_cmd_printf("duplicate domain %d\n", d->id); return -EINVAL; + } if(!cbm_validate(buf, &data, r)) return -EINVAL; @@ -139,8 +159,10 @@ static int parse_line(char *line, struct rdt_resource *r) return 0; dom = strsep(&line, ";"); id = strsep(&dom, "="); - if (!dom || kstrtoul(id, 10, &dom_id)) + if (!dom || kstrtoul(id, 10, &dom_id)) { + rdt_last_cmd_puts("Missing '=' or non-numeric domain\n"); return -EINVAL; + } dom = strim(dom); list_for_each_entry(d, &r->domains, list) { if (d->id == dom_id) { @@ -196,6 +218,7 @@ static int rdtgroup_parse_resource(char *resname, char *tok, int closid) if (!strcmp(resname, r->name) && closid < r->num_closid) return parse_line(tok, r); } + rdt_last_cmd_printf("unknown/unsupported resource name '%s'\n", resname); return -EINVAL; } @@ -209,8 +232,10 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int closid, ret = 0; /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') + if (nbytes == 0 || buf[nbytes - 1] != '\n') { + seq_buf_puts(&last_cmd_status, "no trailing newline\n"); return -EINVAL; + } buf[nbytes - 1] = '\0'; rdtgrp = rdtgroup_kn_lock_live(of->kn); @@ -218,6 +243,7 @@ ssize_t rdtgroup_schemata_write(struct 
kernfs_open_file *of, rdtgroup_kn_unlock(of->kn); return -ENOENT; } + rdt_last_cmd_clear(); closid = rdtgrp->closid; @@ -229,6 +255,7 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, while ((tok = strsep(&buf, "\n")) != NULL) { resname = strim(strsep(&tok, ":")); if (!tok) { + rdt_last_cmd_puts("Missing ':'\n"); ret = -EINVAL; goto out; } From 29e74f35b2fed0ca3e8b31db157e1d183e9d0819 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Sep 2017 16:39:35 -0700 Subject: [PATCH 03/12] x86/intel_rdt: Add diagnostics when writing the tasks file About the only tricky case is trying to move a task into a monitor group that is a subdirectory of a different control group. But cover the simple cases too. Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Steven Rostedt Cc: Vikas Shivappa Cc: Boris Petkov Cc: Reinette Chatre Link: https://lkml.kernel.org/r/f1841cce6a242aed37cb926dee8942727331bf78.1506382469.git.tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 4 +--- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 13 ++++++++++--- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index f29b4c21e7d4f..30aeb267cbd2d 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -232,10 +232,8 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, int closid, ret = 0; /* Valid input requires a trailing newline */ - if (nbytes == 0 || buf[nbytes - 1] != '\n') { - seq_buf_puts(&last_cmd_status, "no trailing newline\n"); + if (nbytes == 0 || buf[nbytes - 1] != '\n') return -EINVAL; - } buf[nbytes - 1] = '\0'; rdtgrp = rdtgroup_kn_lock_live(of->kn); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 68103513130b7..d39092eb63bbe 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ 
b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -478,6 +478,7 @@ static int __rdtgroup_move_task(struct task_struct *tsk, */ atomic_dec(&rdtgrp->waitcount); kfree(callback); + rdt_last_cmd_puts("task exited\n"); } else { /* * For ctrl_mon groups move both closid and rmid. @@ -488,10 +489,12 @@ static int __rdtgroup_move_task(struct task_struct *tsk, tsk->closid = rdtgrp->closid; tsk->rmid = rdtgrp->mon.rmid; } else if (rdtgrp->type == RDTMON_GROUP) { - if (rdtgrp->mon.parent->closid == tsk->closid) + if (rdtgrp->mon.parent->closid == tsk->closid) { tsk->rmid = rdtgrp->mon.rmid; - else + } else { + rdt_last_cmd_puts("Can't move task to different control group\n"); ret = -EINVAL; + } } } return ret; @@ -510,8 +513,10 @@ static int rdtgroup_task_write_permission(struct task_struct *task, */ if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) && !uid_eq(cred->euid, tcred->uid) && - !uid_eq(cred->euid, tcred->suid)) + !uid_eq(cred->euid, tcred->suid)) { + rdt_last_cmd_printf("No permission to move task %d\n", task->pid); ret = -EPERM; + } put_cred(tcred); return ret; @@ -528,6 +533,7 @@ static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp, tsk = find_task_by_vpid(pid); if (!tsk) { rcu_read_unlock(); + rdt_last_cmd_printf("No task %d\n", pid); return -ESRCH; } } else { @@ -555,6 +561,7 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of, if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0) return -EINVAL; rdtgrp = rdtgroup_kn_lock_live(of->kn); + rdt_last_cmd_clear(); if (rdtgrp) ret = rdtgroup_move_task(pid, rdtgrp, of); From 94457b36e8a5026443707b48dcf54b204e098fd7 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Sep 2017 16:39:36 -0700 Subject: [PATCH 04/12] x86/intel_rdt: Add diagnostics when writing the cpus file Can't add a cpu to a monitor group unless it belongs to parent group. Can't delete cpus from the default group. 
Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Steven Rostedt Cc: Vikas Shivappa Cc: Boris Petkov Cc: Reinette Chatre Link: https://lkml.kernel.org/r/757a869a25e9fc1b7a2e9bc43e1159455c1964a0.1506382469.git.tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index d39092eb63bbe..6e0ee7ca14907 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -264,8 +264,10 @@ static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, /* Check whether cpus belong to parent ctrl group */ cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask); - if (cpumask_weight(tmpmask)) + if (cpumask_weight(tmpmask)) { + rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n"); return -EINVAL; + } /* Check whether cpus are dropped from this group */ cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); @@ -317,8 +319,10 @@ static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask, cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask); if (cpumask_weight(tmpmask)) { /* Can't drop from default group */ - if (rdtgrp == &rdtgroup_default) + if (rdtgrp == &rdtgroup_default) { + rdt_last_cmd_puts("Can't drop CPUs from default group\n"); return -EINVAL; + } /* Give any dropped cpus to rdtgroup_default */ cpumask_or(&rdtgroup_default.cpu_mask, @@ -383,8 +387,10 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, } rdtgrp = rdtgroup_kn_lock_live(of->kn); + rdt_last_cmd_clear(); if (!rdtgrp) { ret = -ENOENT; + rdt_last_cmd_puts("directory was removed\n"); goto unlock; } @@ -393,13 +399,16 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of, else ret = cpumask_parse(buf, newmask); - if (ret) + if (ret) { + rdt_last_cmd_puts("bad cpu list/mask\n"); goto unlock; + } /* check that user 
didn't specify any offline cpus */ cpumask_andnot(tmpmask, newmask, cpu_online_mask); if (cpumask_weight(tmpmask)) { ret = -EINVAL; + rdt_last_cmd_puts("can only assign online cpus\n"); goto unlock; } From cfd0f34e4cd5f1a5ad7000a3104c37886a70bca9 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Sep 2017 16:39:37 -0700 Subject: [PATCH 05/12] x86/intel_rdt: Add diagnostics when making directories Mostly this is about running out of RMIDs or CLOSIDs. Other errors are various internal errors. Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Steven Rostedt Cc: Vikas Shivappa Cc: Boris Petkov Cc: Reinette Chatre Link: https://lkml.kernel.org/r/027cf1ffb3a3695f2d54525813a1d644887353cf.1506382469.git.tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 28 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 6e0ee7ca14907..abd220bf6cd75 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1593,8 +1593,10 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, int ret; prdtgrp = rdtgroup_kn_lock_live(prgrp_kn); + rdt_last_cmd_clear(); if (!prdtgrp) { ret = -ENODEV; + rdt_last_cmd_puts("directory was removed\n"); goto out_unlock; } @@ -1602,6 +1604,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL); if (!rdtgrp) { ret = -ENOSPC; + rdt_last_cmd_puts("kernel out of memory\n"); goto out_unlock; } *r = rdtgrp; @@ -1613,6 +1616,7 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp); if (IS_ERR(kn)) { ret = PTR_ERR(kn); + rdt_last_cmd_puts("kernfs create error\n"); goto out_free_rgrp; } rdtgrp->kn = kn; @@ -1626,24 +1630,32 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, kernfs_get(kn); ret = rdtgroup_kn_set_ugid(kn); - 
if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs perm error\n"); goto out_destroy; + } files = RFTYPE_BASE | RFTYPE_CTRL; files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); ret = rdtgroup_add_files(kn, files); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs fill error\n"); goto out_destroy; + } if (rdt_mon_capable) { ret = alloc_rmid(); - if (ret < 0) + if (ret < 0) { + rdt_last_cmd_puts("out of RMIDs\n"); goto out_destroy; + } rdtgrp->mon.rmid = ret; ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); goto out_idfree; + } } kernfs_activate(kn); @@ -1721,8 +1733,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, kn = rdtgrp->kn; ret = closid_alloc(); - if (ret < 0) + if (ret < 0) { + rdt_last_cmd_puts("out of CLOSIDs\n"); goto out_common_fail; + } closid = ret; rdtgrp->closid = closid; @@ -1734,8 +1748,10 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, * of tasks and cpus to monitor. 
*/ ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL); - if (ret) + if (ret) { + rdt_last_cmd_puts("kernfs subdir error\n"); goto out_id_free; + } } goto out_unlock; From 165d3ad884df4b30c3564a478b457b499345886f Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 25 Sep 2017 16:39:38 -0700 Subject: [PATCH 06/12] x86/intel_rdt: Add documentation for "info/last_cmd_status" New file in the "info" directory helps diagnose what went wrong when using the /sys/fs/resctrl file system Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: Steven Rostedt Cc: Vikas Shivappa Cc: Boris Petkov Cc: Reinette Chatre Link: https://lkml.kernel.org/r/387e78e444582403c2454479e576caf5721a363f.1506382469.git.tony.luck@intel.com --- Documentation/x86/intel_rdt_ui.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Documentation/x86/intel_rdt_ui.txt b/Documentation/x86/intel_rdt_ui.txt index 4d8848e4e224a..6851854cf69dd 100644 --- a/Documentation/x86/intel_rdt_ui.txt +++ b/Documentation/x86/intel_rdt_ui.txt @@ -87,6 +87,17 @@ with the following files: bytes) at which a previously used LLC_occupancy counter can be considered for re-use. +Finally, in the top level of the "info" directory there is a file +named "last_cmd_status". This is reset with every "command" issued +via the file system (making new directories or writing to any of the +control files). If the command was successful, it will read as "ok". +If the command failed, it will provide more information than can be +conveyed in the error returns from file operations. E.g. 
+ + # echo L3:0=f7 > schemata + bash: echo: write error: Invalid argument + # cat info/last_cmd_status + mask f7 has non-consecutive 1-bits Resource alloc and monitor groups --------------------------------- From 5fd88b60e11b7d81b2c944c1b45834c4a6aa0157 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 2 Oct 2017 15:59:31 +0100 Subject: [PATCH 07/12] x86/intel_rdt/cqm: Make integer rmid_limbo_count static rmid_limbo_count is local to the source and does not need to be in global scope, so make it static. Cleans up sparse warning: symbol 'rmid_limbo_count' was not declared. Should it be static? Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20171002145931.27479-1-colin.king@canonical.com --- arch/x86/kernel/cpu/intel_rdt_monitor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c index 30827510094be..681450eee428b 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c @@ -51,7 +51,7 @@ static LIST_HEAD(rmid_free_lru); * may have a occupancy value > intel_cqm_threshold. User can change * the threshold occupancy value. */ -unsigned int rmid_limbo_count; +static unsigned int rmid_limbo_count; /** * @rmid_entry - The entry in the limbo and free lists. From 3916a4135c696fa226a1abe6d6a0ff7f5edd9a7c Mon Sep 17 00:00:00 2001 From: Jithu Joseph Date: Wed, 4 Oct 2017 15:48:57 -0700 Subject: [PATCH 08/12] x86/intel_rdt: Remove redundant assignment The assignment to the 'files' variable is immediately overwritten in the following line. Remove the older assignment, which was meant specifically for creating control groups files. 
Fixes: c7d9aac61311 ("x86/intel_rdt/cqm: Add mkdir support for RDT monitoring") Reported-by: Reinette Chatre Signed-off-by: Jithu Joseph Signed-off-by: Thomas Gleixner Acked-by: Fenghua Yu Cc: tony.luck@intel.com Cc: vikas.shivappa@intel.com Link: https://lkml.kernel.org/r/1507157337-18118-1-git-send-email-jithu.joseph@intel.com --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index abd220bf6cd75..8a61b20c7e51a 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1635,7 +1635,6 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn, goto out_destroy; } - files = RFTYPE_BASE | RFTYPE_CTRL; files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype); ret = rdtgroup_add_files(kn, files); if (ret) { From 95953034fb24c16ad0047a98b16427e5935830c4 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Fri, 20 Oct 2017 02:16:57 -0700 Subject: [PATCH 09/12] x86/intel_rdt: Initialize bitmask of shareable resource if CDP enabled The platform informs via CPUID.(EAX=0x10, ECX=res#):EBX[31:0] (valid res# are only 1 for L3 and 2 for L2) which unit of the allocation may be used by other entities in the platform. This information is valid whether CDP (Code and Data Prioritization) is enabled or not. Ensure that the bitmask of shareable resource is initialized when CDP is enabled. 
Fixes: 0dd2d7494cd8 ("x86/intel_rdt: Show bitmask of shareable resource with other executing units") Signed-off-by: Reinette Chatre Signed-off-by: Thomas Gleixner Acked-by: Fenghua Yu Acked-by: Vikas Shivappa Acked-by: Tony Luck Link: https://lkml.kernel.org/r/815747bddc820ca221a8924edaf4d1a7324547e4.1508490116.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/intel_rdt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index cd5fc61ba4502..88dcf84790135 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -267,6 +267,7 @@ static void rdt_get_cdp_l3_config(int type) r->num_closid = r_l3->num_closid / 2; r->cache.cbm_len = r_l3->cache.cbm_len; r->default_ctrl = r_l3->default_ctrl; + r->cache.shareable_bits = r_l3->cache.shareable_bits; r->data_width = (r->cache.cbm_len + 3) / 4; r->alloc_capable = true; /* From 36b6f9fcb8928c06b6638a4cf91bc9d69bb49aa2 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Fri, 20 Oct 2017 02:16:58 -0700 Subject: [PATCH 10/12] x86/intel_rdt: Fix potential deadlock during resctrl unmount Lockdep warns about a potential deadlock: [ 66.782842] ====================================================== [ 66.782888] WARNING: possible circular locking dependency detected [ 66.782937] 4.14.0-rc2-test-test+ #48 Not tainted [ 66.782983] ------------------------------------------------------ [ 66.783052] umount/336 is trying to acquire lock: [ 66.783117] (cpu_hotplug_lock.rw_sem){++++}, at: [] rdt_kill_sb+0x215/0x390 [ 66.783193] but task is already holding lock: [ 66.783244] (rdtgroup_mutex){+.+.}, at: [] rdt_kill_sb+0x36/0x390 [ 66.783305] which lock already depends on the new lock. 
[ 66.783364] the existing dependency chain (in reverse order) is: [ 66.783419] -> #3 (rdtgroup_mutex){+.+.}: [ 66.783467] __lock_acquire+0x1293/0x13f0 [ 66.783509] lock_acquire+0xaf/0x220 [ 66.783543] __mutex_lock+0x71/0x9b0 [ 66.783575] mutex_lock_nested+0x1b/0x20 [ 66.783610] intel_rdt_online_cpu+0x3b/0x430 [ 66.783649] cpuhp_invoke_callback+0xab/0x8e0 [ 66.783687] cpuhp_thread_fun+0x7a/0x150 [ 66.783722] smpboot_thread_fn+0x1cc/0x270 [ 66.783764] kthread+0x16e/0x190 [ 66.783794] ret_from_fork+0x27/0x40 [ 66.783825] -> #2 (cpuhp_state){+.+.}: [ 66.783870] __lock_acquire+0x1293/0x13f0 [ 66.783906] lock_acquire+0xaf/0x220 [ 66.783938] cpuhp_issue_call+0x102/0x170 [ 66.783974] __cpuhp_setup_state_cpuslocked+0x154/0x2a0 [ 66.784023] __cpuhp_setup_state+0xc7/0x170 [ 66.784061] page_writeback_init+0x43/0x67 [ 66.784097] pagecache_init+0x43/0x4a [ 66.784131] start_kernel+0x3ad/0x3f7 [ 66.784165] x86_64_start_reservations+0x2a/0x2c [ 66.784204] x86_64_start_kernel+0x72/0x75 [ 66.784241] verify_cpu+0x0/0xfb [ 66.784270] -> #1 (cpuhp_state_mutex){+.+.}: [ 66.784319] __lock_acquire+0x1293/0x13f0 [ 66.784355] lock_acquire+0xaf/0x220 [ 66.784387] __mutex_lock+0x71/0x9b0 [ 66.784419] mutex_lock_nested+0x1b/0x20 [ 66.784454] __cpuhp_setup_state_cpuslocked+0x52/0x2a0 [ 66.784497] __cpuhp_setup_state+0xc7/0x170 [ 66.784535] page_alloc_init+0x28/0x30 [ 66.784569] start_kernel+0x148/0x3f7 [ 66.784602] x86_64_start_reservations+0x2a/0x2c [ 66.784642] x86_64_start_kernel+0x72/0x75 [ 66.784678] verify_cpu+0x0/0xfb [ 66.784707] -> #0 (cpu_hotplug_lock.rw_sem){++++}: [ 66.784759] check_prev_add+0x32f/0x6e0 [ 66.784794] __lock_acquire+0x1293/0x13f0 [ 66.784830] lock_acquire+0xaf/0x220 [ 66.784863] cpus_read_lock+0x3d/0xb0 [ 66.784896] rdt_kill_sb+0x215/0x390 [ 66.784930] deactivate_locked_super+0x3e/0x70 [ 66.784968] deactivate_super+0x40/0x60 [ 66.785003] cleanup_mnt+0x3f/0x80 [ 66.785034] __cleanup_mnt+0x12/0x20 [ 66.785070] task_work_run+0x8b/0xc0 [ 66.785103] 
exit_to_usermode_loop+0x94/0xa0 [ 66.786804] syscall_return_slowpath+0xe8/0x150 [ 66.788502] entry_SYSCALL_64_fastpath+0xab/0xad [ 66.790194] other info that might help us debug this: [ 66.795139] Chain exists of: cpu_hotplug_lock.rw_sem --> cpuhp_state --> rdtgroup_mutex [ 66.800035] Possible unsafe locking scenario: [ 66.803267] CPU0 CPU1 [ 66.804867] ---- ---- [ 66.806443] lock(rdtgroup_mutex); [ 66.808002] lock(cpuhp_state); [ 66.809565] lock(rdtgroup_mutex); [ 66.811110] lock(cpu_hotplug_lock.rw_sem); [ 66.812608] *** DEADLOCK *** [ 66.816983] 2 locks held by umount/336: [ 66.818418] #0: (&type->s_umount_key#35){+.+.}, at: [] deactivate_super+0x38/0x60 [ 66.819922] #1: (rdtgroup_mutex){+.+.}, at: [] rdt_kill_sb+0x36/0x390 When the resctrl filesystem is unmounted the locks should be obtained in the same order as was done when the cpus came online: cpu_hotplug_lock before rdtgroup_mutex. This also requires to switch the static_branch_disable() calls to the _cpuslocked variant because now cpu hotplug lock is held already. 
[ tglx: Switched to cpus_read_[un]lock ] Signed-off-by: Reinette Chatre Signed-off-by: Thomas Gleixner Tested-by: Sai Praneeth Prakhya Acked-by: Vikas Shivappa Acked-by: Fenghua Yu Acked-by: Tony Luck Link: https://lkml.kernel.org/r/cc292e76be073f7260604651711c47b09fd0dc81.1508490116.git.reinette.chatre@intel.com --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8a61b20c7e51a..8ce5d038c43b3 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1364,9 +1364,7 @@ static void rmdir_all_sub(void) kfree(rdtgrp); } /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */ - get_online_cpus(); update_closid_rmid(cpu_online_mask, &rdtgroup_default); - put_online_cpus(); kernfs_remove(kn_info); kernfs_remove(kn_mongrp); @@ -1377,6 +1375,7 @@ static void rdt_kill_sb(struct super_block *sb) { struct rdt_resource *r; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /*Put everything back to default values. 
*/ @@ -1384,11 +1383,12 @@ static void rdt_kill_sb(struct super_block *sb) reset_all_ctrls(r); cdp_disable(); rmdir_all_sub(); - static_branch_disable(&rdt_alloc_enable_key); - static_branch_disable(&rdt_mon_enable_key); - static_branch_disable(&rdt_enable_key); + static_branch_disable_cpuslocked(&rdt_alloc_enable_key); + static_branch_disable_cpuslocked(&rdt_mon_enable_key); + static_branch_disable_cpuslocked(&rdt_enable_key); kernfs_kill_sb(sb); mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); } static struct file_system_type rdt_fs_type = { From 87943db7dfb0c5ee5aa74a9ac06346fadd9695c8 Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Fri, 20 Oct 2017 02:16:59 -0700 Subject: [PATCH 11/12] x86/intel_rdt: Fix potential deadlock during resctrl mount Sai reported a warning during some MBA tests: [ 236.755559] ====================================================== [ 236.762443] WARNING: possible circular locking dependency detected [ 236.769328] 4.14.0-rc4-yocto-standard #8 Not tainted [ 236.774857] ------------------------------------------------------ [ 236.781738] mount/10091 is trying to acquire lock: [ 236.787071] (cpu_hotplug_lock.rw_sem){++++}, at: [] static_key_enable+0x12/0x30 [ 236.797058] but task is already holding lock: [ 236.803552] (&type->s_umount_key#37/1){+.+.}, at: [] sget_userns+0x32f/0x520 [ 236.813247] which lock already depends on the new lock. 
[ 236.822353] the existing dependency chain (in reverse order) is: [ 236.830686] -> #4 (&type->s_umount_key#37/1){+.+.}: [ 236.837756] __lock_acquire+0x1100/0x11a0 [ 236.842799] lock_acquire+0xdf/0x1d0 [ 236.847363] down_write_nested+0x46/0x80 [ 236.852310] sget_userns+0x32f/0x520 [ 236.856873] kernfs_mount_ns+0x7e/0x1f0 [ 236.861728] rdt_mount+0x30c/0x440 [ 236.866096] mount_fs+0x38/0x150 [ 236.870262] vfs_kern_mount+0x67/0x150 [ 236.875015] do_mount+0x1df/0xd50 [ 236.879286] SyS_mount+0x95/0xe0 [ 236.883464] entry_SYSCALL_64_fastpath+0x18/0xad [ 236.889183] -> #3 (rdtgroup_mutex){+.+.}: [ 236.895292] __lock_acquire+0x1100/0x11a0 [ 236.900337] lock_acquire+0xdf/0x1d0 [ 236.904899] __mutex_lock+0x80/0x8f0 [ 236.909459] mutex_lock_nested+0x1b/0x20 [ 236.914407] intel_rdt_online_cpu+0x3b/0x4a0 [ 236.919745] cpuhp_invoke_callback+0xce/0xb80 [ 236.925177] cpuhp_thread_fun+0x1c5/0x230 [ 236.930222] smpboot_thread_fn+0x11a/0x1e0 [ 236.935362] kthread+0x152/0x190 [ 236.939536] ret_from_fork+0x27/0x40 [ 236.944097] -> #2 (cpuhp_state-up){+.+.}: [ 236.950199] __lock_acquire+0x1100/0x11a0 [ 236.955241] lock_acquire+0xdf/0x1d0 [ 236.959800] cpuhp_issue_call+0x12e/0x1c0 [ 236.964845] __cpuhp_setup_state_cpuslocked+0x13b/0x2f0 [ 236.971242] __cpuhp_setup_state+0xa7/0x120 [ 236.976483] page_writeback_init+0x43/0x67 [ 236.981623] pagecache_init+0x38/0x3b [ 236.986281] start_kernel+0x3c6/0x41a [ 236.990931] x86_64_start_reservations+0x2a/0x2c [ 236.996650] x86_64_start_kernel+0x72/0x75 [ 237.001793] verify_cpu+0x0/0xfb [ 237.005966] -> #1 (cpuhp_state_mutex){+.+.}: [ 237.012364] __lock_acquire+0x1100/0x11a0 [ 237.017408] lock_acquire+0xdf/0x1d0 [ 237.021969] __mutex_lock+0x80/0x8f0 [ 237.026527] mutex_lock_nested+0x1b/0x20 [ 237.031475] __cpuhp_setup_state_cpuslocked+0x54/0x2f0 [ 237.037777] __cpuhp_setup_state+0xa7/0x120 [ 237.043013] page_alloc_init+0x28/0x30 [ 237.047769] start_kernel+0x148/0x41a [ 237.052425] x86_64_start_reservations+0x2a/0x2c [ 237.058145] 
x86_64_start_kernel+0x72/0x75 [ 237.063284] verify_cpu+0x0/0xfb [ 237.067456] -> #0 (cpu_hotplug_lock.rw_sem){++++}: [ 237.074436] check_prev_add+0x401/0x800 [ 237.079286] __lock_acquire+0x1100/0x11a0 [ 237.084330] lock_acquire+0xdf/0x1d0 [ 237.088890] cpus_read_lock+0x42/0x90 [ 237.093548] static_key_enable+0x12/0x30 [ 237.098496] rdt_mount+0x406/0x440 [ 237.102862] mount_fs+0x38/0x150 [ 237.107035] vfs_kern_mount+0x67/0x150 [ 237.111787] do_mount+0x1df/0xd50 [ 237.116058] SyS_mount+0x95/0xe0 [ 237.120233] entry_SYSCALL_64_fastpath+0x18/0xad [ 237.125952] other info that might help us debug this: [ 237.134867] Chain exists of: cpu_hotplug_lock.rw_sem --> rdtgroup_mutex --> &type->s_umount_key#37/1 [ 237.148425] Possible unsafe locking scenario: [ 237.155015] CPU0 CPU1 [ 237.160057] ---- ---- [ 237.165100] lock(&type->s_umount_key#37/1); [ 237.169952] lock(rdtgroup_mutex); [ 237.176641] lock(&type->s_umount_key#37/1); [ 237.184287] lock(cpu_hotplug_lock.rw_sem); [ 237.189041] *** DEADLOCK *** When the resctrl filesystem is mounted the locks must be acquired in the same order as was done when the cpus came online: cpu_hotplug_lock before rdtgroup_mutex. This also requires switching the static_branch_enable() calls to the _cpuslocked variant, because the cpu hotplug lock is now already held.
[ tglx: Switched to cpus_read_[un]lock ] Reported-by: Sai Praneeth Prakhya Signed-off-by: Reinette Chatre Tested-by: Sai Praneeth Prakhya Acked-by: Vikas Shivappa Cc: fenghua.yu@intel.com Cc: tony.luck@intel.com Link: https://lkml.kernel.org/r/9c41b91bc2f47d9e95b62b213ecdb45623c47a9f.1508490116.git.reinette.chatre@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8ce5d038c43b3..64c5ff97ee0d7 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1149,6 +1149,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, struct dentry *dentry; int ret; + cpus_read_lock(); mutex_lock(&rdtgroup_mutex); /* * resctrl file system can only be mounted once. @@ -1198,12 +1199,12 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, goto out_mondata; if (rdt_alloc_capable) - static_branch_enable(&rdt_alloc_enable_key); + static_branch_enable_cpuslocked(&rdt_alloc_enable_key); if (rdt_mon_capable) - static_branch_enable(&rdt_mon_enable_key); + static_branch_enable_cpuslocked(&rdt_mon_enable_key); if (rdt_alloc_capable || rdt_mon_capable) - static_branch_enable(&rdt_enable_key); + static_branch_enable_cpuslocked(&rdt_enable_key); if (is_mbm_enabled()) { r = &rdt_resources_all[RDT_RESOURCE_L3]; @@ -1226,6 +1227,7 @@ static struct dentry *rdt_mount(struct file_system_type *fs_type, out: rdt_last_cmd_clear(); mutex_unlock(&rdtgroup_mutex); + cpus_read_unlock(); return dentry; } From 2244645ab194fe45ffcbaa08f235c8f0c7fb54fc Mon Sep 17 00:00:00 2001 From: Xiaochen Shen Date: Fri, 10 Nov 2017 11:16:24 -0800 Subject: [PATCH 12/12] x86/intel_rdt: Fix a silent failure when writing zero value schemata Writing an invalid schemata with no domain values (e.g., "(L3|MB):"), results in a silent failure, i.e. 
the last_cmd_status returns OK. Check for an empty value and set the result string with a proper error message and return -EINVAL. Before the fix: # mkdir /sys/fs/resctrl/p1 # echo "L3:" > /sys/fs/resctrl/p1/schemata (silent failure) # cat /sys/fs/resctrl/info/last_cmd_status ok # echo "MB:" > /sys/fs/resctrl/p1/schemata (silent failure) # cat /sys/fs/resctrl/info/last_cmd_status ok After the fix: # mkdir /sys/fs/resctrl/p1 # echo "L3:" > /sys/fs/resctrl/p1/schemata -bash: echo: write error: Invalid argument # cat /sys/fs/resctrl/info/last_cmd_status Missing 'L3' value # echo "MB:" > /sys/fs/resctrl/p1/schemata -bash: echo: write error: Invalid argument # cat /sys/fs/resctrl/info/last_cmd_status Missing 'MB' value [ Tony: This is an unintended side effect of the patch earlier to allow the user to just write the value they want to change. While allowing user to specify less than all of the values, it also allows an empty value. ] Fixes: c4026b7b95a4 ("x86/intel_rdt: Implement "update" mode when writing schemata file") Signed-off-by: Xiaochen Shen Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Vikas Shivappa Cc: Fenghua Yu Link: https://lkml.kernel.org/r/20171110191624.20280-1-tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index 30aeb267cbd2d..23e1d5c249c60 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -257,6 +257,11 @@ ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of, ret = -EINVAL; goto out; } + if (tok[0] == '\0') { + rdt_last_cmd_printf("Missing '%s' value\n", resname); + ret = -EINVAL; + goto out; + } ret = rdtgroup_parse_resource(resname, tok, closid); if (ret) goto out;