Skip to content

Commit

Permalink
devlink: Add auto dump flag to health reporter
Browse files Browse the repository at this point in the history
On low memory system, run time dumps can consume too much memory. Add
administrator ability to disable auto dumps per reporter as part of the
error flow handle routine.

This attribute is not relevant while executing
DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET.

By default, auto dump is activated for any reporter that has a dump method,
as part of the reporter registration to devlink.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Eran Ben Elisha authored and David S. Miller committed Mar 30, 2020
1 parent ba7d16c commit 48bb52c
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 4 deletions.
2 changes: 2 additions & 0 deletions include/uapi/linux/devlink.h
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ enum devlink_attr {
DEVLINK_ATTR_NETNS_FD, /* u32 */
DEVLINK_ATTR_NETNS_PID, /* u32 */
DEVLINK_ATTR_NETNS_ID, /* u32 */

DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP, /* u8 */
/* add new attributes above here, update the policy in devlink.c */

__DEVLINK_ATTR_MAX,
Expand Down
26 changes: 22 additions & 4 deletions net/core/devlink.c
Original file line number Diff line number Diff line change
Expand Up @@ -5089,6 +5089,7 @@ struct devlink_health_reporter {
struct mutex dump_lock; /* lock parallel read/write from dump buffers */
u64 graceful_period;
bool auto_recover;
bool auto_dump;
u8 health_state;
u64 dump_ts;
u64 dump_real_ts;
Expand Down Expand Up @@ -5155,6 +5156,7 @@ devlink_health_reporter_create(struct devlink *devlink,
reporter->devlink = devlink;
reporter->graceful_period = graceful_period;
reporter->auto_recover = !!ops->recover;
reporter->auto_dump = !!ops->dump;
mutex_init(&reporter->dump_lock);
refcount_set(&reporter->refcount, 1);
list_add_tail(&reporter->list, &devlink->reporter_list);
Expand Down Expand Up @@ -5235,6 +5237,10 @@ devlink_nl_health_reporter_fill(struct sk_buff *msg,
nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
reporter->dump_real_ts, DEVLINK_ATTR_PAD))
goto reporter_nest_cancel;
if (reporter->ops->dump &&
nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
reporter->auto_dump))
goto reporter_nest_cancel;

nla_nest_end(msg, reporter_attr);
genlmsg_end(msg, hdr);
Expand Down Expand Up @@ -5381,10 +5387,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter,

reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;

mutex_lock(&reporter->dump_lock);
/* store current dump of current error, for later analysis */
devlink_health_do_dump(reporter, priv_ctx, NULL);
mutex_unlock(&reporter->dump_lock);
if (reporter->auto_dump) {
mutex_lock(&reporter->dump_lock);
/* store current dump of current error, for later analysis */
devlink_health_do_dump(reporter, priv_ctx, NULL);
mutex_unlock(&reporter->dump_lock);
}

if (reporter->auto_recover)
return devlink_health_reporter_recover(reporter,
Expand Down Expand Up @@ -5558,6 +5566,11 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
err = -EOPNOTSUPP;
goto out;
}
if (!reporter->ops->dump &&
info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
err = -EOPNOTSUPP;
goto out;
}

if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
reporter->graceful_period =
Expand All @@ -5567,6 +5580,10 @@ devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
reporter->auto_recover =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);

if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
reporter->auto_dump =
nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);

devlink_health_reporter_put(reporter);
return 0;
out:
Expand Down Expand Up @@ -6313,6 +6330,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
[DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
[DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
};

static const struct genl_ops devlink_nl_ops[] = {
Expand Down

0 comments on commit 48bb52c

Please sign in to comment.