Skip to content

Commit

Permalink
tipc: add trace_events for tipc link
Browse files Browse the repository at this point in the history
The commit adds the new trace_events for TIPC link object:

trace_tipc_link_timeout()
trace_tipc_link_fsm()
trace_tipc_link_reset()
trace_tipc_link_too_silent()
trace_tipc_link_retrans()
trace_tipc_link_bc_ack()
trace_tipc_link_conges()

And the traces for PROTOCOL messages at building and receiving:

trace_tipc_proto_build()
trace_tipc_proto_rcv()

Note:
a) The 'tipc_link_too_silent' event will only happen when the
'silent_intv_cnt' is about to reach the 'abort_limit' value (and the
event is enabled). The benefit for this kind of event is that we can
get an early indication about TIPC link loss issue due to timeout, then
can do some necessary actions for troubleshooting.

For example: To trigger the 'tipc_proto_rcv' when the 'too_silent'
event occurs:

echo 'enable_event:tipc:tipc_proto_rcv' > \
      events/tipc/tipc_link_too_silent/trigger

And disable it when TIPC link is reset:

echo 'disable_event:tipc:tipc_proto_rcv' > \
      events/tipc/tipc_link_reset/trigger

b) The 'tipc_link_retrans' or 'tipc_link_bc_ack' event is useful to
trace TIPC retransmission issues.

In addition, the commit adds the 'trace_tipc_list/link_dump()' at the
'retransmission failure' case. Then, if the issue occurs, the link
'transmq' along with the link data can be dumped for post-analysis.
These dump events should be enabled by default since it will only take
effect when the failure happens.

The same approach is also applied for the faulty case that the
validation of protocol message is failed.

Acked-by: Ying Xue <ying.xue@windriver.com>
Tested-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Tuong Lien authored and David S. Miller committed Dec 19, 2018
1 parent b4b9771 commit 26574db
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 2 deletions.
33 changes: 32 additions & 1 deletion net/tipc/link.c
Original file line number Diff line number Diff line change
Expand Up @@ -357,9 +357,11 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
rcv_l->bc_peer_is_up = true;
rcv_l->state = LINK_ESTABLISHED;
tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
tipc_link_reset(rcv_l);
rcv_l->state = LINK_RESET;
if (!snd_l->ackers) {
trace_tipc_link_reset(snd_l, TIPC_DUMP_ALL, "zero ackers!");
tipc_link_reset(snd_l);
snd_l->state = LINK_RESET;
__skb_queue_purge(xmitq);
Expand Down Expand Up @@ -523,6 +525,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,

l = *link;
strcpy(l->name, tipc_bclink_name);
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
tipc_link_reset(l);
l->state = LINK_RESET;
l->ackers = 0;
Expand All @@ -547,6 +550,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
int tipc_link_fsm_evt(struct tipc_link *l, int evt)
{
int rc = 0;
int old_state = l->state;

switch (l->state) {
case LINK_RESETTING:
Expand Down Expand Up @@ -693,10 +697,12 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt)
default:
pr_err("Unknown FSM state %x in %s\n", l->state, l->name);
}
trace_tipc_link_fsm(l->name, old_state, l->state, evt);
return rc;
illegal_evt:
pr_err("Illegal FSM event %x in state %x on link %s\n",
evt, l->state, l->name);
trace_tipc_link_fsm(l->name, old_state, l->state, evt);
return rc;
}

Expand Down Expand Up @@ -741,6 +747,18 @@ static void link_profile_stats(struct tipc_link *l)
l->stats.msg_length_profile[6]++;
}

/**
* tipc_link_too_silent - check if link is "too silent"
* @l: tipc link to be checked
*
* Returns true if the link 'silent_intv_cnt' is about to reach the
* 'abort_limit' value, otherwise false
*/
bool tipc_link_too_silent(struct tipc_link *l)
{
return (l->silent_intv_cnt + 2 > l->abort_limit);
}

/* tipc_link_timeout - perform periodic task as instructed from node timeout
*/
int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
Expand All @@ -754,6 +772,8 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
u16 bc_acked = l->bc_rcvlink->acked;
struct tipc_mon_state *mstate = &l->mon_state;

trace_tipc_link_timeout(l, TIPC_DUMP_NONE, " ");
trace_tipc_link_too_silent(l, TIPC_DUMP_ALL, " ");
switch (l->state) {
case LINK_ESTABLISHED:
case LINK_SYNCHING:
Expand Down Expand Up @@ -816,6 +836,7 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr)
TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr);
skb_queue_tail(&l->wakeupq, skb);
l->stats.link_congs++;
trace_tipc_link_conges(l, TIPC_DUMP_ALL, "wakeup scheduled!");
return -ELINKCONG;
}

Expand Down Expand Up @@ -1037,13 +1058,17 @@ static int tipc_link_retrans(struct tipc_link *l, struct tipc_link *r,
if (less(to, from))
return 0;

trace_tipc_link_retrans(r, from, to, &l->transmq);
/* Detect repeated retransmit failures on same packet */
if (r->prev_from != from) {
r->prev_from = from;
r->stale_limit = jiffies + msecs_to_jiffies(r->tolerance);
r->stale_cnt = 0;
} else if (++r->stale_cnt > 99 && time_after(jiffies, r->stale_limit)) {
link_retransmit_failure(l, skb);
trace_tipc_list_dump(&l->transmq, true, "retrans failure!");
trace_tipc_link_dump(l, TIPC_DUMP_NONE, "retrans failure!");
trace_tipc_link_dump(r, TIPC_DUMP_NONE, "retrans failure!");
if (link_is_bc_sndlink(l))
return TIPC_LINK_DOWN_EVT;
return tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
Expand Down Expand Up @@ -1403,6 +1428,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
l->stats.sent_nacks++;
skb->priority = TC_PRIO_CONTROL;
__skb_queue_tail(xmitq, skb);
trace_tipc_proto_build(skb, false, l->name);
}

void tipc_link_create_dummy_tnl_msg(struct tipc_link *l,
Expand Down Expand Up @@ -1566,6 +1592,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
char *if_name;
int rc = 0;

trace_tipc_proto_rcv(skb, false, l->name);
if (tipc_link_is_blocked(l) || !xmitq)
goto exit;

Expand All @@ -1576,8 +1603,11 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
hdr = buf_msg(skb);
data = msg_data(hdr);

if (!tipc_link_validate_msg(l, hdr))
if (!tipc_link_validate_msg(l, hdr)) {
trace_tipc_skb_dump(skb, false, "PROTO invalid (1)!");
trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (1)!");
goto exit;
}

switch (mtyp) {
case RESET_MSG:
Expand Down Expand Up @@ -1820,6 +1850,7 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
if (!more(acked, l->acked))
return;

trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
/* Skip over packets peer has already acked */
skb_queue_walk(&snd_l->transmq, skb) {
if (more(buf_seqno(skb), l->acked))
Expand Down
1 change: 1 addition & 0 deletions net/tipc/link.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,5 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
struct sk_buff_head *xmitq);
int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
struct sk_buff_head *xmitq);
bool tipc_link_too_silent(struct tipc_link *l);
#endif
8 changes: 7 additions & 1 deletion net/tipc/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
if (tipc_link_peer_is_down(l))
tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT);
tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT);
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down!");
tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_reset(l);
tipc_link_build_reset_msg(l, xmitq);
Expand All @@ -801,6 +802,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id,
tipc_node_fsm_evt(n, NODE_SYNCH_END_EVT);
n->sync_point = tipc_link_rcv_nxt(tnl) + (U16_MAX / 2 - 1);
tipc_link_tnl_prepare(l, tnl, FAILOVER_MSG, xmitq);
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link down -> failover!");
tipc_link_reset(l);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
Expand Down Expand Up @@ -1022,6 +1024,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*respond = false;
goto exit;
}
trace_tipc_link_reset(l, TIPC_DUMP_ALL, "link created!");
tipc_link_reset(l);
tipc_link_fsm_evt(l, LINK_RESET_EVT);
if (n->state == NODE_FAILINGOVER)
Expand Down Expand Up @@ -1599,8 +1602,11 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
}
}

if (!tipc_link_validate_msg(l, hdr))
if (!tipc_link_validate_msg(l, hdr)) {
trace_tipc_skb_dump(skb, false, "PROTO invalid (2)!");
trace_tipc_link_dump(l, TIPC_DUMP_NONE, "PROTO invalid (2)!");
return false;
}

/* Check and update node accesibility if applicable */
if (state == SELF_UP_PEER_COMING) {
Expand Down
124 changes: 124 additions & 0 deletions net/tipc/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,45 @@ enum {
};
#endif

/* Link & Node FSM states: */
#define state_sym(val) \
__print_symbolic(val, \
{(0xe), "ESTABLISHED" },\
{(0xe << 4), "ESTABLISHING" },\
{(0x1 << 8), "RESET" },\
{(0x2 << 12), "RESETTING" },\
{(0xd << 16), "PEER_RESET" },\
{(0xf << 20), "FAILINGOVER" },\
{(0xc << 24), "SYNCHING" },\
{(0xdd), "SELF_DOWN_PEER_DOWN" },\
{(0xaa), "SELF_UP_PEER_UP" },\
{(0xd1), "SELF_DOWN_PEER_LEAVING" },\
{(0xac), "SELF_UP_PEER_COMING" },\
{(0xca), "SELF_COMING_PEER_UP" },\
{(0x1d), "SELF_LEAVING_PEER_DOWN" },\
{(0xf0), "FAILINGOVER" },\
{(0xcc), "SYNCHING" })

/* Link & Node FSM events: */
#define evt_sym(val) \
__print_symbolic(val, \
{(0xec1ab1e), "ESTABLISH_EVT" },\
{(0x9eed0e), "PEER_RESET_EVT" },\
{(0xfa110e), "FAILURE_EVT" },\
{(0x10ca1d0e), "RESET_EVT" },\
{(0xfa110bee), "FAILOVER_BEGIN_EVT" },\
{(0xfa110ede), "FAILOVER_END_EVT" },\
{(0xc1ccbee), "SYNCH_BEGIN_EVT" },\
{(0xc1ccede), "SYNCH_END_EVT" },\
{(0xece), "SELF_ESTABL_CONTACT_EVT" },\
{(0x1ce), "SELF_LOST_CONTACT_EVT" },\
{(0x9ece), "PEER_ESTABL_CONTACT_EVT" },\
{(0x91ce), "PEER_LOST_CONTACT_EVT" },\
{(0xfbe), "FAILOVER_BEGIN_EVT" },\
{(0xfee), "FAILOVER_END_EVT" },\
{(0xcbe), "SYNCH_BEGIN_EVT" },\
{(0xcee), "SYNCH_END_EVT" })

int tipc_skb_dump(struct sk_buff *skb, bool more, char *buf);
int tipc_list_dump(struct sk_buff_head *list, bool more, char *buf);
int tipc_sk_dump(struct sock *sk, u16 dqueues, char *buf);
Expand Down Expand Up @@ -104,6 +143,8 @@ DEFINE_EVENT(tipc_skb_class, name, \
TP_PROTO(struct sk_buff *skb, bool more, const char *header), \
TP_ARGS(skb, more, header))
DEFINE_SKB_EVENT(tipc_skb_dump);
DEFINE_SKB_EVENT(tipc_proto_build);
DEFINE_SKB_EVENT(tipc_proto_rcv);

DECLARE_EVENT_CLASS(tipc_list_class,

Expand Down Expand Up @@ -192,6 +233,58 @@ DEFINE_EVENT(tipc_link_class, name, \
TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
TP_ARGS(l, dqueues, header))
DEFINE_LINK_EVENT(tipc_link_dump);
DEFINE_LINK_EVENT(tipc_link_conges);
DEFINE_LINK_EVENT(tipc_link_timeout);
DEFINE_LINK_EVENT(tipc_link_reset);

#define DEFINE_LINK_EVENT_COND(name, cond) \
DEFINE_EVENT_CONDITION(tipc_link_class, name, \
TP_PROTO(struct tipc_link *l, u16 dqueues, const char *header), \
TP_ARGS(l, dqueues, header), \
TP_CONDITION(cond))
DEFINE_LINK_EVENT_COND(tipc_link_too_silent, tipc_link_too_silent(l));

DECLARE_EVENT_CLASS(tipc_link_transmq_class,

TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),

TP_ARGS(r, f, t, tq),

TP_STRUCT__entry(
__array(char, name, TIPC_MAX_LINK_NAME)
__field(u16, from)
__field(u16, to)
__field(u32, len)
__field(u16, fseqno)
__field(u16, lseqno)
),

TP_fast_assign(
tipc_link_name_ext(r, __entry->name);
__entry->from = f;
__entry->to = t;
__entry->len = skb_queue_len(tq);
__entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
__entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
),

TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
__entry->name, __entry->from, __entry->to,
__entry->len, __entry->fseqno, __entry->lseqno)
);

DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
TP_ARGS(r, f, t, tq)
);

DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
TP_ARGS(r, f, t, tq),
TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
__entry->name, __entry->from, __entry->to,
__entry->len, __entry->fseqno, __entry->lseqno)
);

DECLARE_EVENT_CLASS(tipc_node_class,

Expand Down Expand Up @@ -221,6 +314,37 @@ DEFINE_EVENT(tipc_node_class, name, \
TP_ARGS(n, more, header))
DEFINE_NODE_EVENT(tipc_node_dump);

DECLARE_EVENT_CLASS(tipc_fsm_class,

TP_PROTO(const char *name, u32 os, u32 ns, int evt),

TP_ARGS(name, os, ns, evt),

TP_STRUCT__entry(
__string(name, name)
__field(u32, os)
__field(u32, ns)
__field(u32, evt)
),

TP_fast_assign(
__assign_str(name, name);
__entry->os = os;
__entry->ns = ns;
__entry->evt = evt;
),

TP_printk("<%s> %s--(%s)->%s\n", __get_str(name),
state_sym(__entry->os), evt_sym(__entry->evt),
state_sym(__entry->ns))
);

#define DEFINE_FSM_EVENT(fsm_name) \
DEFINE_EVENT(tipc_fsm_class, fsm_name, \
TP_PROTO(const char *name, u32 os, u32 ns, int evt), \
TP_ARGS(name, os, ns, evt))
DEFINE_FSM_EVENT(tipc_link_fsm);

#endif /* _TIPC_TRACE_H */

/* This part must be outside protection */
Expand Down

0 comments on commit 26574db

Please sign in to comment.