Skip to content

Commit

Permalink
RDMA/nes: Fix hang issues for large cluster dynamic connections
Browse files Browse the repository at this point in the history
Running large cluster setup, we are hanging after many hours of
testing.  Fixing this required going over the code and making sure the
rexmit entry was properly removed based on the cm_node's state and
packet received.  Also when receiving a FIN packet, check seq# and
make sure there were no errors before calling handle_fin().

Following are the changes done in nes_cm.c:

* handle_ack_pkt() needs to return error value, so in case of error,
  handle_fin() is not called. Some cleanup done while going over the code.

* handle_rst_pkt(), handling of cm_node's NES_CM_STATE_LAST_ACK is missing.

* process_packet(), in case of FIN only packet is received, call
  check_seq() before processing.

* in handle_fin_pkt(), we are calling cleanup_retrans_entry() for all
  conditions, even if the packets need to be dropped.

Signed-off-by: Faisal Latif <faisal.latif@intel.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
  • Loading branch information
Faisal Latif authored and Roland Dreier committed Apr 27, 2009
1 parent 4e9c390 commit 109d67e
Showing 1 changed file with 25 additions and 31 deletions.
56 changes: 25 additions & 31 deletions drivers/infiniband/hw/nes/nes_cm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1326,30 +1326,36 @@ static void handle_fin_pkt(struct nes_cm_node *cm_node)
nes_debug(NES_DBG_CM, "Received FIN, cm_node = %p, state = %u. "
"refcnt=%d\n", cm_node, cm_node->state,
atomic_read(&cm_node->ref_count));
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
switch (cm_node->state) {
case NES_CM_STATE_SYN_RCVD:
case NES_CM_STATE_SYN_SENT:
case NES_CM_STATE_ESTABLISHED:
case NES_CM_STATE_MPAREQ_SENT:
case NES_CM_STATE_MPAREJ_RCVD:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_LAST_ACK;
send_fin(cm_node, NULL);
break;
case NES_CM_STATE_FIN_WAIT1:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSING;
send_ack(cm_node, NULL);
/* Wait for ACK as this is simultanous close..
* After we receive ACK, do not send anything..
* Just rm the node.. Done.. */
break;
case NES_CM_STATE_FIN_WAIT2:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_TIME_WAIT;
send_ack(cm_node, NULL);
schedule_nes_timer(cm_node, NULL, NES_TIMER_TYPE_CLOSE, 1, 0);
break;
case NES_CM_STATE_TIME_WAIT:
cm_node->tcp_cntxt.rcv_nxt++;
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_node->cm_core, cm_node);
break;
Expand Down Expand Up @@ -1385,7 +1391,6 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
passive_state = atomic_add_return(1, &cm_node->passive_state);
if (passive_state == NES_SEND_RESET_EVENT)
create_event(cm_node, NES_CM_EVENT_RESET);
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
dev_kfree_skb_any(skb);
break;
Expand All @@ -1399,17 +1404,16 @@ static void handle_rst_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
active_open_err(cm_node, skb, reset);
break;
case NES_CM_STATE_CLOSED:
cleanup_retrans_entry(cm_node);
drop_packet(skb);
break;
case NES_CM_STATE_LAST_ACK:
cm_node->cm_id->rem_ref(cm_node->cm_id);
case NES_CM_STATE_TIME_WAIT:
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
rem_ref_cm_node(cm_node->cm_core, cm_node);
drop_packet(skb);
break;
case NES_CM_STATE_FIN_WAIT1:
cleanup_retrans_entry(cm_node);
nes_debug(NES_DBG_CM, "Bad state %s[%u]\n", __func__, __LINE__);
default:
drop_packet(skb);
Expand Down Expand Up @@ -1456,6 +1460,7 @@ static void handle_rcv_mpa(struct nes_cm_node *cm_node, struct sk_buff *skb)
NES_PASSIVE_STATE_INDICATED);
break;
case NES_CM_STATE_MPAREQ_SENT:
cleanup_retrans_entry(cm_node);
if (res_type == NES_MPA_REQUEST_REJECT) {
type = NES_CM_EVENT_MPA_REJECT;
cm_node->state = NES_CM_STATE_MPAREJ_RCVD;
Expand Down Expand Up @@ -1653,49 +1658,39 @@ static void handle_synack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
}
}

static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
struct tcphdr *tcph)
{
int datasize = 0;
u32 inc_sequence;
u32 rem_seq_ack;
u32 rem_seq;
int ret;
int ret = 0;
int optionsize;
optionsize = (tcph->doff << 2) - sizeof(struct tcphdr);

if (check_seq(cm_node, tcph, skb))
return;
return -EINVAL;

skb_pull(skb, tcph->doff << 2);
inc_sequence = ntohl(tcph->seq);
rem_seq = ntohl(tcph->seq);
rem_seq_ack = ntohl(tcph->ack_seq);
datasize = skb->len;
cleanup_retrans_entry(cm_node);
switch (cm_node->state) {
case NES_CM_STATE_SYN_RCVD:
/* Passive OPEN */
cleanup_retrans_entry(cm_node);
ret = handle_tcp_options(cm_node, tcph, skb, optionsize, 1);
if (ret)
break;
cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
if (cm_node->tcp_cntxt.rem_ack_num !=
cm_node->tcp_cntxt.loc_seq_num) {
nes_debug(NES_DBG_CM, "rem_ack_num != loc_seq_num\n");
cleanup_retrans_entry(cm_node);
send_reset(cm_node, skb);
return;
}
cm_node->state = NES_CM_STATE_ESTABLISHED;
cleanup_retrans_entry(cm_node);
if (datasize) {
cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
handle_rcv_mpa(cm_node, skb);
} else { /* rcvd ACK only */
} else /* rcvd ACK only */
dev_kfree_skb_any(skb);
cleanup_retrans_entry(cm_node);
}
break;
case NES_CM_STATE_ESTABLISHED:
/* Passive OPEN */
Expand All @@ -1707,27 +1702,23 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
drop_packet(skb);
break;
case NES_CM_STATE_MPAREQ_SENT:
cleanup_retrans_entry(cm_node);
cm_node->tcp_cntxt.rem_ack_num = ntohl(tcph->ack_seq);
if (datasize) {
cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
handle_rcv_mpa(cm_node, skb);
} else { /* Could be just an ack pkt.. */
cleanup_retrans_entry(cm_node);
} else /* Could be just an ack pkt.. */
dev_kfree_skb_any(skb);
}
break;
case NES_CM_STATE_LISTENING:
case NES_CM_STATE_CLOSED:
cleanup_retrans_entry(cm_node);
send_reset(cm_node, skb);
break;
case NES_CM_STATE_LAST_ACK:
case NES_CM_STATE_CLOSING:
cleanup_retrans_entry(cm_node);
cm_node->state = NES_CM_STATE_CLOSED;
cm_node->cm_id->rem_ref(cm_node->cm_id);
case NES_CM_STATE_CLOSING:
cleanup_retrans_entry(cm_node);
rem_ref_cm_node(cm_node->cm_core, cm_node);
drop_packet(skb);
break;
Expand All @@ -1742,9 +1733,11 @@ static void handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
case NES_CM_STATE_MPAREQ_RCVD:
case NES_CM_STATE_UNKNOWN:
default:
cleanup_retrans_entry(cm_node);
drop_packet(skb);
break;
}
return ret;
}


Expand Down Expand Up @@ -1850,6 +1843,7 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
enum nes_tcpip_pkt_type pkt_type = NES_PKT_TYPE_UNKNOWN;
struct tcphdr *tcph = tcp_hdr(skb);
u32 fin_set = 0;
int ret = 0;
skb_pull(skb, ip_hdr(skb)->ihl << 2);

nes_debug(NES_DBG_CM, "process_packet: cm_node=%p state =%d syn=%d "
Expand All @@ -1875,17 +1869,17 @@ static void process_packet(struct nes_cm_node *cm_node, struct sk_buff *skb,
handle_synack_pkt(cm_node, skb, tcph);
break;
case NES_PKT_TYPE_ACK:
handle_ack_pkt(cm_node, skb, tcph);
if (fin_set)
ret = handle_ack_pkt(cm_node, skb, tcph);
if (fin_set && !ret)
handle_fin_pkt(cm_node);
break;
case NES_PKT_TYPE_RST:
handle_rst_pkt(cm_node, skb, tcph);
break;
default:
drop_packet(skb);
if (fin_set)
if ((fin_set) && (!check_seq(cm_node, tcph, skb)))
handle_fin_pkt(cm_node);
drop_packet(skb);
break;
}
}
Expand Down

0 comments on commit 109d67e

Please sign in to comment.