Skip to content

Commit

Permalink
Merge branch 'cxgb4-ch_ktls-fixes-in-nic-tls-code'
Browse files Browse the repository at this point in the history
Rohit Maheshwari says:

====================
cxgb4/ch_ktls: Fixes in nic tls code

This series helps in fixing multiple nic ktls issues. Series is broken
into 12 patches.

Patch 1 avoids deciding tls packet based on decrypted bit. If its a
retransmit packet which has tls handshake and finish (for encryption),
decrypted bit won't be set there, and so we can't rely on decrypted
bit.

Patch 2 helps supporting linear skb. SKBs were assumed non-linear.
Corrected the length extraction.

Patch 3 fixes the checksum offload update in WR.

Patch 4 fixes kernel panic happening due to creating new skb for each
record. As part of fix driver will use same skb to send out one tls
record (partial data) of the same SKB.

Patch 5 fixes the problem of skb data length smaller than remaining data
of the record.

Patch 6 fixes the handling of SKBs which has tls header alone pkt, but
not starting from beginning.

Patch 7 avoids sending extra data which is used to make a record 16 byte
aligned. We don't need to retransmit those extra few bytes.

Patch 8 handles the cases where retransmit packet has tls starting
exchanges which are prior to tls start marker.

Patch 9 fixes the problem os skb free before HW knows about tcp FIN.

Patch 10 handles the small packet case which has partial TAG bytes only.
HW can't handle those, hence using sw crypto for such pkts.

Patch 11 corrects the potential tcb update problem.

Patch 12 stops the queue if queue reaches threshold value.

v1->v2:
- Corrected fixes tag issue.
- Marked chcr_ktls_sw_fallback() static.

v2->v3:
- Replaced GFP_KERNEL with GFP_ATOMIC.
- Removed mixed fixes.

v3->v4:
- Corrected fixes tag issue.

v4->v5:
- Separated mixed fixes from patch 4.

v5-v6:
- Fixes tag should be at the end.
====================

Link: https://lore.kernel.org/r/20201109105142.15398-1-rohitm@chelsio.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Nov 12, 2020
2 parents 3611823 + 83a95df commit fcd1ecc
Show file tree
Hide file tree
Showing 7 changed files with 478 additions and 228 deletions.
3 changes: 3 additions & 0 deletions drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
Original file line number Diff line number Diff line change
Expand Up @@ -2124,6 +2124,9 @@ void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
struct ulptx_sgl *sgl, u64 *end, unsigned int start,
const dma_addr_t *addr);
void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
struct ulptx_sgl *sgl, u64 *end,
const dma_addr_t *addr, u32 start, u32 send_len);
void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
int t4_set_vlan_acl(struct adapter *adap, unsigned int mbox, unsigned int vf,
u16 vlan);
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3573,6 +3573,8 @@ static int chcr_stats_show(struct seq_file *seq, void *v)
atomic64_read(&adap->ch_ktls_stats.ktls_tx_complete_pkts));
seq_printf(seq, "TX trim pkts : %20llu\n",
atomic64_read(&adap->ch_ktls_stats.ktls_tx_trimmed_pkts));
seq_printf(seq, "TX sw fallback : %20llu\n",
atomic64_read(&adap->ch_ktls_stats.ktls_tx_fallback));
while (i < MAX_NPORTS) {
ktls_port = &adap->ch_ktls_stats.ktls_port[i];
seq_printf(seq, "Port %d\n", i);
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,7 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
txq = netdev_pick_tx(dev, skb, sb_dev);
if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
skb->encapsulation ||
cxgb4_is_ktls_skb(skb) ||
(proto != IPPROTO_TCP && proto != IPPROTO_UDP))
txq = txq % pi->nqsets;

Expand Down
6 changes: 6 additions & 0 deletions drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,7 @@ struct ch_ktls_stats_debug {
atomic64_t ktls_tx_retransmit_pkts;
atomic64_t ktls_tx_complete_pkts;
atomic64_t ktls_tx_trimmed_pkts;
atomic64_t ktls_tx_fallback;
};
#endif

Expand Down Expand Up @@ -493,6 +494,11 @@ struct cxgb4_uld_info {
#endif
};

static inline bool cxgb4_is_ktls_skb(struct sk_buff *skb)
{
return skb->sk && tls_is_sk_tx_device_offloaded(skb->sk);
}

void cxgb4_uld_enable(struct adapter *adap);
void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
int cxgb4_unregister_uld(enum cxgb4_uld type);
Expand Down
111 changes: 110 additions & 1 deletion drivers/net/ethernet/chelsio/cxgb4/sge.c
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,114 @@ void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
}
EXPORT_SYMBOL(cxgb4_write_sgl);

/* cxgb4_write_partial_sgl - populate SGL for partial packet
* @skb: the packet
* @q: the Tx queue we are writing into
* @sgl: starting location for writing the SGL
* @end: points right after the end of the SGL
* @addr: the list of bus addresses for the SGL elements
* @start: start offset in the SKB where partial data starts
* @len: length of data from @start to send out
*
* This API will handle sending out partial data of a skb if required.
* Unlike cxgb4_write_sgl, @start can be any offset into the skb data,
* and @len will decide how much data after @start offset to send out.
*/
void cxgb4_write_partial_sgl(const struct sk_buff *skb, struct sge_txq *q,
struct ulptx_sgl *sgl, u64 *end,
const dma_addr_t *addr, u32 start, u32 len)
{
struct ulptx_sge_pair buf[MAX_SKB_FRAGS / 2 + 1] = {0}, *to;
u32 frag_size, skb_linear_data_len = skb_headlen(skb);
struct skb_shared_info *si = skb_shinfo(skb);
u8 i = 0, frag_idx = 0, nfrags = 0;
skb_frag_t *frag;

/* Fill the first SGL either from linear data or from partial
* frag based on @start.
*/
if (unlikely(start < skb_linear_data_len)) {
frag_size = min(len, skb_linear_data_len - start);
sgl->len0 = htonl(frag_size);
sgl->addr0 = cpu_to_be64(addr[0] + start);
len -= frag_size;
nfrags++;
} else {
start -= skb_linear_data_len;
frag = &si->frags[frag_idx];
frag_size = skb_frag_size(frag);
/* find the first frag */
while (start >= frag_size) {
start -= frag_size;
frag_idx++;
frag = &si->frags[frag_idx];
frag_size = skb_frag_size(frag);
}

frag_size = min(len, skb_frag_size(frag) - start);
sgl->len0 = cpu_to_be32(frag_size);
sgl->addr0 = cpu_to_be64(addr[frag_idx + 1] + start);
len -= frag_size;
nfrags++;
frag_idx++;
}

/* If the entire partial data fit in one SGL, then send it out
* now.
*/
if (!len)
goto done;

/* Most of the complexity below deals with the possibility we hit the
* end of the queue in the middle of writing the SGL. For this case
* only we create the SGL in a temporary buffer and then copy it.
*/
to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge;

/* If the skb couldn't fit in first SGL completely, fill the
* rest of the frags in subsequent SGLs. Note that each SGL
* pair can store 2 frags.
*/
while (len) {
frag_size = min(len, skb_frag_size(&si->frags[frag_idx]));
to->len[i & 1] = cpu_to_be32(frag_size);
to->addr[i & 1] = cpu_to_be64(addr[frag_idx + 1]);
if (i && (i & 1))
to++;
nfrags++;
frag_idx++;
i++;
len -= frag_size;
}

/* If we ended in an odd boundary, then set the second SGL's
* length in the pair to 0.
*/
if (i & 1)
to->len[1] = cpu_to_be32(0);

/* Copy from temporary buffer to Tx ring, in case we hit the
* end of the queue in the middle of writing the SGL.
*/
if (unlikely((u8 *)end > (u8 *)q->stat)) {
u32 part0 = (u8 *)q->stat - (u8 *)sgl->sge, part1;

if (likely(part0))
memcpy(sgl->sge, buf, part0);
part1 = (u8 *)end - (u8 *)q->stat;
memcpy(q->desc, (u8 *)buf + part0, part1);
end = (void *)q->desc + part1;
}

/* 0-pad to multiple of 16 */
if ((uintptr_t)end & 8)
*end = 0;
done:
sgl->cmd_nsge = htonl(ULPTX_CMD_V(ULP_TX_SC_DSGL) |
ULPTX_NSGE_V(nfrags));
}
EXPORT_SYMBOL(cxgb4_write_partial_sgl);

/* This function copies 64 byte coalesced work request to
* memory mapped BAR2 space. For coalesced WR SGE fetches
* data from the FIFO instead of from Host.
Expand Down Expand Up @@ -1422,7 +1530,8 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
#endif /* CHELSIO_IPSEC_INLINE */

#if IS_ENABLED(CONFIG_CHELSIO_TLS_DEVICE)
if (skb->decrypted)
if (cxgb4_is_ktls_skb(skb) &&
(skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb))))
return adap->uld[CXGB4_ULD_KTLS].tx_handler(skb, dev);
#endif /* CHELSIO_TLS_DEVICE */

Expand Down
Loading

0 comments on commit fcd1ecc

Please sign in to comment.