Skip to content

Commit

Permalink
Merge branch 'pskb_extract'
Browse files Browse the repository at this point in the history
Sowmini Varadhan says:

====================
pskb_extract() helper function.

This patchset follows up on the discussion in
 https://www.mail-archive.com/netdev@vger.kernel.org/msg105090.html

For RDS-TCP, we have to deal with the full gamut of
nonlinear sk_buffs, including all the frag_list variants.
Also, the parent skb has to remain unchanged, while the clone
is queued for Rx on the PF_RDS socket.

Patch 1 of this patchset adds a pskb_extract() function that
does all this without the redundant memcpy's in pskb_expand_head()
and __pskb_pull_tail().

v2: Marcelo Leitner review comments
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
David S. Miller committed Apr 25, 2016
2 parents 557fc4a + 947d275 commit 5a5f079
Show file tree
Hide file tree
Showing 3 changed files with 247 additions and 11 deletions.
2 changes: 2 additions & 0 deletions include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -2986,6 +2986,8 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
int skb_ensure_writable(struct sk_buff *skb, int write_len);
int skb_vlan_pop(struct sk_buff *skb);
int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci);
struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy,
gfp_t gfp);

static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len)
{
Expand Down
242 changes: 242 additions & 0 deletions net/core/skbuff.c
Original file line number Diff line number Diff line change
Expand Up @@ -4622,3 +4622,245 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
return NULL;
}
EXPORT_SYMBOL(alloc_skb_with_frags);

/* carve out the first off bytes from skb when off < headlen */
static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
const int headlen, gfp_t gfp_mask)
{
int i;
int size = skb_end_offset(skb);
int new_hlen = headlen - off;
u8 *data;
int doff = 0;

size = SKB_DATA_ALIGN(size);

if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
data = kmalloc_reserve(size +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;

size = SKB_WITH_OVERHEAD(ksize(data));

/* Copy real data, and all frags */
skb_copy_from_linear_data_offset(skb, off, data, new_hlen);
skb->len -= off;

memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb),
offsetof(struct skb_shared_info,
frags[skb_shinfo(skb)->nr_frags]));
if (skb_cloned(skb)) {
/* drop the old head gracefully */
if (skb_orphan_frags(skb, gfp_mask)) {
kfree(data);
return -ENOMEM;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
skb_frag_ref(skb, i);
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);
skb_release_data(skb);
} else {
/* we can reuse existing recount- all we did was
* relocate values
*/
skb_free_head(skb);
}

doff = (data - skb->head);
skb->head = data;
skb->data = data;
skb->head_frag = 0;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->end = size;
doff = 0;
#else
skb->end = skb->head + size;
#endif
skb_set_tail_pointer(skb, skb_headlen(skb));
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
atomic_set(&skb_shinfo(skb)->dataref, 1);

return 0;
}

static int pskb_carve(struct sk_buff *skb, const u32 off, gfp_t gfp);

/* carve out the first eat bytes from skb's frag_list. May recurse into
* pskb_carve()
*/
static int pskb_carve_frag_list(struct sk_buff *skb,
struct skb_shared_info *shinfo, int eat,
gfp_t gfp_mask)
{
struct sk_buff *list = shinfo->frag_list;
struct sk_buff *clone = NULL;
struct sk_buff *insp = NULL;

do {
if (!list) {
pr_err("Not enough bytes to eat. Want %d\n", eat);
return -EFAULT;
}
if (list->len <= eat) {
/* Eaten as whole. */
eat -= list->len;
list = list->next;
insp = list;
} else {
/* Eaten partially. */
if (skb_shared(list)) {
clone = skb_clone(list, gfp_mask);
if (!clone)
return -ENOMEM;
insp = list->next;
list = clone;
} else {
/* This may be pulled without problems. */
insp = list;
}
if (pskb_carve(list, eat, gfp_mask) < 0) {
kfree_skb(clone);
return -ENOMEM;
}
break;
}
} while (eat);

/* Free pulled out fragments. */
while ((list = shinfo->frag_list) != insp) {
shinfo->frag_list = list->next;
kfree_skb(list);
}
/* And insert new clone at head. */
if (clone) {
clone->next = list;
shinfo->frag_list = clone;
}
return 0;
}

/* carve off first len bytes from skb. Split line (off) is in the
* non-linear part of skb
*/
static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
int pos, gfp_t gfp_mask)
{
int i, k = 0;
int size = skb_end_offset(skb);
u8 *data;
const int nfrags = skb_shinfo(skb)->nr_frags;
struct skb_shared_info *shinfo;
int doff = 0;

size = SKB_DATA_ALIGN(size);

if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
data = kmalloc_reserve(size +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;

size = SKB_WITH_OVERHEAD(ksize(data));

memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb), offsetof(struct skb_shared_info,
frags[skb_shinfo(skb)->nr_frags]));
if (skb_orphan_frags(skb, gfp_mask)) {
kfree(data);
return -ENOMEM;
}
shinfo = (struct skb_shared_info *)(data + size);
for (i = 0; i < nfrags; i++) {
int fsize = skb_frag_size(&skb_shinfo(skb)->frags[i]);

if (pos + fsize > off) {
shinfo->frags[k] = skb_shinfo(skb)->frags[i];

if (pos < off) {
/* Split frag.
* We have two variants in this case:
* 1. Move all the frag to the second
* part, if it is possible. F.e.
* this approach is mandatory for TUX,
* where splitting is expensive.
* 2. Split is accurately. We make this.
*/
shinfo->frags[0].page_offset += off - pos;
skb_frag_size_sub(&shinfo->frags[0], off - pos);
}
skb_frag_ref(skb, i);
k++;
}
pos += fsize;
}
shinfo->nr_frags = k;
if (skb_has_frag_list(skb))
skb_clone_fraglist(skb);

if (k == 0) {
/* split line is in frag list */
pskb_carve_frag_list(skb, shinfo, off - pos, gfp_mask);
}
skb_release_data(skb);

doff = (data - skb->head);
skb->head = data;
skb->head_frag = 0;
skb->data = data;
#ifdef NET_SKBUFF_DATA_USES_OFFSET
skb->end = size;
doff = 0;
#else
skb->end = skb->head + size;
#endif
skb_reset_tail_pointer(skb);
skb_headers_offset_update(skb, 0);
skb->cloned = 0;
skb->hdr_len = 0;
skb->nohdr = 0;
skb->len -= off;
skb->data_len = skb->len;
atomic_set(&skb_shinfo(skb)->dataref, 1);
return 0;
}

/* remove len bytes from the beginning of the skb */
static int pskb_carve(struct sk_buff *skb, const u32 len, gfp_t gfp)
{
int headlen = skb_headlen(skb);

if (len < headlen)
return pskb_carve_inside_header(skb, len, headlen, gfp);
else
return pskb_carve_inside_nonlinear(skb, len, headlen, gfp);
}

/* Extract to_copy bytes starting at off from skb, and return this in
* a new skb
*/
struct sk_buff *pskb_extract(struct sk_buff *skb, int off,
int to_copy, gfp_t gfp)
{
struct sk_buff *clone = skb_clone(skb, gfp);

if (!clone)
return NULL;

if (pskb_carve(clone, off, gfp) < 0 ||
pskb_trim(clone, to_copy)) {
kfree_skb(clone);
return NULL;
}
return clone;
}
EXPORT_SYMBOL(pskb_extract);
14 changes: 3 additions & 11 deletions net/rds/tcp_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,22 +207,14 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}

if (left && tc->t_tinc_data_rem) {
clone = skb_clone(skb, arg->gfp);
to_copy = min(tc->t_tinc_data_rem, left);

clone = pskb_extract(skb, offset, to_copy, arg->gfp);
if (!clone) {
desc->error = -ENOMEM;
goto out;
}

to_copy = min(tc->t_tinc_data_rem, left);
if (!pskb_pull(clone, offset) ||
pskb_trim(clone, to_copy)) {
pr_warn("rds_tcp_data_recv: pull/trim failed "
"left %zu data_rem %zu skb_len %d\n",
left, tc->t_tinc_data_rem, skb->len);
kfree_skb(clone);
desc->error = -ENOMEM;
goto out;
}
skb_queue_tail(&tinc->ti_skb_list, clone);

rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
Expand Down

0 comments on commit 5a5f079

Please sign in to comment.