From 0e06b227c5221dd51b5569de93f3b9f532be4a32 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:34 +0200
Subject: [PATCH 1/3] bpf: fix msg->data/data_end after sg shift repair in
 bpf_msg_pull_data

In the current code, msg->data is set as sg_virt(&sg[i]) + start - offset
and msg->data_end relative to it as msg->data + bytes. Using iterator i
to point to the updated starting scatterlist element holds true for some
cases, however not for all where we'd end up pointing out of bounds. It
is /correct/ for these ones:

1) When first finding the starting scatterlist element (sge) where we
   find that the page is already privately owned by the msg and where
   the requested bytes and headroom fit into the sge's length.

However, it's /incorrect/ for the following ones:

2) After we made the requested area private and updated the newly allocated
   page into first_sg slot of the scatterlist ring; when we find that no
   shift repair of the ring is needed where we bail out updating msg->data
   and msg->data_end. At that point i will point to last_sg, which in this
   case is the next elem of first_sg in the ring. The sge at that point
   might as well be invalid (e.g. i == msg->sg_end), which we use for
   setting the range of sg_virt(&sg[i]). The correct one would have been
   first_sg.

3) Similar as in 2) but when we find that a shift repair of the ring is
   needed. In this case we fix up all sges and stop once we've reached the
   end. In this case i will point to will point to the new msg->sg_end,
   and the sge at that point will be invalid. Again here the requested
   range sits in first_sg.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index ec4d67c0cf0cd..b9225c55926ac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2310,6 +2310,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (unlikely(start >= offset + len))
 		return -EINVAL;
 
+	first_sg = i;
 	/* The start may point into the sg element so we need to also
 	 * account for the headroom.
 	 */
@@ -2317,8 +2318,6 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (!msg->sg_copy[i] && bytes_sg_total <= len)
 		goto out;
 
-	first_sg = i;
-
 	/* At this point we need to linearize multiple scatterlist
 	 * elements or a single shared page. Either way we need to
 	 * copy into a linear buffer exclusively owned by BPF. Then
@@ -2400,7 +2399,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (msg->sg_end < 0)
 		msg->sg_end += MAX_SKB_FRAGS;
 out:
-	msg->data = sg_virt(&sg[i]) + start - offset;
+	msg->data = sg_virt(&sg[first_sg]) + start - offset;
 	msg->data_end = msg->data + bytes;
 
 	return 0;

From 2e43f95dd8ee62bc8bf57f2afac37fbd70c8d565 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:35 +0200
Subject: [PATCH 2/3] bpf: fix shift upon scatterlist ring wrap-around in
 bpf_msg_pull_data

If first_sg and last_sg wraps around in the scatterlist ring, then we
need to account for that in the shift as well. E.g. crafting such msgs
where this is the case leads to a hang as shift becomes negative. E.g.
consider the following scenario:

  first_sg := 14     |=>    shift := -12     msg->sg_start := 10
  last_sg  :=  3     |                       msg->sg_end   :=  5

round  1:  i := 15, move_from :=   3, sg[15] := sg[  3]
round  2:  i :=  0, move_from := -12, sg[ 0] := sg[-12]
round  3:  i :=  1, move_from := -11, sg[ 1] := sg[-11]
round  4:  i :=  2, move_from := -10, sg[ 2] := sg[-10]
[...]
round 13:  i := 11, move_from :=  -1, sg[ 2] := sg[ -1]
round 14:  i := 12, move_from :=   0, sg[ 2] := sg[  0]
round 15:  i := 13, move_from :=   1, sg[ 2] := sg[  1]
round 16:  i := 14, move_from :=   2, sg[ 2] := sg[  2]
round 17:  i := 15, move_from :=   3, sg[ 2] := sg[  3]
[...]

This means we will loop forever and never hit the msg->sg_end condition
to break out of the loop. When we see that the ring wraps around, then
the shift should be MAX_SKB_FRAGS - first_sg + last_sg - 1. Meaning,
the remainder slots from the tail of the ring and the head until last_sg
combined.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index b9225c55926ac..43ba5f8c38cac 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2370,7 +2370,10 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 * had a single entry though we can just replace it and
 	 * be done. Otherwise walk the ring and shift the entries.
 	 */
-	shift = last_sg - first_sg - 1;
+	WARN_ON_ONCE(last_sg == first_sg);
+	shift = last_sg > first_sg ?
+		last_sg - first_sg - 1 :
+		MAX_SKB_FRAGS - first_sg + last_sg - 1;
 	if (!shift)
 		goto out;
 

From a8cf76a9023bc6709b1361d06bb2fae5227b9d68 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 29 Aug 2018 16:50:36 +0200
Subject: [PATCH 3/3] bpf: fix sg shift repair start offset in
 bpf_msg_pull_data

When we perform the sg shift repair for the scatterlist ring, we
currently start out at i = first_sg + 1. However, this is not
correct since the first_sg could point to the sge sitting at slot
MAX_SKB_FRAGS - 1, and a subsequent i = MAX_SKB_FRAGS will access
the scatterlist ring (sg) out of bounds. Add the sk_msg_iter_var()
helper for iterating through the ring, and apply the same rule
for advancing to the next ring element as we do elsewhere. Later
work will use this helper also in other places.

Fixes: 015632bb30da ("bpf: sk_msg program helper bpf_sk_msg_pull_data")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 43ba5f8c38cac..2c7801f6737ad 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2282,6 +2282,13 @@ static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
 	.arg2_type      = ARG_ANYTHING,
 };
 
+#define sk_msg_iter_var(var)			\
+	do {					\
+		var++;				\
+		if (var == MAX_SKB_FRAGS)	\
+			var = 0;		\
+	} while (0)
+
 BPF_CALL_4(bpf_msg_pull_data,
 	   struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
 {
@@ -2302,9 +2309,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		if (start < offset + len)
 			break;
 		offset += len;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (i != msg->sg_end);
 
 	if (unlikely(start >= offset + len))
@@ -2330,9 +2335,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 	 */
 	do {
 		copy += sg[i].length;
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 		if (bytes_sg_total <= copy)
 			break;
 	} while (i != msg->sg_end);
@@ -2358,9 +2361,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		sg[i].length = 0;
 		put_page(sg_page(&sg[i]));
 
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (i != last_sg);
 
 	sg[first_sg].length = copy;
@@ -2377,7 +2378,8 @@ BPF_CALL_4(bpf_msg_pull_data,
 	if (!shift)
 		goto out;
 
-	i = first_sg + 1;
+	i = first_sg;
+	sk_msg_iter_var(i);
 	do {
 		int move_from;
 
@@ -2394,9 +2396,7 @@ BPF_CALL_4(bpf_msg_pull_data,
 		sg[move_from].page_link = 0;
 		sg[move_from].offset = 0;
 
-		i++;
-		if (i == MAX_SKB_FRAGS)
-			i = 0;
+		sk_msg_iter_var(i);
 	} while (1);
 	msg->sg_end -= shift;
 	if (msg->sg_end < 0)