Skip to content

Commit

Permalink
linux-tinker-board.bbappend: Add NFLX-2019-001 series of patches
Browse files Browse the repository at this point in the history
Add patches for multiple TCP-based remote denial
of service vulnerabilities identified by Netflix.
Patch source:
https://github.com/Netflix/security-bulletins/blob/master/advisories/third-party/2019-001.md

Changelog-entry: Patches for TCP-based remote denial of service vulnerabilities
Signed-off-by: Vicentiu Galanopulo <[email protected]>
  • Loading branch information
vicgal committed Feb 11, 2020
1 parent ee6ecff commit 94966ae
Show file tree
Hide file tree
Showing 6 changed files with 476 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
inherit kernel-resin

FILESEXTRAPATHS_append := ":${THISDIR}/patches"
SRC_URI_append = " \
file://0002-NFLX-2019-001-SACK-Panic.patch \
file://0003-NFLX-2019-001-SACK-Panic-for-lteq-4.14.patch \
file://0004-NFLX-2019-001-SACK-Slowness.patch \
file://0005-NFLX-2019-001-Resour-Consump-Low-MSS.patch \
file://0006-NFLX-2019-001-Resour-Consump-Low-MSS.patch \
"
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
Date: Sat, 8 Jun 2019 10:38:05 -0700
Subject: [PATCH net 1/4] tcp: limit payload size of sacked skbs
From: Eric Dumazet <[email protected]>

Jonathan Looney reported that TCP can trigger the following crash
in tcp_shifted_skb() :

BUG_ON(tcp_skb_pcount(skb) < pcount);

This can happen if the remote peer has advertized the smallest
MSS that linux TCP accepts : 48

An skb can hold 17 fragments, and each fragment can hold 32KB
on x86, or 64KB on PowerPC.

This means that the 16-bit width of TCP_SKB_CB(skb)->tcp_gso_segs
can overflow.

Note that tcp_sendmsg() builds skbs with less than 64KB
of payload, so this problem needs SACK to be enabled.
SACK blocks allow TCP to coalesce multiple skbs in the retransmit
queue, thus filling the 17 fragments to maximal capacity.

Fixes: 832d11c5cd07 ("tcp: Try to restore large SKBs while SACK processing")
Signed-off-by: Eric Dumazet <[email protected]>
Reported-by: Jonathan Looney <[email protected]>
Acked-by: Neal Cardwell <[email protected]>
Reviewed-by: Tyler Hicks <[email protected]>
Cc: Yuchung Cheng <[email protected]>
Cc: Bruce Curtis <[email protected]>
Cc: Jonathan Lemon <[email protected]>

Upstream-Status: Inappropriate [not author]
Signed-off-by: Vicentiu Galanopulo <[email protected]>
---
include/linux/tcp.h | 3 +++
include/net/tcp.h | 2 ++
net/ipv4/tcp.c | 1 +
net/ipv4/tcp_input.c | 26 ++++++++++++++++++++------
net/ipv4/tcp_output.c | 4 ++--
5 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b386361..cfbe3c4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -410,4 +410,7 @@ static inline void tcp_saved_syn_free(struct tcp_sock *tp)
tp->saved_syn = NULL;
}

+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
+ int shiftlen);
+
#endif /* _LINUX_TCP_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4ea3739..03c6f68 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -54,6 +54,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);

#define MAX_TCP_HEADER (128 + MAX_HEADER)
#define MAX_TCP_OPTION_SPACE 40
+#define TCP_MIN_SND_MSS 48
+#define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)

/*
* Never offer a window over 32767 without using window scaling. Some
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index dd2a41b..367dc51 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3170,6 +3170,7 @@ void __init tcp_init(void)
int max_rshare, max_wshare, cnt;
unsigned int i;

+ BUILD_BUG_ON(TCP_MIN_SND_MSS <= MAX_TCP_OPTION_SPACE);
sock_skb_cb_check_size(sizeof(struct tcp_skb_cb));

percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 35e97ff..467d414 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1267,7 +1267,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
TCP_SKB_CB(skb)->seq += shifted;

tcp_skb_pcount_add(prev, pcount);
- BUG_ON(tcp_skb_pcount(skb) < pcount);
+ WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount);
tcp_skb_pcount_add(skb, -pcount);

/* When we're adding to gso_segs == 1, gso_size will be zero,
@@ -1329,6 +1329,21 @@ static int skb_can_shift(const struct sk_buff *skb)
return !skb_headlen(skb) && skb_is_nonlinear(skb);
}

+int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from,
+ int pcount, int shiftlen)
+{
+ /* TCP min gso_size is 8 bytes (TCP_MIN_GSO_SIZE)
+ * Since TCP_SKB_CB(skb)->tcp_gso_segs is 16 bits, we need
+ * to make sure not storing more than 65535 * 8 bytes per skb,
+ * even if current MSS is bigger.
+ */
+ if (unlikely(to->len + shiftlen >= 65535 * TCP_MIN_GSO_SIZE))
+ return 0;
+ if (unlikely(tcp_skb_pcount(to) + pcount > 65535))
+ return 0;
+ return skb_shift(to, from, shiftlen);
+}
+
/* Try collapsing SACK blocks spanning across multiple skbs to a single
* skb.
*/
@@ -1434,7 +1449,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
if (!after(TCP_SKB_CB(skb)->seq + len, tp->snd_una))
goto fallback;

- if (!skb_shift(prev, skb, len))
+ if (!tcp_skb_shift(prev, skb, pcount, len))
goto fallback;
if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack))
goto out;
@@ -1453,10 +1468,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto out;

len = skb->len;
- if (skb_shift(prev, skb, len)) {
- pcount += tcp_skb_pcount(skb);
- tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0);
- }
+ pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, pcount, len))
+ tcp_shifted_skb(sk, skb, state, pcount, len, mss, 0);

out:
state->fack_count += pcount;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3e52a48..34042e0 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1337,8 +1337,8 @@ static inline int __tcp_mtu_to_mss(struct sock *sk, int pmtu)
mss_now -= icsk->icsk_ext_hdr_len;

/* Then reserve room for full set of TCP options and 8 bytes of data */
- if (mss_now < 48)
- mss_now = 48;
+ if (mss_now < TCP_MIN_SND_MSS)
+ mss_now = TCP_MIN_SND_MSS;
return mss_now;
}

--
2.7.4

Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
From cd4ffa93f16efea290bb70537f98f518e1927e63 Mon Sep 17 00:00:00 2001
From: Joao Martins <[email protected]>
Date: Mon, 10 Jun 2019 23:12:39 +0100
Subject: [PATCH 5/5] tcp: fix fack_count accounting on tcp_shift_skb_data()

v4.15, since commit 737ff314563 ("tcp: use sequence distance to
detect reordering"), switched from packet-based FACK tracking
to sequence-based tracking.

v4.14 and older still have the old logic, and hence
tcp_shift_skb_data() needs to retain its original logic and keep
@fack_count in sync. In other words, we keep incrementing pcount by
tcp_skb_pcount(skb) so that it can later be used to update fack_count. To
make it more explicit, we track the pcount of the skb being shifted in
@next_pcount, which also avoids the repeated invocation of
tcp_skb_pcount(skb) altogether.

Fixes: a5f1faa40101 ("tcp: limit payload size of sacked skbs")
Reported-by: Alexey Kodanev <[email protected]>
Reviewed-by: Jack Vogel <[email protected]>
Reviewed-by: John Haxby <[email protected]>
Reviewed-by: Rao Shoaib <[email protected]>
Signed-off-by: Joao Martins <[email protected]>
Signed-off-by: Konrad Rzeszutek Wilk <[email protected]>

Upstream-Status: Inappropriate [not author]
Signed-off-by: Vicentiu Galanopulo <[email protected]>
---
net/ipv4/tcp_input.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c092c7c..6c7190c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1422,6 +1422,7 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *prev;
int mss;
+ int next_pcount;
int pcount = 0;
int len;
int in_sack;
@@ -1538,10 +1539,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb,
goto out;

len = skb->len;
- pcount = tcp_skb_pcount(skb);
- if (tcp_skb_shift(prev, skb, pcount, len))
- tcp_shifted_skb(sk, skb, state, pcount, len, mss, 0);
-
+ next_pcount = tcp_skb_pcount(skb);
+ if (tcp_skb_shift(prev, skb, next_pcount, len)) {
+ pcount += next_pcount;
+ tcp_shifted_skb(sk, skb, state, next_pcount, len, mss, 0);
+ }
out:
state->fack_count += pcount;
return prev;
--
2.7.4

Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
Date: Sat, 8 Jun 2019 10:38:06 -0700
Subject: [PATCH net 2/4] tcp: tcp_fragment() should apply sane memory limits
From: Eric Dumazet <[email protected]>

Jonathan Looney reported that a malicious peer can force a sender
to fragment its retransmit queue into tiny skbs, inflating memory
usage and/or overflowing 32-bit counters.

TCP allows an application to queue up to sk_sndbuf bytes,
so we need to give some allowance for non malicious splitting
of retransmit queue.

A new SNMP counter is added to monitor how many times TCP
refused to split an skb because the allowance was exceeded.

Note that this counter might increase in the case applications
use SO_SNDBUF socket option to lower sk_sndbuf.

Signed-off-by: Eric Dumazet <[email protected]>
Reported-by: Jonathan Looney <[email protected]>
Acked-by: Neal Cardwell <[email protected]>
Acked-by: Yuchung Cheng <[email protected]>
Reviewed-by: Tyler Hicks <[email protected]>
Cc: Bruce Curtis <[email protected]>
Cc: Jonathan Lemon <[email protected]>

Upstream-Status: Inappropriate [not author]
Signed-off-by: Vicentiu Galanopulo <[email protected]>
---
include/uapi/linux/snmp.h | 1 +
net/ipv4/proc.c | 1 +
net/ipv4/tcp_output.c | 5 +++++
3 files changed, 7 insertions(+)

diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index f5d753e..bf31965 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -278,6 +278,7 @@ enum
LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */
LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */
LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */
+ LINUX_MIB_TCPWQUEUETOOBIG, /* TCPWqueueTooBig */
__LINUX_MIB_MAX
};

diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 3fbf688..88aaf14 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -299,6 +299,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL),
SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS),
+ SNMP_MIB_ITEM("TCPWqueueTooBig", LINUX_MIB_TCPWQUEUETOOBIG),
SNMP_MIB_SENTINEL
};

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index def09d1..36d1945 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1274,6 +1274,11 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;

+ if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf)) {
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
+ return -ENOMEM;
+ }
+
if (skb_unclone(skb, gfp))
return -ENOMEM;

--
2.7.4

Loading

0 comments on commit 94966ae

Please sign in to comment.