From e8d4d34df715133c319fabcf63fdec684be75ff8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Sep 2024 23:22:41 +0200 Subject: net: Add netif_get_gro_max_size helper for GRO Add a small netif_get_gro_max_size() helper which returns the maximum IPv4 or IPv6 GRO size of the netdevice. We later add a netif_get_gso_max_size() equivalent as well for GSO, so that these helpers can be used consistently instead of open-coded checks. Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240923212242.15669-1-daniel@iogearbox.net Signed-off-by: Paolo Abeni --- net/core/gro.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/net/core/gro.c b/net/core/gro.c index 802b4a062400..d1f44084e978 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -98,7 +98,6 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int headlen = skb_headlen(skb); unsigned int len = skb_gro_len(skb); unsigned int delta_truesize; - unsigned int gro_max_size; unsigned int new_truesize; struct sk_buff *lp; int segs; @@ -112,12 +111,8 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) if (p->pp_recycle != skb->pp_recycle) return -ETOOMANYREFS; - /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */ - gro_max_size = p->protocol == htons(ETH_P_IPV6) ? - READ_ONCE(p->dev->gro_max_size) : - READ_ONCE(p->dev->gro_ipv4_max_size); - - if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush)) + if (unlikely(p->len + len >= netif_get_gro_max_size(p->dev, p) || + NAPI_GRO_CB(skb)->flush)) return -E2BIG; if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) { -- cgit v1.2.3 From e609c959a939660c7519895f853dfa5624c6827a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 23 Sep 2024 23:22:42 +0200 Subject: net: Fix gso_features_check to check for both dev->gso_{ipv4_,}max_size Commit 24ab059d2ebd ("net: check dev->gso_max_size in gso_features_check()") added a dev->gso_max_size test to gso_features_check() in order to fall back to GSO when needed. This was added as it was noticed that some drivers could misbehave if TSO packets get too big. However, the check doesn't respect dev->gso_ipv4_max_size limit. For instance, a device could be configured with BIG TCP for IPv4, but not IPv6. Therefore, add a netif_get_gso_max_size() equivalent to netif_get_gro_max_size() and use the helper to respect both limits before falling back to GSO engine. Fixes: 24ab059d2ebd ("net: check dev->gso_max_size in gso_features_check()") Signed-off-by: Daniel Borkmann Cc: Eric Dumazet Cc: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://patch.msgid.link/20240923212242.15669-2-daniel@iogearbox.net Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 9 +++++++++ net/core/dev.c | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d571451638de..4d20c776a4ff 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -5038,6 +5038,15 @@ netif_get_gro_max_size(const struct net_device *dev, const struct sk_buff *skb) READ_ONCE(dev->gro_ipv4_max_size); } +static inline unsigned int +netif_get_gso_max_size(const struct net_device *dev, const struct sk_buff *skb) +{ + /* pairs with WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */ + return skb->protocol == htons(ETH_P_IPV6) ? + READ_ONCE(dev->gso_max_size) : + READ_ONCE(dev->gso_ipv4_max_size); +} + static inline bool netif_is_macsec(const struct net_device *dev) { return dev->priv_flags & IFF_MACSEC; diff --git a/net/core/dev.c b/net/core/dev.c index cd479f5f22f6..74cf78a6b512 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3512,7 +3512,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb, if (gso_segs > READ_ONCE(dev->gso_max_segs)) return features & ~NETIF_F_GSO_MASK; - if (unlikely(skb->len >= READ_ONCE(dev->gso_max_size))) + if (unlikely(skb->len >= netif_get_gso_max_size(dev, skb))) return features & ~NETIF_F_GSO_MASK; if (!skb_shinfo(skb)->gso_type) { -- cgit v1.2.3 From c20029db28399ecc50e556964eaba75c43b1e2f1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2024 15:02:56 +0000 Subject: net: avoid potential underflow in qdisc_pkt_len_init() with UFO After commit 7c6d2ecbda83 ("net: be more gentle about silly gso requests coming from user") virtio_net_hdr_to_skb() had sanity check to detect malicious attempts from user space to cook a bad GSO packet. Then commit cf9acc90c80ec ("net: virtio_net_hdr_to_skb: count transport header in UFO") while fixing one issue, allowed user space to cook a GSO packet with the following characteristic : IPv4 SKB_GSO_UDP, gso_size=3, skb->len = 28. When this packet arrives in qdisc_pkt_len_init(), we end up with hdr_len = 28 (IPv4 header + UDP header), matching skb->len Then the following sets gso_segs to 0 : gso_segs = DIV_ROUND_UP(skb->len - hdr_len, shinfo->gso_size); Then later we set qdisc_skb_cb(skb)->pkt_len to back to zero :/ qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; This leads to the following crash in fq_codel [1] qdisc_pkt_len_init() is best effort, we only want an estimation of the bytes sent on the wire, not crashing the kernel. This patch is fixing this particular issue, a following one adds more sanity checks for another potential bug. [1] [ 70.724101] BUG: kernel NULL pointer dereference, address: 0000000000000000 [ 70.724561] #PF: supervisor read access in kernel mode [ 70.724561] #PF: error_code(0x0000) - not-present page [ 70.724561] PGD 10ac61067 P4D 10ac61067 PUD 107ee2067 PMD 0 [ 70.724561] Oops: Oops: 0000 [#1] SMP NOPTI [ 70.724561] CPU: 11 UID: 0 PID: 2163 Comm: b358537762 Not tainted 6.11.0-virtme #991 [ 70.724561] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 [ 70.724561] RIP: 0010:fq_codel_enqueue (net/sched/sch_fq_codel.c:120 net/sched/sch_fq_codel.c:168 net/sched/sch_fq_codel.c:230) sch_fq_codel [ 70.724561] Code: 24 08 49 c1 e1 06 44 89 7c 24 18 45 31 ed 45 31 c0 31 ff 89 44 24 14 4c 03 8b 90 01 00 00 eb 04 39 ca 73 37 4d 8b 39 83 c7 01 <49> 8b 17 49 89 11 41 8b 57 28 45 8b 5f 34 49 c7 07 00 00 00 00 49 All code ======== 0: 24 08 and $0x8,%al 2: 49 c1 e1 06 shl $0x6,%r9 6: 44 89 7c 24 18 mov %r15d,0x18(%rsp) b: 45 31 ed xor %r13d,%r13d e: 45 31 c0 xor %r8d,%r8d 11: 31 ff xor %edi,%edi 13: 89 44 24 14 mov %eax,0x14(%rsp) 17: 4c 03 8b 90 01 00 00 add 0x190(%rbx),%r9 1e: eb 04 jmp 0x24 20: 39 ca cmp %ecx,%edx 22: 73 37 jae 0x5b 24: 4d 8b 39 mov (%r9),%r15 27: 83 c7 01 add $0x1,%edi 2a:* 49 8b 17 mov (%r15),%rdx <-- trapping instruction 2d: 49 89 11 mov %rdx,(%r9) 30: 41 8b 57 28 mov 0x28(%r15),%edx 34: 45 8b 5f 34 mov 0x34(%r15),%r11d 38: 49 c7 07 00 00 00 00 movq $0x0,(%r15) 3f: 49 rex.WB Code starting with the faulting instruction =========================================== 0: 49 8b 17 mov (%r15),%rdx 3: 49 89 11 mov %rdx,(%r9) 6: 41 8b 57 28 mov 0x28(%r15),%edx a: 45 8b 5f 34 mov 0x34(%r15),%r11d e: 49 c7 07 00 00 00 00 movq $0x0,(%r15) 15: 49 rex.WB [ 70.724561] RSP: 0018:ffff95ae85e6fb90 EFLAGS: 00000202 [ 70.724561] RAX: 0000000002000000 RBX: ffff95ae841de000 RCX: 0000000000000000 [ 70.724561] RDX: 0000000000000000 RSI: 0000000000000001 RDI: 0000000000000001 [ 70.724561] RBP: ffff95ae85e6fbf8 R08: 0000000000000000 R09: ffff95b710a30000 [ 70.724561] R10: 0000000000000000 R11: bdf289445ce31881 R12: ffff95ae85e6fc58 [ 70.724561] R13: 0000000000000000 R14: 0000000000000040 R15: 0000000000000000 [ 70.724561] FS: 000000002c5c1380(0000) GS:ffff95bd7fcc0000(0000) knlGS:0000000000000000 [ 70.724561] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 70.724561] CR2: 0000000000000000 CR3: 000000010c568000 CR4: 00000000000006f0 [ 70.724561] Call Trace: [ 70.724561] [ 70.724561] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 70.724561] ? page_fault_oops (arch/x86/mm/fault.c:715) [ 70.724561] ? exc_page_fault (./arch/x86/include/asm/irqflags.h:26 ./arch/x86/include/asm/irqflags.h:87 ./arch/x86/include/asm/irqflags.h:147 arch/x86/mm/fault.c:1489 arch/x86/mm/fault.c:1539) [ 70.724561] ? asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623) [ 70.724561] ? fq_codel_enqueue (net/sched/sch_fq_codel.c:120 net/sched/sch_fq_codel.c:168 net/sched/sch_fq_codel.c:230) sch_fq_codel [ 70.724561] dev_qdisc_enqueue (net/core/dev.c:3784) [ 70.724561] __dev_queue_xmit (net/core/dev.c:3880 (discriminator 2) net/core/dev.c:4390 (discriminator 2)) [ 70.724561] ? irqentry_enter (kernel/entry/common.c:237) [ 70.724561] ? sysvec_apic_timer_interrupt (./arch/x86/include/asm/hardirq.h:74 (discriminator 2) arch/x86/kernel/apic/apic.c:1043 (discriminator 2) arch/x86/kernel/apic/apic.c:1043 (discriminator 2)) [ 70.724561] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:58 (discriminator 4)) [ 70.724561] ? asm_sysvec_apic_timer_interrupt (./arch/x86/include/asm/idtentry.h:702) [ 70.724561] ? virtio_net_hdr_to_skb.constprop.0 (./include/linux/virtio_net.h:129 (discriminator 1)) [ 70.724561] packet_sendmsg (net/packet/af_packet.c:3145 (discriminator 1) net/packet/af_packet.c:3177 (discriminator 1)) [ 70.724561] ? _raw_spin_lock_bh (./arch/x86/include/asm/atomic.h:107 (discriminator 4) ./include/linux/atomic/atomic-arch-fallback.h:2170 (discriminator 4) ./include/linux/atomic/atomic-instrumented.h:1302 (discriminator 4) ./include/asm-generic/qspinlock.h:111 (discriminator 4) ./include/linux/spinlock.h:187 (discriminator 4) ./include/linux/spinlock_api_smp.h:127 (discriminator 4) kernel/locking/spinlock.c:178 (discriminator 4)) [ 70.724561] ? netdev_name_node_lookup_rcu (net/core/dev.c:325 (discriminator 1)) [ 70.724561] __sys_sendto (net/socket.c:730 (discriminator 1) net/socket.c:745 (discriminator 1) net/socket.c:2210 (discriminator 1)) [ 70.724561] ? __sys_setsockopt (./include/linux/file.h:34 net/socket.c:2355) [ 70.724561] __x64_sys_sendto (net/socket.c:2222 (discriminator 1) net/socket.c:2218 (discriminator 1) net/socket.c:2218 (discriminator 1)) [ 70.724561] do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1) arch/x86/entry/common.c:83 (discriminator 1)) [ 70.724561] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130) [ 70.724561] RIP: 0033:0x41ae09 Fixes: cf9acc90c80ec ("net: virtio_net_hdr_to_skb: count transport header in UFO") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Jonathan Davies Reviewed-by: Willem de Bruijn Reviewed-by: Jonathan Davies Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 74cf78a6b512..7d3d34a3bdf5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3758,7 +3758,7 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) sizeof(_tcphdr), &_tcphdr); if (likely(th)) hdr_len += __tcp_hdrlen(th); - } else { + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { struct udphdr _udphdr; if (skb_header_pointer(skb, hdr_len, -- cgit v1.2.3 From ab9a9a9e9647392a19e7a885b08000e89c86b535 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Sep 2024 15:02:57 +0000 Subject: net: add more sanity checks to qdisc_pkt_len_init() One path takes care of SKB_GSO_DODGY, assuming skb->len is bigger than hdr_len. virtio_net_hdr_to_skb() does not fully dissect TCP headers, it only make sure it is at least 20 bytes. It is possible for an user to provide a malicious 'GSO' packet, total length of 80 bytes. - 20 bytes of IPv4 header - 60 bytes TCP header - a small gso_size like 8 virtio_net_hdr_to_skb() would declare this packet as a normal GSO packet, because it would see 40 bytes of payload, bigger than gso_size. We need to make detect this case to not underflow qdisc_skb_cb(skb)->pkt_len. Fixes: 1def9238d4aa ("net_sched: more precise pkt_len computation") Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: David Ahern Signed-off-by: Paolo Abeni --- net/core/dev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 7d3d34a3bdf5..ea5fbcd133ae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3766,10 +3766,14 @@ static void qdisc_pkt_len_init(struct sk_buff *skb) hdr_len += sizeof(struct udphdr); } - if (shinfo->gso_type & SKB_GSO_DODGY) - gso_segs = DIV_ROUND_UP(skb->len - hdr_len, - shinfo->gso_size); + if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { + int payload = skb->len - hdr_len; + /* Malicious packet. */ + if (payload <= 0) + return; + gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); + } qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; } } -- cgit v1.2.3