summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c23
-rw-r--r--net/core/dev.c1
-rw-r--r--net/core/flow_dissector.c2
-rw-r--r--net/core/neighbour.c3
-rw-r--r--net/core/netclassid_cgroup.c32
-rw-r--r--net/core/netpoll.c10
-rw-r--r--net/core/secure_seq.c31
-rw-r--r--net/core/skbuff.c46
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/sysctl_net_core.c6
10 files changed, 121 insertions, 49 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ea633342ab0d..f4947e737f34 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -398,7 +398,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
struct iov_iter *to, int len)
{
int start = skb_headlen(skb);
- int i, copy = start - offset;
+ int i, copy = start - offset, start_off = offset, n;
struct sk_buff *frag_iter;
trace_skb_copy_datagram_iovec(skb, len);
@@ -407,11 +407,12 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
if (copy > 0) {
if (copy > len)
copy = len;
- if (copy_to_iter(skb->data + offset, copy, to) != copy)
+ n = copy_to_iter(skb->data + offset, copy, to);
+ offset += n;
+ if (n != copy)
goto short_copy;
if ((len -= copy) == 0)
return 0;
- offset += copy;
}
/* Copy paged appendix. Hmm... why does this look so complicated? */
@@ -425,13 +426,14 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- if (copy_page_to_iter(skb_frag_page(frag),
+ n = copy_page_to_iter(skb_frag_page(frag),
frag->page_offset + offset -
- start, copy, to) != copy)
+ start, copy, to);
+ offset += n;
+ if (n != copy)
goto short_copy;
if (!(len -= copy))
return 0;
- offset += copy;
}
start = end;
}
@@ -463,6 +465,7 @@ int skb_copy_datagram_iter(const struct sk_buff *skb, int offset,
*/
fault:
+ iov_iter_revert(to, offset - start_off);
return -EFAULT;
short_copy:
@@ -613,7 +616,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
__wsum *csump)
{
int start = skb_headlen(skb);
- int i, copy = start - offset;
+ int i, copy = start - offset, start_off = offset;
struct sk_buff *frag_iter;
int pos = 0;
int n;
@@ -623,11 +626,11 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
if (copy > len)
copy = len;
n = csum_and_copy_to_iter(skb->data + offset, copy, csump, to);
+ offset += n;
if (n != copy)
goto fault;
if ((len -= copy) == 0)
return 0;
- offset += copy;
pos = copy;
}
@@ -649,12 +652,12 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
offset - start, copy,
&csum2, to);
kunmap(page);
+ offset += n;
if (n != copy)
goto fault;
*csump = csum_block_add(*csump, csum2, pos);
if (!(len -= copy))
return 0;
- offset += copy;
pos += copy;
}
start = end;
@@ -687,6 +690,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
return 0;
fault:
+ iov_iter_revert(to, offset - start_off);
return -EFAULT;
}
@@ -771,6 +775,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
}
return 0;
csum_error:
+ iov_iter_revert(&msg->msg_iter, chunk);
return -EINVAL;
fault:
return -EFAULT;
diff --git a/net/core/dev.c b/net/core/dev.c
index 7869ae3837ca..533a6d6f6092 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6757,7 +6757,6 @@ int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags)
return err;
}
-EXPORT_SYMBOL(dev_change_xdp_fd);
/**
* dev_new_index - allocate an ifindex
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index c35aae13c8d2..d98d4998213d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -390,7 +390,7 @@ mpls:
unsigned char ar_tip[4];
} *arp_eth, _arp_eth;
const struct arphdr *arp;
- struct arphdr *_arp;
+ struct arphdr _arp;
arp = __skb_header_pointer(skb, nhoff, sizeof(_arp), data,
hlen, &_arp);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index e7c12caa20c8..4526cbd7e28a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -860,7 +860,8 @@ static void neigh_probe(struct neighbour *neigh)
if (skb)
skb = skb_clone(skb, GFP_ATOMIC);
write_unlock(&neigh->lock);
- neigh->ops->solicit(neigh, skb);
+ if (neigh->ops->solicit)
+ neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
kfree_skb(skb);
}
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 6ae56037bb13..029a61ac6cdd 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,27 +71,17 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
return 0;
}
-static void update_classid(struct cgroup_subsys_state *css, void *v)
+static void cgrp_attach(struct cgroup_taskset *tset)
{
- struct css_task_iter it;
+ struct cgroup_subsys_state *css;
struct task_struct *p;
- css_task_iter_start(css, &it);
- while ((p = css_task_iter_next(&it))) {
+ cgroup_taskset_for_each(p, css, tset) {
task_lock(p);
- iterate_fd(p->files, 0, update_classid_sock, v);
+ iterate_fd(p->files, 0, update_classid_sock,
+ (void *)(unsigned long)css_cls_state(css)->classid);
task_unlock(p);
}
- css_task_iter_end(&it);
-}
-
-static void cgrp_attach(struct cgroup_taskset *tset)
-{
- struct cgroup_subsys_state *css;
-
- cgroup_taskset_first(tset, &css);
- update_classid(css,
- (void *)(unsigned long)css_cls_state(css)->classid);
}
static u64 read_classid(struct cgroup_subsys_state *css, struct cftype *cft)
@@ -103,12 +93,22 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
u64 value)
{
struct cgroup_cls_state *cs = css_cls_state(css);
+ struct css_task_iter it;
+ struct task_struct *p;
cgroup_sk_alloc_disable();
cs->classid = (u32)value;
- update_classid(css, (void *)(unsigned long)cs->classid);
+ css_task_iter_start(css, &it);
+ while ((p = css_task_iter_next(&it))) {
+ task_lock(p);
+ iterate_fd(p->files, 0, update_classid_sock,
+ (void *)(unsigned long)cs->classid);
+ task_unlock(p);
+ }
+ css_task_iter_end(&it);
+
return 0;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9424673009c1..29be2466970c 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -105,15 +105,21 @@ static void queue_process(struct work_struct *work)
while ((skb = skb_dequeue(&npinfo->txq))) {
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
+ unsigned int q_index;
if (!netif_device_present(dev) || !netif_running(dev)) {
kfree_skb(skb);
continue;
}
- txq = skb_get_tx_queue(dev, skb);
-
local_irq_save(flags);
+ /* check if skb->queue_mapping is still valid */
+ q_index = skb_get_queue_mapping(skb);
+ if (unlikely(q_index >= dev->real_num_tx_queues)) {
+ q_index = q_index % dev->real_num_tx_queues;
+ skb_set_queue_mapping(skb, q_index);
+ }
+ txq = netdev_get_tx_queue(dev, q_index);
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (netif_xmit_frozen_or_stopped(txq) ||
netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 758f140b6bed..d28da7d363f1 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -20,9 +20,11 @@
#include <net/tcp.h>
static siphash_key_t net_secret __read_mostly;
+static siphash_key_t ts_secret __read_mostly;
static __always_inline void net_secret_init(void)
{
+ net_get_random_once(&ts_secret, sizeof(ts_secret));
net_get_random_once(&net_secret, sizeof(net_secret));
}
#endif
@@ -45,6 +47,23 @@ static u32 seq_scale(u32 seq)
#endif
#if IS_ENABLED(CONFIG_IPV6)
+static u32 secure_tcpv6_ts_off(const __be32 *saddr, const __be32 *daddr)
+{
+ const struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ } __aligned(SIPHASH_ALIGNMENT) combined = {
+ .saddr = *(struct in6_addr *)saddr,
+ .daddr = *(struct in6_addr *)daddr,
+ };
+
+ if (sysctl_tcp_timestamps != 1)
+ return 0;
+
+ return siphash(&combined, offsetofend(typeof(combined), daddr),
+ &ts_secret);
+}
+
u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
__be16 sport, __be16 dport, u32 *tsoff)
{
@@ -63,7 +82,7 @@ u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
net_secret_init();
hash = siphash(&combined, offsetofend(typeof(combined), dport),
&net_secret);
- *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ *tsoff = secure_tcpv6_ts_off(saddr, daddr);
return seq_scale(hash);
}
EXPORT_SYMBOL(secure_tcpv6_sequence_number);
@@ -88,6 +107,14 @@ EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
#endif
#ifdef CONFIG_INET
+static u32 secure_tcp_ts_off(__be32 saddr, __be32 daddr)
+{
+ if (sysctl_tcp_timestamps != 1)
+ return 0;
+
+ return siphash_2u32((__force u32)saddr, (__force u32)daddr,
+ &ts_secret);
+}
/* secure_tcp_sequence_number(a, b, 0, d) == secure_ipv4_port_ephemeral(a, b, d),
* but fortunately, `sport' cannot be 0 in any circumstances. If this changes,
@@ -103,7 +130,7 @@ u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
hash = siphash_3u32((__force u32)saddr, (__force u32)daddr,
(__force u32)sport << 16 | (__force u32)dport,
&net_secret);
- *tsoff = sysctl_tcp_timestamps == 1 ? (hash >> 32) : 0;
+ *tsoff = secure_tcp_ts_off(saddr, daddr);
return seq_scale(hash);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index cd4ba8c6b609..f86bf69cfb8d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3082,22 +3082,32 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (sg && csum && (mss != GSO_BY_FRAGS)) {
if (!(features & NETIF_F_GSO_PARTIAL)) {
struct sk_buff *iter;
+ unsigned int frag_len;
if (!list_skb ||
!net_gso_ok(features, skb_shinfo(head_skb)->gso_type))
goto normal;
- /* Split the buffer at the frag_list pointer.
- * This is based on the assumption that all
- * buffers in the chain excluding the last
- * containing the same amount of data.
+ /* If we get here then all the required
+ * GSO features except frag_list are supported.
+ * Try to split the SKB to multiple GSO SKBs
+ * with no frag_list.
+ * Currently we can do that only when the buffers don't
+ * have a linear part and all the buffers except
+ * the last are of the same length.
*/
+ frag_len = list_skb->len;
skb_walk_frags(head_skb, iter) {
+ if (frag_len != iter->len && iter->next)
+ goto normal;
if (skb_headlen(iter))
goto normal;
len -= iter->len;
}
+
+ if (len != frag_len)
+ goto normal;
}
/* GSO partial only requires that we trim off any excess that
@@ -3694,6 +3704,15 @@ static void sock_rmem_free(struct sk_buff *skb)
atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
}
+static void skb_set_err_queue(struct sk_buff *skb)
+{
+ /* pkt_type of skbs received on local sockets is never PACKET_OUTGOING.
+ * So, it is safe to (mis)use it to mark skbs on the error queue.
+ */
+ skb->pkt_type = PACKET_OUTGOING;
+ BUILD_BUG_ON(PACKET_OUTGOING == 0);
+}
+
/*
* Note: We dont mem charge error packets (no sk_forward_alloc changes)
*/
@@ -3707,6 +3726,7 @@ int sock_queue_err_skb(struct sock *sk, struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_rmem_free;
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ skb_set_err_queue(skb);
/* before exiting rcu section, make sure dst is refcounted */
skb_dst_force(skb);
@@ -3783,16 +3803,21 @@ EXPORT_SYMBOL(skb_clone_sk);
static void __skb_complete_tx_timestamp(struct sk_buff *skb,
struct sock *sk,
- int tstype)
+ int tstype,
+ bool opt_stats)
{
struct sock_exterr_skb *serr;
int err;
+ BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb));
+
serr = SKB_EXT_ERR(skb);
memset(serr, 0, sizeof(*serr));
serr->ee.ee_errno = ENOMSG;
serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
serr->ee.ee_info = tstype;
+ serr->opt_stats = opt_stats;
+ serr->header.h4.iif = skb->dev ? skb->dev->ifindex : 0;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
if (sk->sk_protocol == IPPROTO_TCP &&
@@ -3833,7 +3858,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb,
*/
if (likely(atomic_inc_not_zero(&sk->sk_refcnt))) {
*skb_hwtstamps(skb) = *hwtstamps;
- __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND);
+ __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND, false);
sock_put(sk);
}
}
@@ -3844,7 +3869,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
struct sock *sk, int tstype)
{
struct sk_buff *skb;
- bool tsonly;
+ bool tsonly, opt_stats = false;
if (!sk)
return;
@@ -3857,9 +3882,10 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
#ifdef CONFIG_INET
if ((sk->sk_tsflags & SOF_TIMESTAMPING_OPT_STATS) &&
sk->sk_protocol == IPPROTO_TCP &&
- sk->sk_type == SOCK_STREAM)
+ sk->sk_type == SOCK_STREAM) {
skb = tcp_get_timestamping_opt_stats(sk);
- else
+ opt_stats = true;
+ } else
#endif
skb = alloc_skb(0, GFP_ATOMIC);
} else {
@@ -3878,7 +3904,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
else
skb->tstamp = ktime_get_real();
- __skb_complete_tx_timestamp(skb, sk, tstype);
+ __skb_complete_tx_timestamp(skb, sk, tstype, opt_stats);
}
EXPORT_SYMBOL_GPL(__skb_tstamp_tx);
diff --git a/net/core/sock.c b/net/core/sock.c
index a96d5f7a5734..2c4f574168fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1442,6 +1442,11 @@ static void __sk_destruct(struct rcu_head *head)
pr_debug("%s: optmem leakage (%d bytes) detected\n",
__func__, atomic_read(&sk->sk_omem_alloc));
+ if (sk->sk_frag.page) {
+ put_page(sk->sk_frag.page);
+ sk->sk_frag.page = NULL;
+ }
+
if (sk->sk_peer_cred)
put_cred(sk->sk_peer_cred);
put_pid(sk->sk_peer_pid);
@@ -1539,6 +1544,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
is_charged = sk_filter_charge(newsk, filter);
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+ /* We need to make sure that we don't uncharge the new
+ * socket if we couldn't charge it in the first place
+ * as otherwise we uncharge the parent's filter.
+ */
+ if (!is_charged)
+ RCU_INIT_POINTER(newsk->sk_filter, NULL);
sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
@@ -2787,11 +2798,6 @@ void sk_common_release(struct sock *sk)
sk_refcnt_debug_release(sk);
- if (sk->sk_frag.page) {
- put_page(sk->sk_frag.page);
- sk->sk_frag.page = NULL;
- }
-
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 4ead336e14ea..7f9cc400eca0 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -408,14 +408,16 @@ static struct ctl_table net_core_table[] = {
.data = &sysctl_net_busy_poll,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "busy_read",
.data = &sysctl_net_busy_read,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
#endif
#ifdef CONFIG_NET_SCHED