diff options
Diffstat (limited to 'net/sched')
-rw-r--r-- | net/sched/Kconfig | 27 | ||||
-rw-r--r-- | net/sched/Makefile | 3 | ||||
-rw-r--r-- | net/sched/act_api.c | 36 | ||||
-rw-r--r-- | net/sched/act_bpf.c | 5 | ||||
-rw-r--r-- | net/sched/act_csum.c | 36 | ||||
-rw-r--r-- | net/sched/act_gact.c | 3 | ||||
-rw-r--r-- | net/sched/act_ife.c | 26 | ||||
-rw-r--r-- | net/sched/act_meta_skbtcindex.c | 79 | ||||
-rw-r--r-- | net/sched/act_mirred.c | 11 | ||||
-rw-r--r-- | net/sched/act_police.c | 12 | ||||
-rw-r--r-- | net/sched/act_skbmod.c | 301 | ||||
-rw-r--r-- | net/sched/act_tunnel_key.c | 342 | ||||
-rw-r--r-- | net/sched/act_vlan.c | 29 | ||||
-rw-r--r-- | net/sched/cls_api.c | 18 | ||||
-rw-r--r-- | net/sched/cls_bpf.c | 126 | ||||
-rw-r--r-- | net/sched/cls_flow.c | 27 | ||||
-rw-r--r-- | net/sched/cls_flower.c | 124 | ||||
-rw-r--r-- | net/sched/cls_fw.c | 10 | ||||
-rw-r--r-- | net/sched/cls_route.c | 12 | ||||
-rw-r--r-- | net/sched/cls_tcindex.c | 12 | ||||
-rw-r--r-- | net/sched/cls_u32.c | 30 | ||||
-rw-r--r-- | net/sched/sch_api.c | 41 | ||||
-rw-r--r-- | net/sched/sch_codel.c | 4 | ||||
-rw-r--r-- | net/sched/sch_fifo.c | 4 | ||||
-rw-r--r-- | net/sched/sch_fq.c | 71 | ||||
-rw-r--r-- | net/sched/sch_generic.c | 28 | ||||
-rw-r--r-- | net/sched/sch_htb.c | 24 | ||||
-rw-r--r-- | net/sched/sch_netem.c | 20 | ||||
-rw-r--r-- | net/sched/sch_pie.c | 4 |
29 files changed, 1294 insertions, 171 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b3b94c..87956a768d1b 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. + config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT @@ -761,6 +772,17 @@ config NET_ACT_IFE To compile this code as a module, choose M here: the module will be called act_ife. +config NET_ACT_TUNNEL_KEY + tristate "IP tunnel metadata manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to set/release ip tunnel metadata. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_tunnel_key. + config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE @@ -771,6 +793,11 @@ config NET_IFE_SKBPRIO depends on NET_ACT_IFE ---help--- +config NET_IFE_SKBTCINDEX + tristate "Support to encoding decoding skb tcindex on IFE action" + depends on NET_ACT_IFE + ---help--- + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5a9d95..4bdda3634e0b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,9 +19,12 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o +obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d09d0687594b..c9102172ce3b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,9 +592,19 @@ err_out: return ERR_PTR(err); } -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind, struct list_head *actions) +static void cleanup_a(struct list_head *actions, int ovr) +{ + struct tc_action *a; + + if (!ovr) + return; + + list_for_each_entry(a, actions, list) + a->tcfa_refcnt--; +} + +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -612,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla, goto err; } act->order = i; + if (ovr) + act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + + /* Remove the temp refcnt which was necessary to protect against + * destroying an existing action which was being replaced + */ + cleanup_a(actions, ovr); return 0; err: @@ -883,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + if (event == RTM_GETACTION) + act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } @@ -923,9 +942,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, return err; } -static int -tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int ovr) +static int tcf_action_add(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; LIST_HEAD(actions); @@ -988,8 +1006,7 @@ replay: return ret; } -static struct nlattr * -find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1016,8 +1033,7 @@ find_dump_kind(const struct nlmsghdr *n) return kind; } -static int -tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) +static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index bfa870731e74..1d3960033f61 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -39,13 +39,10 @@ static struct tc_action_ops act_bpf_ops; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { + bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; - - if (unlikely(!skb_mac_header_was_set(skb))) - return TC_ACT_UNSPEC; tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b5dbf633a863..e0defcef376d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -116,8 +116,8 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, return (void *)(skb_network_header(skb) + ihl); } -static int tcf_csum_ipv4_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmphdr *icmph; @@ -152,8 +152,8 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; @@ -174,8 +174,8 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; @@ -195,8 +195,8 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; @@ -217,8 +217,8 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; @@ -270,8 +270,8 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; @@ -380,8 +380,8 @@ fail: return 0; } -static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, - unsigned int ixhl, unsigned int *pl) +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, + unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; @@ -494,8 +494,8 @@ fail: return 0; } -static int tcf_csum(struct sk_buff *skb, - const struct tc_action *a, struct tcf_result *res) +static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); int action; @@ -531,8 +531,8 @@ drop: return TC_ACT_SHOT; } -static int tcf_csum_dump(struct sk_buff *skb, - struct tc_action *a, int bind, int ref) +static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e24a4093d6f6..e0aa30f83c6c 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -156,7 +156,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; - _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets); + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, + packets); if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e87cd81315e1..ccf7b4b655fe 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) } EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) +{ + u16 edata = 0; + + if (mi->metaval) + edata = *(u16 *)mi->metaval; + else if (metaval) + edata = metaval; + + if (!edata) /* will not encode */ + return 0; + + edata = htons(edata); + return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); +} +EXPORT_SYMBOL_GPL(ife_encode_meta_u16); + int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) @@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_check_meta_u32); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) +{ + if (metaval || mi->metaval) + return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ + + return 0; +} +EXPORT_SYMBOL_GPL(ife_check_meta_u16); + int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c new file mode 100644 index 000000000000..3b35774ce890 --- /dev/null +++ b/net/sched/act_meta_skbtcindex.c @@ -0,0 +1,79 @@ +/* + * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * copyright Jamal Hadi Salim (2016) + * +*/ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <linux/module.h> +#include <linux/init.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <uapi/linux/tc_act/tc_ife.h> +#include <net/tc_act/tc_ife.h> +#include <linux/rtnetlink.h> + +static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, + struct tcf_meta_info *e) +{ + u32 ifetc_index = skb->tc_index; + + return ife_encode_meta_u16(ifetc_index, skbdata, e); +} + +static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len) +{ + u16 ifetc_index = *(u16 *)data; + + skb->tc_index = ntohs(ifetc_index); + return 0; +} + +static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e) +{ + return ife_check_meta_u16(skb->tc_index, e); +} + +static struct tcf_meta_ops ife_skbtcindex_ops = { + .metaid = IFE_META_TCINDEX, + .metatype = NLA_U16, + .name = "tc_index", + .synopsis = "skb tc_index 16 bit metadata", + .check_presence = skbtcindex_check, + .encode = skbtcindex_encode, + .decode = skbtcindex_decode, + .get = ife_get_meta_u16, + .alloc = ife_alloc_meta_u16, + .release = ife_release_meta_gen, + .validate = ife_validate_meta_u16, + .owner = THIS_MODULE, +}; + +static int __init ifetc_index_init_module(void) +{ + return register_ife_op(&ife_skbtcindex_ops); +} + +static void __exit ifetc_index_cleanup_module(void) +{ + unregister_ife_op(&ife_skbtcindex_ops); +} + +module_init(ifetc_index_init_module); +module_exit(ifetc_index_cleanup_module); + +MODULE_AUTHOR("Jamal Hadi Salim(2016)"); +MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6038c85d92f5..667dc382df82 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,7 +204,15 @@ out: return retval; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse) +{ + tcf_lastuse_update(&a->tcfa_tm); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); +} + +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); @@ -280,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = { .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred, + .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8a3be1d99775..d1bd248fe146 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -249,6 +249,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, police->tcfp_t_c = now; police->tcfp_toks = toks; police->tcfp_ptoks = ptoks; + if (police->tcfp_result == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcfp_result; } @@ -261,8 +263,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcf_action; } -static int -tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); @@ -347,14 +349,12 @@ static struct pernet_operations police_net_ops = { .size = sizeof(struct tc_action_net), }; -static int __init -police_init_module(void) +static int __init police_init_module(void) { return tcf_register_action(&act_police_ops, &police_net_ops); } -static void __exit -police_cleanup_module(void) +static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops, &police_net_ops); } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 000000000000..e7d96381c908 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,301 @@ +/* + * net/sched/act_skbmod.c skb data modifier + * + * Copyright (c) 2016 Jamal Hadi Salim <jhs@mojatatu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_skbmod.h> +#include <net/tc_act/tc_skbmod.h> + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +#define MAX_EDIT_LEN ETH_HLEN +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + int action; + struct tcf_skbmod_params *p; + u64 flags; + int err; + + tcf_lastuse_update(&d->tcf_tm); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + + /* XXX: if you are going to edit more fields beyond ethernet header + * (example when you add IP header replacement or vlan swap) + * then MAX_EDIT_LEN needs to change appropriately + */ + err = skb_ensure_writable(skb, MAX_EDIT_LEN); + if (unlikely(err)) { /* best policy is to drop on the floor */ + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; + } + + rcu_read_lock(); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) { + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + rcu_read_unlock(); + return action; + } + + p = rcu_dereference(d->skbmod_p); + flags = p->flags; + if (flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); + if (flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); + if (flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = p->eth_type; + rcu_read_unlock(); + + if (flags & SKBMOD_F_SWAPMAC) { + u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ + /*XXX: I am sure we can come up with more efficient swapping*/ + ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); + } + + return action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tcf_skbmod_params *p, *p_old; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + bool exists = false; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + u32 lflags = 0; + int ret = 0, err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (!tb[TCA_SKBMOD_PARMS]) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + parm = nla_data(tb[TCA_SKBMOD_PARMS]); + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) + return -EINVAL; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + d = to_skbmod(*a); + + ASSERT_RTNL(); + p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); + if (unlikely(!p)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + p->flags = lflags; + d->tcf_action = parm->action; + + p_old = rtnl_dereference(d->skbmod_p); + + if (ovr) + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(p->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(p->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + p->eth_type = htons(eth_type); + + rcu_assign_pointer(d->skbmod_p, p); + if (ovr) + spin_unlock_bh(&d->tcf_lock); + + if (p_old) + kfree_rcu(p_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbmod *d = to_skbmod(a); + struct tcf_skbmod_params *p; + + p = rcu_dereference_protected(d->skbmod_p, 1); + kfree_rcu(p, rcu); +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; + struct tcf_t t; + + opt.flags = p->flags; + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .cleanup = tcf_skbmod_cleanup, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, <jhs@mojatatu.com>"); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c new file mode 100644 index 000000000000..af47bdf2f483 --- /dev/null +++ b/net/sched/act_tunnel_key.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2016, Amir Vadai <amir@vadai.me> + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> +#include <net/dst.h> +#include <net/dst_metadata.h> + +#include <linux/tc_act/tc_tunnel_key.h> +#include <net/tc_act/tc_tunnel_key.h> + +#define TUNNEL_KEY_TAB_MASK 15 + +static int tunnel_key_net_id; +static struct tc_action_ops act_tunnel_key_ops; + +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + int action; + + rcu_read_lock(); + + params = rcu_dereference(t->params); + + tcf_lastuse_update(&t->tcf_tm); + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); + action = params->action; + + switch (params->tcft_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + skb_dst_drop(skb); + break; + case TCA_TUNNEL_KEY_ACT_SET: + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(¶ms->tcft_enc_metadata->dst)); + break; + default: + WARN_ONCE(1, "Bad tunnel_key action %d.\n", + params->tcft_action); + break; + } + + rcu_read_unlock(); + + return action; +} + +static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { + [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, +}; + +static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; + struct tcf_tunnel_key_params *params_old; + struct tcf_tunnel_key_params *params_new; + struct metadata_dst *metadata = NULL; + struct tc_tunnel_key *parm; + struct tcf_tunnel_key *t; + bool exists = false; + __be64 key_id; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + switch (parm->t_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + break; + case TCA_TUNNEL_KEY_ACT_SET: + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + ret = -EINVAL; + goto err_out; + } + + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { + __be32 saddr; + __be32 daddr; + + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); + + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + TUNNEL_KEY, key_id, 0); + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { + struct in6_addr saddr; + struct in6_addr daddr; + + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); + + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, + TUNNEL_KEY, key_id, 0); + } + + if (!metadata) { + ret = -EINVAL; + goto err_out; + } + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; + break; + default: + goto err_out; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + t = to_tunnel_key(*a); + + ASSERT_RTNL(); + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + params_old = rtnl_dereference(t->params); + + params_new->action = parm->action; + params_new->tcft_action = parm->t_action; + params_new->tcft_enc_metadata = metadata; + + rcu_assign_pointer(t->params, params_new); + + if (params_old) + kfree_rcu(params_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + + return ret; + +err_out: + if (exists) + tcf_hash_release(*a, bind); + return ret; +} + +static void tunnel_key_release(struct tc_action *a, int bind) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + + params = rcu_dereference_protected(t->params, 1); + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); + + kfree_rcu(params, rcu); +} + +static int tunnel_key_dump_addresses(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + unsigned short family = ip_tunnel_info_af(info); + + if (family == AF_INET) { + __be32 saddr = info->key.u.ipv4.src; + __be32 daddr = info->key.u.ipv4.dst; + + if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && + !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) + return 0; + } + + if (family == AF_INET6) { + const struct in6_addr *saddr6 = &info->key.u.ipv6.src; + const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; + + if (!nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && + !nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) + return 0; + } + + return -EINVAL; +} + +static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + struct tc_tunnel_key opt = { + .index = t->tcf_index, + .refcnt = t->tcf_refcnt - ref, + .bindcnt = t->tcf_bindcnt - bind, + }; + struct tcf_t tm; + + params = rtnl_dereference(t->params); + + opt.t_action = params->tcft_action; + opt.action = params->action; + + if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { + struct ip_tunnel_key *key = + ¶ms->tcft_enc_metadata->u.tun_info.key; + __be32 key_id = tunnel_id_to_key32(key->tun_id); + + if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + tunnel_key_dump_addresses(skb, + ¶ms->tcft_enc_metadata->u.tun_info)) + goto nla_put_failure; + } + + tcf_tm_dump(&tm, &t->tcf_tm); + if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), + &tm, TCA_TUNNEL_KEY_PAD)) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tunnel_key_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_tunnel_key_ops = { + .kind = "tunnel_key", + .type = TCA_ACT_TUNNEL_KEY, + .owner = THIS_MODULE, + .act = tunnel_key_act, + .dump = tunnel_key_dump, + .init = tunnel_key_init, + .cleanup = tunnel_key_release, + .walk = tunnel_key_walker, + .lookup = tunnel_key_search, + .size = sizeof(struct tcf_tunnel_key), +}; + +static __net_init int tunnel_key_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); +} + +static void __net_exit tunnel_key_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations tunnel_key_net_ops = { + .init = tunnel_key_init_net, + .exit = tunnel_key_exit_net, + .id = &tunnel_key_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init tunnel_key_init_module(void) +{ + return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +static void __exit tunnel_key_cleanup_module(void) +{ + tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +module_init(tunnel_key_init_module); +module_exit(tunnel_key_cleanup_module); + +MODULE_AUTHOR("Amir Vadai <amir@vadai.me>"); +MODULE_DESCRIPTION("ip tunnel manipulation actions"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 59a8d3150ae2..a95c00b119da 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -30,6 +30,7 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_vlan *v = to_vlan(a); int action; int err; + u16 tci; spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); @@ -48,6 +49,30 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (err) goto drop; break; + case TCA_VLAN_ACT_MODIFY: + /* No-op if no vlan tag (either hw-accel or in-payload) */ + if (!skb_vlan_tagged(skb)) + goto unlock; + /* extract existing tag (and guarantee no hw-accel tag) */ + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + skb->vlan_tci = 0; + } else { + /* in-payload vlan tag, pop it */ + err = __skb_vlan_pop(skb, &tci); + if (err) + goto drop; + } + /* replace the vid */ + tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + /* replace prio bits, if tcfv_push_prio specified */ + if (v->tcfv_push_prio) { + tci &= ~VLAN_PRIO_MASK; + tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + } + /* put updated tci as hwaccel tag */ + __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + break; default: BUG(); } @@ -102,6 +127,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) tcf_hash_release(*a, bind); @@ -185,7 +211,8 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (v->tcfv_action == TCA_VLAN_ACT_PUSH && + if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || + v->tcfv_action == TCA_VLAN_ACT_MODIFY) && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, v->tcfv_push_proto) || diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7c5645373af..11da7da0b7c4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -344,13 +344,15 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, + RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -448,7 +450,8 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER); } /* called with RTNL */ @@ -552,7 +555,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -560,8 +563,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, - TCA_ACT_BIND); + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -573,8 +575,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, int err, i = 0; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, - TCA_ACT_BIND, &actions); + NULL, ovr, TCA_ACT_BIND, + &actions); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 4742f415ee5b..bb1d5a487081 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 +#define CLS_BPF_SUPPORTED_GEN_FLAGS \ + (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW) struct cls_bpf_head { struct list_head plist; @@ -39,6 +41,8 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; + bool offloaded; + u32 gen_flags; struct tcf_exts exts; u32 handle; union { @@ -54,8 +58,10 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, - [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, + .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -83,9 +89,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; - if (unlikely(!skb_mac_header_was_set(skb))) - return -1; - /* Needed here for accessing maps. */ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { @@ -93,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, qdisc_skb_cb(skb)->tc_classid = prog->res.classid; - if (at_ingress) { + if (tc_skip_sw(prog->gen_flags)) { + filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0; + } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); bpf_compute_data_end(skb); @@ -140,6 +145,91 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) return !prog->bpf_ops; } +static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, + enum tc_clsbpf_command cmd) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_bpf_offload bpf_offload = {}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSBPF; + offload.cls_bpf = &bpf_offload; + + bpf_offload.command = cmd; + bpf_offload.exts = &prog->exts; + bpf_offload.prog = prog->filter; + bpf_offload.name = prog->bpf_name; + bpf_offload.exts_integrated = prog->exts_integrated; + bpf_offload.gen_flags = prog->gen_flags; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); +} + +static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_bpf_prog *obj = prog; + enum tc_clsbpf_command cmd; + bool skip_sw; + int ret; + + skip_sw = tc_skip_sw(prog->gen_flags) || + (oldprog && tc_skip_sw(oldprog->gen_flags)); + + if (oldprog && oldprog->offloaded) { + if (tc_should_offload(dev, tp, prog->gen_flags)) { + cmd = TC_CLSBPF_REPLACE; + } else if (!tc_skip_sw(prog->gen_flags)) { + obj = oldprog; + cmd = TC_CLSBPF_DESTROY; + } else { + return -EINVAL; + } + } else { + if (!tc_should_offload(dev, tp, prog->gen_flags)) + return skip_sw ? -EINVAL : 0; + cmd = TC_CLSBPF_ADD; + } + + ret = cls_bpf_offload_cmd(tp, obj, cmd); + if (ret) + return skip_sw ? ret : 0; + + obj->offloaded = true; + if (oldprog) + oldprog->offloaded = false; + + return 0; +} + +static void cls_bpf_stop_offload(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + int err; + + if (!prog->offloaded) + return; + + err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + if (err) { + pr_err("Stopping hardware offload failed: %d\n", err); + return; + } + + prog->offloaded = false; +} + +static void cls_bpf_offload_update_stats(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + if (!prog->offloaded) + return; + + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -179,6 +269,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -195,6 +286,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(prog, tmp, &head->plist, link) { + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -304,6 +396,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; + u32 gen_flags = 0; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; @@ -328,8 +421,17 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } + if (tb[TCA_BPF_FLAGS_GEN]) { + gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); + if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || + !tc_flags_valid(gen_flags)) { + ret = -EINVAL; + goto errout; + } + } prog->exts_integrated = have_exts; + prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); @@ -412,10 +514,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], + ovr); if (ret < 0) goto errout; + ret = cls_bpf_offload(tp, prog, oldprog); + if (ret) { + cls_bpf_delete_prog(tp, prog); + return ret; + } + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); @@ -477,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, tm->tcm_handle = prog->handle; + cls_bpf_offload_update_stats(tp, prog); + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -499,6 +610,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) goto nla_put_failure; + if (prog->gen_flags && + nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags)) + goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 2c1ae549edbf..e39672394c7b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -29,7 +29,7 @@ #include <net/route.h> #include <net/flow_dissector.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif @@ -87,12 +87,14 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } -static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto(const struct sk_buff *skb, + const struct flow_keys *flow) { return flow->basic.ip_proto; } -static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); @@ -100,7 +102,8 @@ static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys return addr_fold(skb->sk); } -static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); @@ -125,14 +128,14 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb->nfct); #else return 0; #endif } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ @@ -149,7 +152,8 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_src(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -161,7 +165,8 @@ fallback: return flow_get_src(skb, flow); } -static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -173,14 +178,16 @@ fallback: return flow_get_dst(skb, flow); } -static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } -static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index cf9ad5b50889..2af09c872a1a 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -23,18 +23,26 @@ #include <net/ip.h> #include <net/flow_dissector.h> +#include <net/dst.h> +#include <net/dst_metadata.h> + struct fl_flow_key { int indev_ifindex; struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; - struct flow_dissector_key_addrs ipaddrs; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_ports tp; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -124,11 +132,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; + struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); + + info = skb_tunnel_info(skb); + if (info) { + struct ip_tunnel_key *key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_key.enc_ipv4.src = key->u.ipv4.src; + skb_key.enc_ipv4.dst = key->u.ipv4.dst; + break; + case AF_INET6: + skb_key.enc_ipv6.src = key->u.ipv6.src; + skb_key.enc_ipv6.dst = key->u.ipv6.dst; + break; + } + + skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + } + skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. @@ -213,7 +241,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc.type = TC_SETUP_CLSFLOWER; tc.cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &tc); if (tc_skip_sw(flags)) return err; @@ -297,7 +326,19 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 }, - + [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -395,20 +436,54 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); } + if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + fl_set_key_val(tb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, + &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)); + fl_set_key_val(tb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, + &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)); + } + + if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + fl_set_key_val(tb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, + &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)); + fl_set_key_val(tb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)); + } + + fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + sizeof(key->enc_key_id.keyid)); + return 0; } @@ -806,21 +881,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (key->basic.ip_proto == IPPROTO_TCP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; + if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)) || + fl_dump_key_val(skb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)))) + goto nla_put_failure; + else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)) || + fl_dump_key_val(skb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)))) + goto nla_put_failure; + + if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + sizeof(key->enc_key_id))) + goto nla_put_failure; + nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); if (tcf_exts_dump(skb, &f->exts)) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index cc0bda945800..9dc63d54e167 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -57,7 +57,7 @@ static u32 fw_hash(u32 handle) } static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; @@ -188,7 +188,8 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tb, struct nlattr **tca, unsigned long base, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct tcf_exts e; @@ -237,9 +238,8 @@ errout: static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + u32 handle, struct nlattr **tca, unsigned long *arg, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *) *arg; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index c91e65d81a48..455fc8f83d0a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -268,8 +268,7 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void -route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); @@ -474,10 +473,8 @@ errout: } static int route4_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct tcf_proto *tp, unsigned long base, u32 handle, + struct nlattr **tca, unsigned long *arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -562,7 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: - tcf_exts_destroy(&f->exts); + if (f) + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index d9500709831f..96144bdf30db 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -50,14 +50,13 @@ struct tcindex_data { struct rcu_head rcu; }; -static inline int -tcindex_filter_is_set(struct tcindex_filter_result *r) +static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { return tcf_exts_is_predicative(&r->exts) || r->res.classid; } -static struct tcindex_filter_result * -tcindex_lookup(struct tcindex_data *p, u16 key) +static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, + u16 key) { if (p->perfect) { struct tcindex_filter_result *f = p->perfect + key; @@ -144,7 +143,8 @@ static void tcindex_destroy_rexts(struct rcu_head *head) static void tcindex_destroy_fexts(struct rcu_head *head) { - struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + struct tcindex_filter *f = container_of(head, struct tcindex_filter, + rcu); tcf_exts_destroy(&f->result.exts); kfree(f); @@ -550,7 +550,7 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index a29263a9d8c1..ae83c3aec308 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -104,7 +104,8 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) +static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) { struct { struct tc_u_knode *knode; @@ -256,8 +257,7 @@ deadloop: return -1; } -static struct tc_u_hnode * -u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) +static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; @@ -270,8 +270,7 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) return ht; } -static struct tc_u_knode * -u32_lookup_key(struct tc_u_hnode *ht, u32 handle) +static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; @@ -360,8 +359,7 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tcf_proto *tp, - struct tc_u_knode *n, +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); @@ -448,9 +446,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) } } -static int u32_replace_hw_hnode(struct tcf_proto *tp, - struct tc_u_hnode *h, - u32 flags) +static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -496,9 +493,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } } -static int u32_replace_hw_knode(struct tcf_proto *tp, - struct tc_u_knode *n, - u32 flags) +static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -763,8 +759,7 @@ errout: return err; } -static void u32_replace_knode(struct tcf_proto *tp, - struct tc_u_common *tp_c, +static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, struct tc_u_knode *n) { struct tc_u_knode __rcu **ins; @@ -845,8 +840,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -1088,7 +1082,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d677b3484d81..206dc24add3a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -389,7 +389,8 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) static struct qdisc_rate_table *qdisc_rtab_list; -struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, + struct nlattr *tab) { struct qdisc_rate_table *rtab; @@ -541,7 +542,8 @@ nla_put_failure: return -1; } -void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -888,10 +890,10 @@ static struct lock_class_key qdisc_rx_lock; Parameters are passed via opt. */ -static struct Qdisc * -qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *p, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +static struct Qdisc *qdisc_create(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -1073,7 +1075,8 @@ struct check_loop_arg { int depth; }; -static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); +static int check_loop_fn(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { @@ -1450,7 +1453,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } else { if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1471,7 +1475,8 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1505,7 +1510,8 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, true) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + true) < 0) goto done; dev_queue = dev_ingress_queue(dev); @@ -1640,7 +1646,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, + RTM_DELTCLASS); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1738,12 +1745,14 @@ struct qdisc_dump_args { struct netlink_callback *cb; }; -static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) +static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTCLASS); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, @@ -1976,10 +1985,12 @@ static int __init pktsched_init(void) rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, + NULL); rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, + NULL); return 0; } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 4002df3c7d9f..5bfa79ee657c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -69,7 +69,7 @@ struct codel_sched_data { static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { struct Qdisc *sch = ctx; - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (skb) sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index baeed6a78d28..1e37247656f8 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index e5458b99e09c..18e752439f6f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -86,6 +86,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; @@ -94,6 +95,7 @@ struct fq_sched_data { u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ u32 orphan_mask; /* mask for orphaned skb */ + u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -407,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, static void fq_check_throttled(struct fq_sched_data *q, u64 now) { + unsigned long sample; struct rb_node *p; if (q->time_next_delayed_flow > now) return; + /* Update unthrottle latency EWMA. + * This is cheap and can help diagnosing timer/latency problems. + */ + sample = (unsigned long)(now - q->time_next_delayed_flow); + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { struct fq_flow *f = container_of(p, struct fq_flow, rate_node); @@ -433,7 +443,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; - u32 rate; + u32 rate, plen; skb = fq_dequeue_head(sch, &q->internal); if (skb) @@ -482,7 +492,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (f->credit > 0 || !q->rate_enable) + if (!q->rate_enable) goto out; /* Do not pace locally generated ack packets */ @@ -493,8 +503,15 @@ begin: if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); + if (rate <= q->low_rate_threshold) { + f->credit = 0; + plen = qdisc_pkt_len(skb); + } else { + plen = max(qdisc_pkt_len(skb), q->quantum); + if (f->credit > 0) + goto out; + } if (rate != ~0U) { - u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) @@ -507,7 +524,12 @@ begin: len = NSEC_PER_SEC; q->stat_pkts_too_long++; } - + /* Account for schedule/timers drifts. + * f->time_next_packet was set when prior packet was sent, + * and current time (@now) can be too late by tens of us. + */ + if (f->time_next_packet) + len -= min(len/2, now - f->time_next_packet); f->time_next_packet = now + len; } out: @@ -662,6 +684,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -716,6 +739,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_MAX_RATE]) q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) + q->low_rate_threshold = + nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); + if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); @@ -774,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; + q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; q->new_flows.first = NULL; q->old_flows.first = NULL; @@ -781,6 +809,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->fq_root = NULL; q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; + q->low_rate_threshold = 550000 / 8; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -811,6 +840,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || + nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, + q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; @@ -823,20 +854,24 @@ nla_put_failure: static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_get_ns(); - struct tc_fq_qd_stats st = { - .gc_flows = q->stat_gc_flows, - .highprio_packets = q->stat_internal_packets, - .tcp_retrans = q->stat_tcp_retrans, - .throttled = q->stat_throttled, - .flows_plimit = q->stat_flows_plimit, - .pkts_too_long = q->stat_pkts_too_long, - .allocation_errors = q->stat_allocation_errors, - .flows = q->flows, - .inactive_flows = q->inactive_flows, - .throttled_flows = q->throttled_flows, - .time_next_delayed_flow = q->time_next_delayed_flow - now, - }; + struct tc_fq_qd_stats st; + + sch_tree_lock(sch); + + st.gc_flows = q->stat_gc_flows; + st.highprio_packets = q->stat_internal_packets; + st.tcp_retrans = q->stat_tcp_retrans; + st.throttled = q->stat_throttled; + st.flows_plimit = q->stat_flows_plimit; + st.pkts_too_long = q->stat_pkts_too_long; + st.allocation_errors = q->stat_allocation_errors; + st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); + st.flows = q->flows; + st.inactive_flows = q->inactive_flows; + st.throttled_flows = q->throttled_flows; + st.unthrottle_latency_ns = min_t(unsigned long, + q->unthrottle_latency_ns, ~0U); + sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0d21b567ff27..6cfb6e9038c2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -466,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { */ struct pfifo_fast_priv { u32 bitmap; - struct sk_buff_head q[PFIFO_FAST_BANDS]; + struct qdisc_skb_head q[PFIFO_FAST_BANDS]; }; /* @@ -477,7 +477,7 @@ struct pfifo_fast_priv { */ static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; -static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, +static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, int band) { return priv->q + band; @@ -486,10 +486,10 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { + if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *list = band2list(priv, band); priv->bitmap |= (1 << band); qdisc->q.qlen++; @@ -505,11 +505,16 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (likely(band >= 0)) { - struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + struct qdisc_skb_head *qh = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qh); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(qdisc, skb); + qdisc_bstats_update(qdisc, skb); + } qdisc->q.qlen--; - if (skb_queue_empty(list)) + if (qh->qlen == 0) priv->bitmap &= ~(1 << band); return skb; @@ -524,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (band >= 0) { - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *qh = band2list(priv, band); - return skb_peek(list); + return qh->head; } return NULL; @@ -564,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __skb_queue_head_init(band2list(priv, prio)); + qdisc_skb_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -612,7 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - skb_queue_head_init(&sch->q); + qdisc_skb_head_init(&sch->q); + spin_lock_init(&sch->q.lock); spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 53dbfa187870..c798d0de8a9d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -162,7 +162,7 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; + struct qdisc_skb_head direct_queue; long direct_pkts; struct qdisc_watchdog watchdog; @@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } +static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; +} + static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + htb_enqueue_tail(skb, sch, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - __skb_queue_head_init(&q->direct_queue); + qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index aaaf02175338..9f7b380cf0a3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } +static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -502,7 +512,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); @@ -522,8 +532,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->rate) { struct sk_buff *last; - if (!skb_queue_empty(&sch->q)) - last = skb_peek_tail(&sch->q); + if (sch->q.qlen) + last = sch->q.tail; else last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { @@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&sch->q, skb); + netem_enqueue_skb_head(&sch->q, skb); sch->qstats.requeues++; } @@ -587,7 +597,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct rb_node *p; tfifo_dequeue: - skb = __skb_dequeue(&sch->q); + skb = __qdisc_dequeue_head(&sch->q); if (skb) { qdisc_qstats_backlog_dec(sch, skb); deliver: diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a570b0bb254c..5c3a99d6aa82 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); @@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { struct sk_buff *skb; - skb = __qdisc_dequeue_head(sch, &sch->q); + skb = qdisc_dequeue_head(sch); if (!skb) return NULL; |