diff options
Diffstat (limited to 'net')
70 files changed, 917 insertions, 604 deletions
diff --git a/net/802/fddi.c b/net/802/fddi.c index f9a31a9f70f1..ebcf4830d6f1 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -10,7 +10,7 @@ * Authors: Lawrence V. Stefani, <stefani@lkg.dec.com> * * fddi.c is based on previous eth.c and tr.c work by - * Ross Biro, <bir7@leland.Stanford.Edu> + * Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Florian La Roche, <rzsfl@rz.uni-sb.de> diff --git a/net/802/hippi.c b/net/802/hippi.c index 4eb135c0afbb..051e8af56a77 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -7,7 +7,7 @@ * * Version: @(#)hippi.c 1.0.0 05/29/97 * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Florian La Roche, <rzsfl@rz.uni-sb.de> diff --git a/net/802/tr.c b/net/802/tr.c index 85293ccf7efc..a755e880f4ba 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -47,12 +47,12 @@ static void rif_check_expire(unsigned long dummy); * Each RIF entry we learn is kept this way */ -struct rif_cache_s { +struct rif_cache { unsigned char addr[TR_ALEN]; int iface; - __u16 rcf; - __u16 rseg[8]; - struct rif_cache_s *next; + __be16 rcf; + __be16 rseg[8]; + struct rif_cache *next; unsigned long last_used; unsigned char local_ring; }; @@ -64,7 +64,7 @@ struct rif_cache_s { * up a lot. 
*/ -static struct rif_cache_s *rif_table[RIF_TABLE_SIZE]; +static struct rif_cache *rif_table[RIF_TABLE_SIZE]; static DEFINE_SPINLOCK(rif_lock); @@ -249,7 +249,7 @@ void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device * { int slack; unsigned int hash; - struct rif_cache_s *entry; + struct rif_cache *entry; unsigned char *olddata; static const unsigned char mcast_func_addr[] = {0xC0,0x00,0x00,0x04,0x00,0x00}; @@ -337,7 +337,7 @@ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",trh->daddr[0], static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) { unsigned int hash, rii_p = 0; - struct rif_cache_s *entry; + struct rif_cache *entry; spin_lock_bh(&rif_lock); @@ -373,7 +373,7 @@ printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n", * FIXME: We ought to keep some kind of cache size * limiting and adjust the timers to suit. */ - entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC); + entry=kmalloc(sizeof(struct rif_cache),GFP_ATOMIC); if(!entry) { @@ -435,7 +435,7 @@ static void rif_check_expire(unsigned long dummy) spin_lock_bh(&rif_lock); for(i =0; i < RIF_TABLE_SIZE; i++) { - struct rif_cache_s *entry, **pentry; + struct rif_cache *entry, **pentry; pentry = rif_table+i; while((entry=*pentry) != NULL) { @@ -467,10 +467,10 @@ static void rif_check_expire(unsigned long dummy) #ifdef CONFIG_PROC_FS -static struct rif_cache_s *rif_get_idx(loff_t pos) +static struct rif_cache *rif_get_idx(loff_t pos) { int i; - struct rif_cache_s *entry; + struct rif_cache *entry; loff_t off = 0; for(i = 0; i < RIF_TABLE_SIZE; i++) @@ -493,7 +493,7 @@ static void *rif_seq_start(struct seq_file *seq, loff_t *pos) static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int i; - struct rif_cache_s *ent = v; + struct rif_cache *ent = v; ++*pos; @@ -522,7 +522,7 @@ static void rif_seq_stop(struct seq_file *seq, void *v) static int rif_seq_show(struct seq_file *seq, void *v) { int j, rcf_len, segment, 
brdgnmb; - struct rif_cache_s *entry = v; + struct rif_cache *entry = v; if (v == SEQ_START_TOKEN) seq_puts(seq, diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index 76598445d84b..1237e208e246 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -19,7 +19,7 @@ static int ltalk_mac_addr(struct net_device *dev, void *addr) return -EINVAL; } -void ltalk_setup(struct net_device *dev) +static void ltalk_setup(struct net_device *dev) { /* Fill in the fields of the device structure with localtalk-generic values. */ @@ -40,4 +40,22 @@ void ltalk_setup(struct net_device *dev) dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP; } -EXPORT_SYMBOL(ltalk_setup); + +/** + * alloc_ltalkdev - Allocates and sets up a localtalk device + * @sizeof_priv: Size of additional driver-private structure to be allocated + * for this localtalk device + * + * Fill in the fields of the device structure with localtalk-generic + * values. Basically does everything except registering the device. + * + * Constructs a new net device, complete with a private data area of + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for + * this private data area. 
+ */ +struct net_device *alloc_ltalkdev(int sizeof_priv) +{ + return alloc_netdev(sizeof_priv, "lt%d", ltalk_setup); +} +EXPORT_SYMBOL(alloc_ltalkdev); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index d9b72fde433c..f564ee99782d 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -21,10 +21,7 @@ static struct net_device_stats *br_dev_get_stats(struct net_device *dev) { - struct net_bridge *br; - - br = dev->priv; - + struct net_bridge *br = netdev_priv(dev); return &br->statistics; } @@ -54,9 +51,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) static int br_dev_open(struct net_device *dev) { - netif_start_queue(dev); + struct net_bridge *br = netdev_priv(dev); - br_stp_enable_bridge(dev->priv); + br_features_recompute(br); + netif_start_queue(dev); + br_stp_enable_bridge(br); return 0; } @@ -67,7 +66,7 @@ static void br_dev_set_multicast_list(struct net_device *dev) static int br_dev_stop(struct net_device *dev) { - br_stp_disable_bridge(dev->priv); + br_stp_disable_bridge(netdev_priv(dev)); netif_stop_queue(dev); @@ -76,7 +75,7 @@ static int br_dev_stop(struct net_device *dev) static int br_change_mtu(struct net_device *dev, int new_mtu) { - if ((new_mtu < 68) || new_mtu > br_min_mtu(dev->priv)) + if (new_mtu < 68 || new_mtu > br_min_mtu(netdev_priv(dev))) return -EINVAL; dev->mtu = new_mtu; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 69872bf3b87e..91bb895375f4 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -314,6 +314,28 @@ int br_min_mtu(const struct net_bridge *br) return mtu; } +/* + * Recomputes features using slave's features + */ +void br_features_recompute(struct net_bridge *br) +{ + struct net_bridge_port *p; + unsigned long features, checksum; + + features = NETIF_F_SG | NETIF_F_FRAGLIST + | NETIF_F_HIGHDMA | NETIF_F_TSO; + checksum = NETIF_F_IP_CSUM; /* least common subset */ + + list_for_each_entry(p, &br->port_list, list) { + if (!(p->dev->features + & 
(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))) + checksum = 0; + features &= p->dev->features; + } + + br->dev->features = features | checksum | NETIF_F_LLTX; +} + /* called with RTNL */ int br_add_if(struct net_bridge *br, struct net_device *dev) { @@ -368,6 +390,7 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) spin_lock_bh(&br->lock); br_stp_recalculate_bridge_id(br); + br_features_recompute(br); spin_unlock_bh(&br->lock); return 0; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 2b1cce46cab4..8f5f2e730992 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -26,7 +26,7 @@ static int br_pass_frame_up_finish(struct sk_buff *skb) #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 0; #endif - netif_rx(skb); + netif_receive_skb(skb); return 0; } @@ -54,6 +54,9 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_fdb_entry *dst; int passedup = 0; + /* insert into forwarding database after filtering to avoid spoofing */ + br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + if (br->dev->flags & IFF_PROMISC) { struct sk_buff *skb2; @@ -108,8 +111,7 @@ int br_handle_frame(struct net_bridge_port *p, struct sk_buff **pskb) if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto err; - if (p->state == BR_STATE_LEARNING || - p->state == BR_STATE_FORWARDING) + if (p->state == BR_STATE_LEARNING) br_fdb_update(p->br, p, eth_hdr(skb)->h_source); if (p->br->stp_enabled && diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index f8fb49e34764..917311c6828b 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -65,6 +65,15 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v } break; + case NETDEV_FEAT_CHANGE: + if (br->dev->flags & IFF_UP) + br_features_recompute(br); + + /* could do recursive feature change notification + * but who would care?? 
+ */ + break; + case NETDEV_DOWN: if (br->dev->flags & IFF_UP) br_stp_disable_port(p); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 54d63f1372a0..bdf95a74d8cd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -174,6 +174,7 @@ extern int br_add_if(struct net_bridge *br, extern int br_del_if(struct net_bridge *br, struct net_device *dev); extern int br_min_mtu(const struct net_bridge *br); +extern void br_features_recompute(struct net_bridge *br); /* br_input.c */ extern int br_handle_frame_finish(struct sk_buff *skb); diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index b91a875aca01..d071f1c9ad0b 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -140,6 +140,9 @@ int br_stp_handle_bpdu(struct sk_buff *skb) struct net_bridge *br = p->br; unsigned char *buf; + /* insert into forwarding database after filtering to avoid spoofing */ + br_fdb_update(p->br, p, eth_hdr(skb)->h_source); + /* need at least the 802 and STP headers */ if (!pskb_may_pull(skb, sizeof(header)+1) || memcmp(skb->data, header, sizeof(header))) diff --git a/net/core/dev.c b/net/core/dev.c index f5f005846fe1..ab935778ce81 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7,7 +7,7 @@ * 2 of the License, or (at your option) any later version. * * Derived from the non IP parts of dev.c 1.0.19 - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * @@ -761,6 +761,18 @@ int dev_change_name(struct net_device *dev, char *newname) } /** + * netdev_features_change - device changes fatures + * @dev: device to cause notification + * + * Called to indicate a device has changed features. 
+ */ +void netdev_features_change(struct net_device *dev) +{ + notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); +} +EXPORT_SYMBOL(netdev_features_change); + +/** * netdev_state_change - device changes state * @dev: device to cause notification * @@ -1732,6 +1744,7 @@ static int process_backlog(struct net_device *backlog_dev, int *budget) struct softnet_data *queue = &__get_cpu_var(softnet_data); unsigned long start_time = jiffies; + backlog_dev->weight = weight_p; for (;;) { struct sk_buff *skb; struct net_device *dev; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index f05fde97c43d..a3eeb88e1c81 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -29,7 +29,7 @@ u32 ethtool_op_get_link(struct net_device *dev) u32 ethtool_op_get_tx_csum(struct net_device *dev) { - return (dev->features & NETIF_F_IP_CSUM) != 0; + return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0; } int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) @@ -42,6 +42,15 @@ int ethtool_op_set_tx_csum(struct net_device *dev, u32 data) return 0; } +int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data) +{ + if (data) + dev->features |= NETIF_F_HW_CSUM; + else + dev->features &= ~NETIF_F_HW_CSUM; + + return 0; +} u32 ethtool_op_get_sg(struct net_device *dev) { return (dev->features & NETIF_F_SG) != 0; @@ -347,7 +356,7 @@ static int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr) { struct ethtool_coalesce coalesce; - if (!dev->ethtool_ops->get_coalesce) + if (!dev->ethtool_ops->set_coalesce) return -EOPNOTSUPP; if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) @@ -682,6 +691,7 @@ int dev_ethtool(struct ifreq *ifr) void __user *useraddr = ifr->ifr_data; u32 ethcmd; int rc; + unsigned long old_features; /* * XXX: This can be pushed down into the ethtool_* handlers that @@ -703,6 +713,8 @@ int dev_ethtool(struct ifreq *ifr) if ((rc = dev->ethtool_ops->begin(dev)) < 0) return rc; + old_features = dev->features; + switch 
(ethcmd) { case ETHTOOL_GSET: rc = ethtool_get_settings(dev, useraddr); @@ -712,7 +724,6 @@ int dev_ethtool(struct ifreq *ifr) break; case ETHTOOL_GDRVINFO: rc = ethtool_get_drvinfo(dev, useraddr); - break; case ETHTOOL_GREGS: rc = ethtool_get_regs(dev, useraddr); @@ -801,6 +812,10 @@ int dev_ethtool(struct ifreq *ifr) if(dev->ethtool_ops->complete) dev->ethtool_ops->complete(dev); + + if (old_features != dev->features) + netdev_features_change(dev); + return rc; ioctl: @@ -817,3 +832,4 @@ EXPORT_SYMBOL(ethtool_op_get_tx_csum); EXPORT_SYMBOL(ethtool_op_set_sg); EXPORT_SYMBOL(ethtool_op_set_tso); EXPORT_SYMBOL(ethtool_op_set_tx_csum); +EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum); diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 060f703659e8..e2137f3e489d 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -21,6 +21,7 @@ #define to_net_dev(class) container_of(class, struct net_device, class_dev) static const char fmt_hex[] = "%#x\n"; +static const char fmt_long_hex[] = "%#lx\n"; static const char fmt_dec[] = "%d\n"; static const char fmt_ulong[] = "%lu\n"; @@ -91,7 +92,7 @@ static CLASS_DEVICE_ATTR(field, S_IRUGO, show_##field, NULL) \ NETDEVICE_ATTR(addr_len, fmt_dec); NETDEVICE_ATTR(iflink, fmt_dec); NETDEVICE_ATTR(ifindex, fmt_dec); -NETDEVICE_ATTR(features, fmt_hex); +NETDEVICE_ATTR(features, fmt_long_hex); NETDEVICE_ATTR(type, fmt_dec); /* use same locking rules as GIFHWADDR ioctl's */ @@ -184,6 +185,22 @@ static ssize_t store_tx_queue_len(struct class_device *dev, const char *buf, siz static CLASS_DEVICE_ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len, store_tx_queue_len); +NETDEVICE_SHOW(weight, fmt_dec); + +static int change_weight(struct net_device *net, unsigned long new_weight) +{ + net->weight = new_weight; + return 0; +} + +static ssize_t store_weight(struct class_device *dev, const char *buf, size_t len) +{ + return netdev_store(dev, buf, len, change_weight); +} + +static CLASS_DEVICE_ATTR(weight, S_IRUGO | S_IWUSR, 
show_weight, + store_weight); + static struct class_device_attribute *net_class_attributes[] = { &class_device_attr_ifindex, @@ -193,6 +210,7 @@ static struct class_device_attribute *net_class_attributes[] = { &class_device_attr_features, &class_device_attr_mtu, &class_device_attr_flags, + &class_device_attr_weight, &class_device_attr_type, &class_device_attr_address, &class_device_attr_broadcast, diff --git a/net/core/sock.c b/net/core/sock.c index 98171ddd7e7d..96e00b08698f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -9,7 +9,7 @@ * * Version: $Id: sock.c,v 1.117 2002/02/01 22:01:03 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> * Alan Cox, <A.Cox@swansea.ac.uk> @@ -635,7 +635,11 @@ struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) if (zero_it) { memset(sk, 0, prot->obj_size); sk->sk_family = family; - sk->sk_prot = prot; + /* + * See comment in struct sock definition to understand + * why we need sk_prot_creator -acme + */ + sk->sk_prot = sk->sk_prot_creator = prot; sock_lock_init(sk); } @@ -654,7 +658,7 @@ struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it) void sk_free(struct sock *sk) { struct sk_filter *filter; - struct module *owner = sk->sk_prot->owner; + struct module *owner = sk->sk_prot_creator->owner; if (sk->sk_destruct) sk->sk_destruct(sk); @@ -672,8 +676,8 @@ void sk_free(struct sock *sk) __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); security_sk_free(sk); - if (sk->sk_prot->slab != NULL) - kmem_cache_free(sk->sk_prot->slab, sk); + if (sk->sk_prot_creator->slab != NULL) + kmem_cache_free(sk->sk_prot_creator->slab, sk); else kfree(sk); module_put(owner); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index e6e23eb14428..ee7bf46eb78a 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1426,7 +1426,7 @@ static struct rtnetlink_link 
dnet_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, - .dumpit = dn_cache_dump, + .dumpit = dn_cache_dump, }, #endif }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 16c4234cbe12..6617ea47d365 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -7,7 +7,7 @@ * * Version: @(#)eth.c 1.0.7 05/25/93 * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Florian La Roche, <rzsfl@rz.uni-sb.de> diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index cdad47642ae7..03942f133944 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -7,7 +7,7 @@ * * Version: $Id: af_inet.c,v 1.137 2002/02/01 22:01:03 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Florian La Roche, <flla@stud.uni-sb.de> * Alan Cox, <A.Cox@swansea.ac.uk> @@ -1181,6 +1181,7 @@ EXPORT_SYMBOL(inet_stream_connect); EXPORT_SYMBOL(inet_stream_ops); EXPORT_SYMBOL(inet_unregister_protosw); EXPORT_SYMBOL(net_statistics); +EXPORT_SYMBOL(sysctl_ip_nonlocal_bind); #ifdef INET_REFCNT_DEBUG EXPORT_SYMBOL(inet_sock_nr); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index abbc6d5c183e..478a30179a52 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. * * Derived from the IP parts of dev.c 1.0.19 - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * @@ -233,11 +233,14 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, int destroy) { + struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa1 = *ifap; ASSERT_RTNL(); - /* 1. Deleting primary ifaddr forces deletion all secondaries */ + /* 1. Deleting primary ifaddr forces deletion all secondaries + * unless alias promotion is set + **/ if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) { struct in_ifaddr *ifa; @@ -251,11 +254,16 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, continue; } - *ifap1 = ifa->ifa_next; + if (!IN_DEV_PROMOTE_SECONDARIES(in_dev)) { + *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa); - notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); - inet_free_ifa(ifa); + rtmsg_ifa(RTM_DELADDR, ifa); + notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); + inet_free_ifa(ifa); + } else { + promote = ifa; + break; + } } } @@ -281,6 +289,13 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, if (!in_dev->ifa_list) inetdev_destroy(in_dev); } + + if (promote && IN_DEV_PROMOTE_SECONDARIES(in_dev)) { + /* not sure if we should send a delete notify first? 
*/ + promote->ifa_flags &= ~IFA_F_SECONDARY; + rtmsg_ifa(RTM_NEWADDR, promote); + notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); + } } static int inet_insert_ifa(struct in_ifaddr *ifa) @@ -1384,6 +1399,15 @@ static struct devinet_sysctl_table { .proc_handler = &ipv4_doint_and_flush, .strategy = &ipv4_doint_and_flush_strategy, }, + { + .ctl_name = NET_IPV4_CONF_PROMOTE_SECONDARIES, + .procname = "promote_secondaries", + .data = &ipv4_devconf.promote_secondaries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &ipv4_doint_and_flush, + .strategy = &ipv4_doint_and_flush_strategy, + }, }, .devinet_dev = { { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 053a883247ba..eae84cc39d3f 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -478,7 +478,7 @@ static int __init esp4_init(void) { struct xfrm_decap_state decap; - if (sizeof(struct esp_decap_data) < + if (sizeof(struct esp_decap_data) > sizeof(decap.decap_data)) { extern void decap_data_too_small(void); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 85bf0d3e294b..cb759484979d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -207,6 +207,7 @@ int sysctl_icmp_ignore_bogus_error_responses; int sysctl_icmp_ratelimit = 1 * HZ; int sysctl_icmp_ratemask = 0x1818; +int sysctl_icmp_errors_use_inbound_ifaddr; /* * ICMP control array. This specifies what to do with each ICMP. @@ -511,8 +512,12 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) */ saddr = iph->daddr; - if (!(rt->rt_flags & RTCF_LOCAL)) - saddr = 0; + if (!(rt->rt_flags & RTCF_LOCAL)) { + if (sysctl_icmp_errors_use_inbound_ifaddr) + saddr = inet_select_addr(skb_in->dev, 0, RT_SCOPE_LINK); + else + saddr = 0; + } tos = icmp_pointers[type].error ? 
((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index a0d0833034be..4e47a2658c7c 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -7,7 +7,7 @@ * * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <Alan.Cox@linux.org> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24fe3e00b42b..760dc8238d65 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -7,7 +7,7 @@ * * Version: $Id: ip_output.c,v 1.100 2002/02/01 22:01:03 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> * Alan Cox, <Alan.Cox@linux.org> @@ -490,6 +490,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) /* Partially cloned skb? */ if (skb_shared(frag)) goto slow_path; + + BUG_ON(frag->sk); + if (skb->sk) { + sock_hold(skb->sk); + frag->sk = skb->sk; + frag->destructor = sock_wfree; + skb->truesize -= frag->truesize; + } } /* Everything is OK. Generate! 
*/ diff --git a/net/ipv4/ipvs/Makefile b/net/ipv4/ipvs/Makefile index a788461a40c9..30e85de9ffff 100644 --- a/net/ipv4/ipvs/Makefile +++ b/net/ipv4/ipvs/Makefile @@ -11,7 +11,7 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ - ip_vs_est.o ip_vs_proto.o ip_vs_proto_icmp.o \ + ip_vs_est.o ip_vs_proto.o \ $(ip_vs_proto-objs-y) diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 253c46252bd5..867d4e9c6594 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -216,9 +216,6 @@ int ip_vs_protocol_init(void) #ifdef CONFIG_IP_VS_PROTO_UDP REGISTER_PROTOCOL(&ip_vs_protocol_udp); #endif -#ifdef CONFIG_IP_VS_PROTO_ICMP - REGISTER_PROTOCOL(&ip_vs_protocol_icmp); -#endif #ifdef CONFIG_IP_VS_PROTO_AH REGISTER_PROTOCOL(&ip_vs_protocol_ah); #endif diff --git a/net/ipv4/ipvs/ip_vs_proto_icmp.c b/net/ipv4/ipvs/ip_vs_proto_icmp.c deleted file mode 100644 index 191e94aa1c1f..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_icmp.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * ip_vs_proto_icmp.c: ICMP load balancing support for IP Virtual Server - * - * Authors: Julian Anastasov <ja@ssi.bg>, March 2002 - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/icmp.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -static int icmp_timeouts[1] = { 1*60*HZ }; - -static char * icmp_state_name_table[1] = { "ICMP" }; - -static struct ip_vs_conn * -icmp_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ -#if 0 - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = 
ip_vs_conn_in_get(iph->protocol, - iph->saddr, 0, - iph->daddr, 0); - } else { - cp = ip_vs_conn_in_get(iph->protocol, - iph->daddr, 0, - iph->saddr, 0); - } - - return cp; - -#else - return NULL; -#endif -} - -static struct ip_vs_conn * -icmp_conn_out_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ -#if 0 - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(iph->protocol, - iph->saddr, 0, - iph->daddr, 0); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, 0, - iph->saddr, 0); - } - - return cp; -#else - return NULL; -#endif -} - -static int -icmp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - *verdict = NF_ACCEPT; - return 0; -} - -static int -icmp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) -{ - if (!(skb->nh.iph->frag_off & __constant_htons(IP_OFFSET))) { - if (skb->ip_summed != CHECKSUM_UNNECESSARY) { - if (ip_vs_checksum_complete(skb, skb->nh.iph->ihl * 4)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, "Failed checksum for"); - return 0; - } - } - } - return 1; -} - -static void -icmp_debug_packet(struct ip_vs_protocol *pp, - const struct sk_buff *skb, - int offset, - const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else if (ih->frag_off & __constant_htons(IP_OFFSET)) - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - else { - struct icmphdr _icmph, *ic; - - ic = skb_header_pointer(skb, offset + ih->ihl*4, - sizeof(_icmph), &_icmph); - if (ic == NULL) - sprintf(buf, "%s TRUNCATED to %u bytes\n", - pp->name, skb->len - offset); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u T:%d C:%d", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr), - ic->type, ic->code); - } - 
printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - -static int -icmp_state_transition(struct ip_vs_conn *cp, int direction, - const struct sk_buff *skb, - struct ip_vs_protocol *pp) -{ - cp->timeout = pp->timeout_table[IP_VS_ICMP_S_NORMAL]; - return 1; -} - -static int -icmp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) -{ - int num; - char **names; - - num = IP_VS_ICMP_S_LAST; - names = icmp_state_name_table; - return ip_vs_set_state_timeout(pp->timeout_table, num, names, sname, to); -} - - -static void icmp_init(struct ip_vs_protocol *pp) -{ - pp->timeout_table = icmp_timeouts; -} - -static void icmp_exit(struct ip_vs_protocol *pp) -{ -} - -struct ip_vs_protocol ip_vs_protocol_icmp = { - .name = "ICMP", - .protocol = IPPROTO_ICMP, - .dont_defrag = 0, - .init = icmp_init, - .exit = icmp_exit, - .conn_schedule = icmp_conn_schedule, - .conn_in_get = icmp_conn_in_get, - .conn_out_get = icmp_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = icmp_csum_check, - .state_transition = icmp_state_transition, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = icmp_debug_packet, - .timeout_change = NULL, - .set_state_timeout = icmp_set_state_timeout, -}; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index faa6176bbeb1..de21da00057f 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -508,7 +508,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rc = NF_ACCEPT; /* do not touch skb anymore */ atomic_inc(&cp->in_pkts); - __ip_vs_conn_put(cp); goto out; } diff --git a/net/ipv4/multipath_drr.c b/net/ipv4/multipath_drr.c index 9349686131fc..c9cf8726051d 100644 --- a/net/ipv4/multipath_drr.c +++ b/net/ipv4/multipath_drr.c @@ -31,6 +31,7 @@ #include <linux/igmp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <linux/mroute.h> #include <linux/init.h> #include <net/ip.h> @@ -57,7 +58,6 @@ 
struct multipath_device { static struct multipath_device state[MULTIPATH_MAX_DEVICECANDIDATES]; static DEFINE_SPINLOCK(state_lock); -static struct rtable *last_selection = NULL; static int inline __multipath_findslot(void) { @@ -111,11 +111,6 @@ struct notifier_block drr_dev_notifier = { .notifier_call = drr_dev_event, }; -static void drr_remove(struct rtable *rt) -{ - if (last_selection == rt) - last_selection = NULL; -} static void drr_safe_inc(atomic_t *usecount) { @@ -144,14 +139,6 @@ static void drr_select_route(const struct flowi *flp, int devidx = -1; int cur_min_devidx = -1; - /* if necessary and possible utilize the old alternative */ - if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 && - last_selection != NULL) { - result = last_selection; - *rp = result; - return; - } - /* 1. make sure all alt. nexthops have the same GC related data */ /* 2. determine the new candidate to be returned */ result = NULL; @@ -229,12 +216,10 @@ static void drr_select_route(const struct flowi *flp, } *rp = result; - last_selection = result; } static struct ip_mp_alg_ops drr_ops = { .mp_alg_select_route = drr_select_route, - .mp_alg_remove = drr_remove, }; static int __init drr_init(void) @@ -244,7 +229,7 @@ static int __init drr_init(void) if (err) return err; - err = multipath_alg_register(&drr_ops, IP_MP_ALG_RR); + err = multipath_alg_register(&drr_ops, IP_MP_ALG_DRR); if (err) goto fail; @@ -263,3 +248,4 @@ static void __exit drr_exit(void) module_init(drr_init); module_exit(drr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_random.c b/net/ipv4/multipath_random.c index 805a16e47de5..5249dbe7c559 100644 --- a/net/ipv4/multipath_random.c +++ b/net/ipv4/multipath_random.c @@ -31,6 +31,7 @@ #include <linux/igmp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <linux/mroute.h> #include <linux/init.h> #include <net/ip.h> @@ -126,3 +127,4 @@ static void __exit random_exit(void) module_init(random_init); 
module_exit(random_exit); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_rr.c b/net/ipv4/multipath_rr.c index 554a82568160..b6cd2870478f 100644 --- a/net/ipv4/multipath_rr.c +++ b/net/ipv4/multipath_rr.c @@ -31,6 +31,7 @@ #include <linux/igmp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <linux/mroute.h> #include <linux/init.h> #include <net/ip.h> @@ -47,29 +48,12 @@ #include <net/checksum.h> #include <net/ip_mp_alg.h> -#define MULTIPATH_MAX_CANDIDATES 40 - -static struct rtable* last_used = NULL; - -static void rr_remove(struct rtable *rt) -{ - if (last_used == rt) - last_used = NULL; -} - static void rr_select_route(const struct flowi *flp, struct rtable *first, struct rtable **rp) { struct rtable *nh, *result, *min_use_cand = NULL; int min_use = -1; - /* if necessary and possible utilize the old alternative */ - if ((flp->flags & FLOWI_FLAG_MULTIPATHOLDROUTE) != 0 && - last_used != NULL) { - result = last_used; - goto out; - } - /* 1. make sure all alt. nexthops have the same GC related data * 2. 
determine the new candidate to be returned */ @@ -90,15 +74,12 @@ static void rr_select_route(const struct flowi *flp, if (!result) result = first; -out: - last_used = result; result->u.dst.__use++; *rp = result; } static struct ip_mp_alg_ops rr_ops = { .mp_alg_select_route = rr_select_route, - .mp_alg_remove = rr_remove, }; static int __init rr_init(void) @@ -113,3 +94,4 @@ static void __exit rr_exit(void) module_init(rr_init); module_exit(rr_exit); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c index 10b23e1bece6..bd7d75b6abe0 100644 --- a/net/ipv4/multipath_wrandom.c +++ b/net/ipv4/multipath_wrandom.c @@ -31,6 +31,7 @@ #include <linux/igmp.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/module.h> #include <linux/mroute.h> #include <linux/init.h> #include <net/ip.h> @@ -172,7 +173,7 @@ static void wrandom_select_route(const struct flowi *flp, multipath_comparekeys(&rt->fl, flp)) { struct multipath_candidate* mpc = (struct multipath_candidate*) - kmalloc(size_mpc, GFP_KERNEL); + kmalloc(size_mpc, GFP_ATOMIC); if (!mpc) return; @@ -244,7 +245,7 @@ static void wrandom_set_nhinfo(__u32 network, if (!target_route) { const size_t size_rt = sizeof(struct multipath_route); target_route = (struct multipath_route *) - kmalloc(size_rt, GFP_KERNEL); + kmalloc(size_rt, GFP_ATOMIC); target_route->gw = nh->nh_gw; target_route->oif = nh->nh_oif; @@ -265,7 +266,7 @@ static void wrandom_set_nhinfo(__u32 network, if (!target_dest) { const size_t size_dst = sizeof(struct multipath_dest); target_dest = (struct multipath_dest*) - kmalloc(size_dst, GFP_KERNEL); + kmalloc(size_dst, GFP_ATOMIC); target_dest->nh_info = nh; target_dest->network = network; @@ -342,3 +343,4 @@ static void __exit wrandom_exit(void) module_init(wrandom_init); module_exit(wrandom_exit); +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 28d9425d5c39..09e824622977 
100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -940,37 +940,25 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, struct sk_buff * ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) { - struct sock *sk = skb->sk; #ifdef CONFIG_NETFILTER_DEBUG unsigned int olddebug = skb->nf_debug; #endif - if (sk) { - sock_hold(sk); - skb_orphan(skb); - } + skb_orphan(skb); local_bh_disable(); skb = ip_defrag(skb, user); local_bh_enable(); - if (!skb) { - if (sk) - sock_put(sk); - return skb; - } - - if (sk) { - skb_set_owner_w(skb, sk); - sock_put(sk); - } - - ip_send_check(skb->nh.iph); - skb->nfcache |= NFC_ALTERED; + if (skb) { + ip_send_check(skb->nh.iph); + skb->nfcache |= NFC_ALTERED; #ifdef CONFIG_NETFILTER_DEBUG - /* Packet path as if nothing had happened. */ - skb->nf_debug = olddebug; + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; #endif + } + return skb; } diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 46ca45f74d85..bc59f7b39805 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -256,6 +256,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) { struct list_head *e = v; + ++*pos; e = e->next; if (e == &ip_conntrack_expect_list) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index e5746b674413..eda1fba431a4 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -3,6 +3,7 @@ * communicating with userspace via netlink. 
* * (C) 2000-2002 James Morris <jmorris@intercode.com.au> + * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,6 +18,7 @@ * 2005-01-10: Added /proc counter for dropped packets; fixed so * packets aren't delivered to user space if they're going * to be dropped. + * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte) * */ #include <linux/module.h> @@ -71,7 +73,15 @@ static DECLARE_MUTEX(ipqnl_sem); static void ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) { + /* TCP input path (and probably other bits) assume to be called + * from softirq context, not from syscall, like ipq_issue_verdict is + * called. TCP input path deadlocks with locks taken from timer + * softirq, e.g. We therefore emulate this by local_bh_disable() */ + + local_bh_disable(); nf_reinject(entry->skb, entry->info, verdict); + local_bh_enable(); + kfree(entry); } diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index 90a587cacaa4..0db405a869f2 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -7,7 +7,7 @@ * * Version: $Id: protocol.c,v 1.14 2001/05/18 02:25:49 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 93624a32eb9a..5b1ec586bae6 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -7,7 +7,7 @@ * * Version: $Id: raw.c,v 1.64 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 199311746932..a682d28e247b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -7,7 +7,7 @@ * * Version: $Id: route.c,v 1.103 2002/01/12 07:44:09 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Linus Torvalds, <Linus.Torvalds@helsinki.fi> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 3aafb298c1c1..23068bddbf0b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -23,6 +23,7 @@ extern int sysctl_ip_nonlocal_bind; extern int sysctl_icmp_echo_ignore_all; extern int sysctl_icmp_echo_ignore_broadcasts; extern int sysctl_icmp_ignore_bogus_error_responses; +extern int sysctl_icmp_errors_use_inbound_ifaddr; /* From ip_fragment.c */ extern int sysctl_ipfrag_low_thresh; @@ -396,6 +397,14 @@ ctl_table ipv4_table[] = { .proc_handler = &proc_dointvec }, { + .ctl_name = NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR, + .procname = "icmp_errors_use_inbound_ifaddr", + .data = &sysctl_icmp_errors_use_inbound_ifaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec + }, + { .ctl_name = NET_IPV4_ROUTE, .procname = "route", .maxlen = 0, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5cff56af7855..0d9a4fd5f1a4 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -7,7 +7,7 @@ * * Version: $Id: tcp.c,v 1.216 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> @@ -2338,7 +2338,7 @@ void __init tcp_init(void) (tcp_bhash_size * sizeof(struct tcp_bind_hashbucket)); order++) ; - if (order > 4) { + if (order >= 4) { sysctl_local_port_range[0] = 32768; sysctl_local_port_range[1] = 61000; sysctl_tcp_max_tw_buckets = 180000; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 6984042c0927..5bad504630a3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -7,7 +7,7 @@ * * Version: $Id: tcp_input.c,v 1.243 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> @@ -4355,16 +4355,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto no_ack; } - if (eaten) { - if (tcp_in_quickack_mode(tp)) { - tcp_send_ack(sk); - } else { - tcp_send_delayed_ack(sk); - } - } else { - __tcp_ack_snd_check(sk, 0); - } - + __tcp_ack_snd_check(sk, 0); no_ack: if (eaten) __kfree_skb(skb); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index fd70509f0d53..eea1a17a9ac2 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -7,7 +7,7 @@ * * Version: $Id: tcp_minisocks.c,v 1.15 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a12df6979ffd..fa24e7ae1f40 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -7,7 +7,7 @@ * * Version: $Id: tcp_output.c,v 1.146 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. 
van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 85b279f1e935..799ebe061e2c 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -7,7 +7,7 @@ * * Version: $Id: tcp_timer.c,v 1.88 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Mark Evans, <evansmp@uhura.aston.ac.uk> * Corey Minyard <wf-rch!minyard@relay.EU.net> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 8a213238f287..7c24e64b443f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -7,7 +7,7 @@ * * Version: $Id: udp.c,v 1.102 2002/02/01 22:01:04 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> * Alan Cox, <Alan.Cox@linux.org> @@ -738,7 +738,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) unsigned long amount; amount = 0; - spin_lock_irq(&sk->sk_receive_queue.lock); + spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); if (skb != NULL) { /* @@ -748,7 +748,7 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) */ amount = skb->len - sizeof(struct udphdr); } - spin_unlock_irq(&sk->sk_receive_queue.lock); + spin_unlock_bh(&sk->sk_receive_queue.lock); return put_user(amount, (int __user *)arg); } @@ -848,12 +848,12 @@ csum_copy_err: /* Clear queue. 
*/ if (flags&MSG_PEEK) { int clear = 0; - spin_lock_irq(&sk->sk_receive_queue.lock); + spin_lock_bh(&sk->sk_receive_queue.lock); if (skb == skb_peek(&sk->sk_receive_queue)) { __skb_unlink(skb, &sk->sk_receive_queue); clear = 1; } - spin_unlock_irq(&sk->sk_receive_queue.lock); + spin_unlock_bh(&sk->sk_receive_queue.lock); if (clear) kfree_skb(skb); } @@ -1334,7 +1334,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) struct sk_buff_head *rcvq = &sk->sk_receive_queue; struct sk_buff *skb; - spin_lock_irq(&rcvq->lock); + spin_lock_bh(&rcvq->lock); while ((skb = skb_peek(rcvq)) != NULL) { if (udp_checksum_complete(skb)) { UDP_INC_STATS_BH(UDP_MIB_INERRORS); @@ -1345,7 +1345,7 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) break; } } - spin_unlock_irq(&rcvq->lock); + spin_unlock_bh(&rcvq->lock); /* nothing to see, move along */ if (skb == NULL) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7744a2592693..2720899d516c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -372,6 +372,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) ndev->regen_timer.data = (unsigned long) ndev; if ((dev->flags&IFF_LOOPBACK) || dev->type == ARPHRD_TUNNEL || + dev->type == ARPHRD_NONE || dev->type == ARPHRD_SIT) { printk(KERN_INFO "Disabled Privacy Extensions on device %p(%s)\n", diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8e0f569b883e..ff3ec9822e36 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -277,8 +277,8 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = skb->nh.ipv6h; - struct sock *sk = icmpv6_socket->sk; - struct ipv6_pinfo *np = inet6_sk(sk); + struct sock *sk; + struct ipv6_pinfo *np; struct in6_addr *saddr = NULL; struct dst_entry *dst; struct icmp6hdr tmp_hdr; @@ -358,6 +358,9 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if (icmpv6_xmit_lock()) 
return; + sk = icmpv6_socket->sk; + np = inet6_sk(sk); + if (!icmpv6_xrlim_allow(sk, type, &fl)) goto out; @@ -423,9 +426,9 @@ out: static void icmpv6_echo_reply(struct sk_buff *skb) { - struct sock *sk = icmpv6_socket->sk; + struct sock *sk; struct inet6_dev *idev; - struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_pinfo *np; struct in6_addr *saddr = NULL; struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw; struct icmp6hdr tmp_hdr; @@ -454,6 +457,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (icmpv6_xmit_lock()) return; + sk = icmpv6_socket->sk; + np = inet6_sk(sk); + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index a93f6dc51979..0e5f7499debb 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -535,10 +535,12 @@ release: if (err) goto done; - /* Do not check for fault */ - if (!freq.flr_label) - copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, - &fl->label, sizeof(fl->label)); + if (!freq.flr_label) { + if (copy_to_user(&((struct in6_flowlabel_req __user *) optval)->flr_label, + &fl->label, sizeof(fl->label))) { + /* Intentionally ignore fault. */ + } + } sfl1->fl = fl; sfl1->next = np->ipv6_fl_list; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0f0711417c9d..b78a53586804 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -552,13 +552,17 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_headroom(frag) < hlen) goto slow_path; - /* Correct socket ownership. */ - if (frag->sk == NULL) - goto slow_path; - /* Partially cloned skb? 
*/ if (skb_shared(frag)) goto slow_path; + + BUG_ON(frag->sk); + if (skb->sk) { + sock_hold(skb->sk); + frag->sk = skb->sk; + frag->destructor = sock_wfree; + skb->truesize -= frag->truesize; + } } err = 0; @@ -1116,12 +1120,10 @@ int ip6_push_pending_frames(struct sock *sk) tail_skb = &(tmp_skb->next); skb->len += tmp_skb->len; skb->data_len += tmp_skb->len; -#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */ skb->truesize += tmp_skb->truesize; __sock_put(tmp_skb->sk); tmp_skb->destructor = NULL; tmp_skb->sk = NULL; -#endif } ipv6_addr_copy(final_dst, &fl->fl6_dst); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3b1c9fa184ae..ba3b0c267f75 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -882,6 +882,7 @@ ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) t->parms.hop_limit = p->hop_limit; t->parms.encap_limit = p->encap_limit; t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; ip6ip6_tnl_link_config(t); return 0; } diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 2f4c91ddc9a3..5ade5a5d1990 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -37,5 +37,4 @@ EXPORT_SYMBOL(in6_dev_finish_destroy); EXPORT_SYMBOL(xfrm6_rcv); #endif EXPORT_SYMBOL(rt6_lookup); -EXPORT_SYMBOL(fl6_sock_lookup); EXPORT_SYMBOL(ipv6_push_nfrag_opts); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 601a148f60f3..6b9867717d11 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -84,6 +84,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) mtu = IPV6_MIN_MTU; if (skb->len > mtu) { + skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); ret = -EMSGSIZE; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 4429b1a1fe5f..cf1d91e74c82 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -113,6 +113,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = 
(struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + if (rt->rt6i_node) + xdst->route_cookie = rt->rt6i_node->fn_sernum; dst1->next = dst_prev; dst_prev = dst1; @@ -137,6 +139,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->child = &rt->u.dst; dst->path = &rt->u.dst; + if (rt->rt6i_node) + ((struct xfrm_dst *)dst)->path_cookie = rt->rt6i_node->fn_sernum; *dst_p = dst; dst = dst_prev; diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index d6ccd3239dcf..70543d89438b 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -470,6 +470,7 @@ void irda_device_unregister_dongle(struct dongle_reg *dongle) } EXPORT_SYMBOL(irda_device_unregister_dongle); +#ifdef CONFIG_ISA_DMA_API /* * Function setup_dma (idev, buffer, count, mode) * @@ -492,3 +493,4 @@ void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode) release_dma_lock(flags); } EXPORT_SYMBOL(irda_setup_dma); +#endif diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4ee392066148..e41ce458c2a9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -49,6 +49,8 @@ #include <linux/bitops.h> #include <linux/mm.h> #include <linux/types.h> +#include <linux/audit.h> + #include <net/sock.h> #include <net/scm.h> @@ -733,11 +735,15 @@ static inline int do_one_broadcast(struct sock *sk, sock_hold(sk); if (p->skb2 == NULL) { - if (atomic_read(&p->skb->users) != 1) { + if (skb_shared(p->skb)) { p->skb2 = skb_clone(p->skb, p->allocation); } else { - p->skb2 = p->skb; - atomic_inc(&p->skb->users); + p->skb2 = skb_get(p->skb); + /* + * skb ownership may have been set when + * delivered to a previous socket. 
+ */ + skb_orphan(p->skb2); } } if (p->skb2 == NULL) { @@ -783,11 +789,12 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); + kfree_skb(skb); + netlink_unlock_table(); if (info.skb2) kfree_skb(info.skb2); - kfree_skb(skb); if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) @@ -904,6 +911,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).groups = nlk->groups; NETLINK_CB(skb).dst_pid = dst_pid; NETLINK_CB(skb).dst_groups = dst_groups; + NETLINK_CB(skb).loginuid = audit_get_loginuid(current->audit_context); memcpy(NETLINK_CREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); /* What can I do? Netlink is asynchronous, so that diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 64acea0adaae..0269616e75a1 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -7,7 +7,7 @@ * * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $ * - * Authors: Ross Biro, <bir7@leland.Stanford.Edu> + * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * diff --git a/net/sched/Kconfig b/net/sched/Kconfig index b0941186f867..b22c9beb604d 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -405,7 +405,7 @@ config NET_EMATCH_STACK ---help--- Size of the local stack variable used while evaluating the tree of ematches. Limits the depth of the tree, i.e. the number of - encapsulated precedences. Every level requires 4 bytes of addtional + encapsulated precedences. Every level requires 4 bytes of additional stack space. 
config NET_EMATCH_CMP diff --git a/net/sched/act_api.c b/net/sched/act_api.c index cafcb084098d..914c85ff8fe6 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -881,7 +881,7 @@ static int __init tc_action_init(void) link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action; } - printk("TC classifier action (bugs to netdev@oss.sgi.com cc " + printk("TC classifier action (bugs to netdev@vger.kernel.org cc " "hadi@cyberus.ca)\n"); return 0; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0d2d4415f334..dfb300bb6baa 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh, rta = (struct rtattr *) b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); + if (f->res.classid) + RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid); + if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto rtattr_failure; diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index f1eeaf65cee5..48bb23c2a35a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -32,7 +32,7 @@ * +-----------+ +-----------+ * | | * ---> meta_ops[INT][INDEV](...) 
| - * | | + * | | * ----------- | * V V * +-----------+ +-----------+ @@ -70,6 +70,7 @@ #include <net/dst.h> #include <net/route.h> #include <net/pkt_cls.h> +#include <net/sock.h> struct meta_obj { @@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif) } /************************************************************************** + * Socket Attributes + **************************************************************************/ + +#define SKIP_NONLOCAL(skb) \ + if (unlikely(skb->sk == NULL)) { \ + *err = -1; \ + return; \ + } + +META_COLLECTOR(int_sk_family) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_family; +} + +META_COLLECTOR(int_sk_state) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_state; +} + +META_COLLECTOR(int_sk_reuse) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_reuse; +} + +META_COLLECTOR(int_sk_bound_if) +{ + SKIP_NONLOCAL(skb); + /* No error if bound_dev_if is 0, legal userspace check */ + dst->value = skb->sk->sk_bound_dev_if; +} + +META_COLLECTOR(var_sk_bound_if) +{ + SKIP_NONLOCAL(skb); + + if (skb->sk->sk_bound_dev_if == 0) { + dst->value = (unsigned long) "any"; + dst->len = 3; + } else { + struct net_device *dev; + + dev = dev_get_by_index(skb->sk->sk_bound_dev_if); + *err = var_dev(dev, dst); + if (dev) + dev_put(dev); + } +} + +META_COLLECTOR(int_sk_refcnt) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_refcnt); +} + +META_COLLECTOR(int_sk_rcvbuf) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvbuf; +} + +META_COLLECTOR(int_sk_shutdown) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_shutdown; +} + +META_COLLECTOR(int_sk_proto) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_protocol; +} + +META_COLLECTOR(int_sk_type) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_type; +} + +META_COLLECTOR(int_sk_rmem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_rmem_alloc); +} + +META_COLLECTOR(int_sk_wmem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = 
atomic_read(&skb->sk->sk_wmem_alloc); +} + +META_COLLECTOR(int_sk_omem_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = atomic_read(&skb->sk->sk_omem_alloc); +} + +META_COLLECTOR(int_sk_rcv_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_receive_queue.qlen; +} + +META_COLLECTOR(int_sk_snd_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_write_queue.qlen; +} + +META_COLLECTOR(int_sk_wmem_queued) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_wmem_queued; +} + +META_COLLECTOR(int_sk_fwd_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_forward_alloc; +} + +META_COLLECTOR(int_sk_sndbuf) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndbuf; +} + +META_COLLECTOR(int_sk_alloc) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_allocation; +} + +META_COLLECTOR(int_sk_route_caps) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_route_caps; +} + +META_COLLECTOR(int_sk_hashent) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_hashent; +} + +META_COLLECTOR(int_sk_lingertime) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_lingertime / HZ; +} + +META_COLLECTOR(int_sk_err_qlen) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_error_queue.qlen; +} + +META_COLLECTOR(int_sk_ack_bl) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_ack_backlog; +} + +META_COLLECTOR(int_sk_max_ack_bl) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_max_ack_backlog; +} + +META_COLLECTOR(int_sk_prio) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_priority; +} + +META_COLLECTOR(int_sk_rcvlowat) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvlowat; +} + +META_COLLECTOR(int_sk_rcvtimeo) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_rcvtimeo / HZ; +} + +META_COLLECTOR(int_sk_sndtimeo) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndtimeo / HZ; +} + +META_COLLECTOR(int_sk_sendmsg_off) +{ + SKIP_NONLOCAL(skb); + dst->value = skb->sk->sk_sndmsg_off; +} + +META_COLLECTOR(int_sk_write_pend) +{ + SKIP_NONLOCAL(skb); + dst->value 
= skb->sk->sk_write_pending; +} + +/************************************************************************** * Meta value collectors assignment table **************************************************************************/ @@ -293,41 +502,75 @@ struct meta_ops struct meta_value *, struct meta_obj *, int *); }; +#define META_ID(name) TCF_META_ID_##name +#define META_FUNC(name) { .get = meta_##name } + /* Meta value operations table listing all meta value collectors and * assigns them to a type and meta id. */ static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = { [TCF_META_TYPE_VAR] = { - [TCF_META_ID_DEV] = { .get = meta_var_dev }, - [TCF_META_ID_INDEV] = { .get = meta_var_indev }, - [TCF_META_ID_REALDEV] = { .get = meta_var_realdev } + [META_ID(DEV)] = META_FUNC(var_dev), + [META_ID(INDEV)] = META_FUNC(var_indev), + [META_ID(REALDEV)] = META_FUNC(var_realdev), + [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if), }, [TCF_META_TYPE_INT] = { - [TCF_META_ID_RANDOM] = { .get = meta_int_random }, - [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 }, - [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 }, - [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 }, - [TCF_META_ID_DEV] = { .get = meta_int_dev }, - [TCF_META_ID_INDEV] = { .get = meta_int_indev }, - [TCF_META_ID_REALDEV] = { .get = meta_int_realdev }, - [TCF_META_ID_PRIORITY] = { .get = meta_int_priority }, - [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol }, - [TCF_META_ID_SECURITY] = { .get = meta_int_security }, - [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype }, - [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen }, - [TCF_META_ID_DATALEN] = { .get = meta_int_datalen }, - [TCF_META_ID_MACLEN] = { .get = meta_int_maclen }, + [META_ID(RANDOM)] = META_FUNC(int_random), + [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0), + [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1), + [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2), + [META_ID(DEV)] = META_FUNC(int_dev), + 
[META_ID(INDEV)] = META_FUNC(int_indev), + [META_ID(REALDEV)] = META_FUNC(int_realdev), + [META_ID(PRIORITY)] = META_FUNC(int_priority), + [META_ID(PROTOCOL)] = META_FUNC(int_protocol), + [META_ID(SECURITY)] = META_FUNC(int_security), + [META_ID(PKTTYPE)] = META_FUNC(int_pkttype), + [META_ID(PKTLEN)] = META_FUNC(int_pktlen), + [META_ID(DATALEN)] = META_FUNC(int_datalen), + [META_ID(MACLEN)] = META_FUNC(int_maclen), #ifdef CONFIG_NETFILTER - [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark }, + [META_ID(NFMARK)] = META_FUNC(int_nfmark), #endif - [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex }, + [META_ID(TCINDEX)] = META_FUNC(int_tcindex), #ifdef CONFIG_NET_CLS_ACT - [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd }, - [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid }, + [META_ID(TCVERDICT)] = META_FUNC(int_tcverd), + [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid), #endif #ifdef CONFIG_NET_CLS_ROUTE - [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid }, + [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid), #endif - [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif } + [META_ID(RTIIF)] = META_FUNC(int_rtiif), + [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family), + [META_ID(SK_STATE)] = META_FUNC(int_sk_state), + [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse), + [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if), + [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt), + [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf), + [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf), + [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown), + [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto), + [META_ID(SK_TYPE)] = META_FUNC(int_sk_type), + [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc), + [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc), + [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc), + [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued), + [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen), + [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen), + 
[META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen), + [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc), + [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc), + [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps), + [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent), + [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime), + [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl), + [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl), + [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio), + [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat), + [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo), + [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo), + [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off), + [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend), } }; @@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b) /* Let gcc optimize it, the unlikely is not really based on * some numbers but jump free code for mismatches seems * more logical. */ - if (unlikely(a == b)) + if (unlikely(a->value == b->value)) return 0; - else if (a < b) + else if (a->value < b->value) return -1; else return 1; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 8a3db9d95bab..d8bd2a569c7c 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -18,7 +18,7 @@ #include <asm/byteorder.h> -#if 1 /* control */ +#if 0 /* control */ #define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) #else #define DPRINTK(format,args...) 
@@ -73,8 +73,13 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg, DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new, old); - if (!new) - new = &noop_qdisc; + + if (new == NULL) { + new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + if (new == NULL) + new = &noop_qdisc; + } + sch_tree_lock(sch); *old = xchg(&p->q,new); if (*old) @@ -163,14 +168,15 @@ static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker) return; for (i = 0; i < p->indices; i++) { if (p->mask[i] == 0xff && !p->value[i]) - continue; + goto ignore; if (walker->count >= walker->skip) { if (walker->fn(sch, i+1, walker) < 0) { walker->stop = 1; break; } } - walker->count++; +ignore: + walker->count++; } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index e0c9fbe73b15..bb9bf8d5003c 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -53,7 +53,6 @@ struct netem_sched_data { struct Qdisc *qdisc; - struct sk_buff_head delayed; struct timer_list timer; u32 latency; @@ -63,11 +62,12 @@ struct netem_sched_data { u32 gap; u32 jitter; u32 duplicate; + u32 reorder; struct crndstate { unsigned long last; unsigned long rho; - } delay_cor, loss_cor, dup_cor; + } delay_cor, loss_cor, dup_cor, reorder_cor; struct disttable { u32 size; @@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } -/* Put skb in the private delayed queue. 
*/ -static int netem_delay(struct Qdisc *sch, struct sk_buff *skb) -{ - struct netem_sched_data *q = qdisc_priv(sch); - psched_tdiff_t td; - psched_time_t now; - - PSCHED_GET_TIME(now); - td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - - /* Always queue at tail to keep packets in order */ - if (likely(q->delayed.qlen < q->limit)) { - struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; - - PSCHED_TADD2(now, td, cb->time_to_send); - - pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb, - now, cb->time_to_send); - - __skb_queue_tail(&q->delayed, skb); - return NET_XMIT_SUCCESS; - } - - pr_debug("netem_delay: queue over limit %d\n", q->limit); - sch->qstats.overlimits++; - kfree_skb(skb); - return NET_XMIT_DROP; -} - /* - * Move a packet that is ready to send from the delay holding - * list to the underlying qdisc. + * Insert one skb into qdisc. + * Note: parent depends on return value to account for queue length. + * NET_XMIT_DROP: queue length didn't change. + * NET_XMIT_SUCCESS: one skb was queued. */ -static int netem_run(struct Qdisc *sch) -{ - struct netem_sched_data *q = qdisc_priv(sch); - struct sk_buff *skb; - psched_time_t now; - - PSCHED_GET_TIME(now); - - skb = skb_peek(&q->delayed); - if (skb) { - const struct netem_skb_cb *cb - = (const struct netem_skb_cb *)skb->cb; - long delay - = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); - pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); - - /* if more time remaining? 
*/ - if (delay > 0) { - mod_timer(&q->timer, jiffies + delay); - return 1; - } - - __skb_unlink(skb, &q->delayed); - - if (q->qdisc->enqueue(skb, q->qdisc)) { - sch->q.qlen--; - sch->qstats.drops++; - } - } - - return 0; -} - static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); + struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb; + struct sk_buff *skb2; int ret; + int count = 1; pr_debug("netem_enqueue skb=%p\n", skb); + /* Random duplication */ + if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) + ++count; + /* Random packet drop 0 => none, ~0 => all */ - if (q->loss && q->loss >= get_crandom(&q->loss_cor)) { - pr_debug("netem_enqueue: random loss\n"); + if (q->loss && q->loss >= get_crandom(&q->loss_cor)) + --count; + + if (count == 0) { sch->qstats.drops++; kfree_skb(skb); - return 0; /* lie about loss so TCP doesn't know */ + return NET_XMIT_DROP; } - /* Random duplication */ - if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) { - struct sk_buff *skb2; - - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) { - struct Qdisc *qp; - - /* Since one packet can generate two packets in the - * queue, the parent's qlen accounting gets confused, - * so fix it. - */ - qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent)); - if (qp) - qp->q.qlen++; - - sch->q.qlen++; - sch->bstats.bytes += skb2->len; - sch->bstats.packets++; - } else - sch->qstats.drops++; + /* + * If we need to duplicate packet, then re-insert at top of the + * qdisc tree, since parent queuer expects that only one + * skb will be queued. + */ + if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { + struct Qdisc *rootq = sch->dev->qdisc; + u32 dupsave = q->duplicate; /* prevent duplicating a dup... 
*/ + q->duplicate = 0; + + rootq->enqueue(skb2, rootq); + q->duplicate = dupsave; } - /* If doing simple delay then gap == 0 so all packets - * go into the delayed holding queue - * otherwise if doing out of order only "1 out of gap" - * packets will be delayed. - */ - if (q->counter < q->gap) { + if (q->gap == 0 /* not doing reordering */ + || q->counter < q->gap /* inside last reordering gap */ + || q->reorder < get_crandom(&q->reorder_cor)) { + psched_time_t now; + PSCHED_GET_TIME(now); + PSCHED_TADD2(now, tabledist(q->latency, q->jitter, + &q->delay_cor, q->delay_dist), + cb->time_to_send); ++q->counter; ret = q->qdisc->enqueue(skb, q->qdisc); } else { + /* + * Do re-ordering by putting one out of N packets at the front + * of the queue. + */ + PSCHED_GET_TIME(cb->time_to_send); q->counter = 0; - ret = netem_delay(sch, skb); - netem_run(sch); + ret = q->qdisc->ops->requeue(skb, q->qdisc); } if (likely(ret == NET_XMIT_SUCCESS)) { @@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); struct sk_buff *skb; - int pending; - - pending = netem_run(sch); skb = q->qdisc->dequeue(q->qdisc); if (skb) { - pr_debug("netem_dequeue: return skb=%p\n", skb); - sch->q.qlen--; - sch->flags &= ~TCQ_F_THROTTLED; - } - else if (pending) { - pr_debug("netem_dequeue: throttling\n"); + const struct netem_skb_cb *cb + = (const struct netem_skb_cb *)skb->cb; + psched_time_t now; + long delay; + + /* if more time remaining? 
*/ + PSCHED_GET_TIME(now); + delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now)); + pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay); + if (delay <= 0) { + pr_debug("netem_dequeue: return skb=%p\n", skb); + sch->q.qlen--; + sch->flags &= ~TCQ_F_THROTTLED; + return skb; + } + + mod_timer(&q->timer, jiffies + delay); sch->flags |= TCQ_F_THROTTLED; - } - return skb; + if (q->qdisc->ops->requeue(skb, q->qdisc) != 0) + sch->qstats.drops++; + } + + return NULL; } static void netem_watchdog(unsigned long arg) @@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch) struct netem_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); - skb_queue_purge(&q->delayed); - sch->q.qlen = 0; sch->flags &= ~TCQ_F_THROTTLED; del_timer_sync(&q->timer); @@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr) return 0; } +static int get_reorder(struct Qdisc *sch, const struct rtattr *attr) +{ + struct netem_sched_data *q = qdisc_priv(sch); + const struct tc_netem_reorder *r = RTA_DATA(attr); + + if (RTA_PAYLOAD(attr) != sizeof(*r)) + return -EINVAL; + + q->reorder = r->probability; + init_crandom(&q->reorder_cor, r->correlation); + return 0; +} + static int netem_change(struct Qdisc *sch, struct rtattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) q->jitter = qopt->jitter; q->limit = qopt->limit; q->gap = qopt->gap; + q->counter = 0; q->loss = qopt->loss; q->duplicate = qopt->duplicate; + /* for compatiablity with earlier versions. + * if gap is set, need to assume 100% probablity + */ + q->reorder = ~0; + /* Handle nested options after initial queue options. * Should have put all options in nested format but too late now. 
*/ @@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) if (ret) return ret; } + if (tb[TCA_NETEM_REORDER-1]) { + ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]); + if (ret) + return ret; + } } @@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) if (!opt) return -EINVAL; - skb_queue_head_init(&q->delayed); init_timer(&q->timer); q->timer.function = netem_watchdog; q->timer.data = (unsigned long) sch; - q->counter = 0; q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); if (!q->qdisc) { @@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct rtattr *rta = (struct rtattr *) b; struct tc_netem_qopt qopt; struct tc_netem_corr cor; + struct tc_netem_reorder reorder; qopt.latency = q->latency; qopt.jitter = q->jitter; @@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) cor.loss_corr = q->loss_cor.rho; cor.dup_corr = q->dup_cor.rho; RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor); + + reorder.probability = q->reorder; + reorder.correlation = q->reorder_cor.rho; + RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder); + rta->rta_len = skb->tail - b; return skb->len; diff --git a/net/sctp/input.c b/net/sctp/input.c index b719a77d66b4..fffc880a646d 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -178,6 +178,37 @@ int sctp_rcv(struct sk_buff *skb) asoc = __sctp_rcv_lookup(skb, &src, &dest, &transport); + if (!asoc) + ep = __sctp_rcv_lookup_endpoint(&dest); + + /* Retrieve the common input handling substructure. */ + rcvr = asoc ? 
&asoc->base : &ep->base; + sk = rcvr->sk; + + /* + * If a frame arrives on an interface and the receiving socket is + * bound to another interface, via SO_BINDTODEVICE, treat it as OOTB + */ + if (sk->sk_bound_dev_if && (sk->sk_bound_dev_if != af->skb_iif(skb))) + { + sock_put(sk); + if (asoc) { + sctp_association_put(asoc); + asoc = NULL; + } else { + sctp_endpoint_put(ep); + ep = NULL; + } + sk = sctp_get_ctl_sock(); + ep = sctp_sk(sk)->ep; + sctp_endpoint_hold(ep); + sock_hold(sk); + rcvr = &ep->base; + } + + if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + goto discard_release; + /* * RFC 2960, 8.4 - Handle "Out of the blue" Packets. * An SCTP packet is called an "out of the blue" (OOTB) @@ -187,22 +218,12 @@ int sctp_rcv(struct sk_buff *skb) * packet belongs. */ if (!asoc) { - ep = __sctp_rcv_lookup_endpoint(&dest); if (sctp_rcv_ootb(skb)) { SCTP_INC_STATS_BH(SCTP_MIB_OUTOFBLUES); goto discard_release; } } - /* Retrieve the common input handling substructure. */ - rcvr = asoc ? &asoc->base : &ep->base; - sk = rcvr->sk; - - if ((sk) && (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)) { - goto discard_release; - } - - /* SCTP seems to always need a timestamp right now (FIXME) */ if (skb->stamp.tv_sec == 0) { do_gettimeofday(&skb->stamp); @@ -265,13 +286,11 @@ discard_it: discard_release: /* Release any structures we may be holding. 
*/ - if (asoc) { - sock_put(asoc->base.sk); + sock_put(sk); + if (asoc) sctp_association_put(asoc); - } else { - sock_put(ep->base.sk); + else sctp_endpoint_put(ep); - } goto discard_it; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index c9d9ea064734..c7e42d125b9c 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -812,26 +812,23 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) if (addr->sa.sa_family != AF_INET6) af = sctp_get_af_specific(addr->sa.sa_family); else { - struct sock *sk; int type = ipv6_addr_type(&addr->v6.sin6_addr); - sk = sctp_opt2sk(opt); + struct net_device *dev; + if (type & IPV6_ADDR_LINKLOCAL) { - /* Note: Behavior similar to af_inet6.c: - * 1) Overrides previous bound_dev_if - * 2) Destructive even if bind isn't successful. - */ - - if (addr->v6.sin6_scope_id) - sk->sk_bound_dev_if = addr->v6.sin6_scope_id; - if (!sk->sk_bound_dev_if) + if (!addr->v6.sin6_scope_id) + return 0; + dev = dev_get_by_index(addr->v6.sin6_scope_id); + if (!dev) return 0; + dev_put(dev); } af = opt->pf->af; } return af->available(addr, opt); } -/* Verify that the provided sockaddr looks bindable. Common verification, +/* Verify that the provided sockaddr looks sendable. Common verification, * has already been taken care of. */ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) @@ -842,19 +839,16 @@ static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) if (addr->sa.sa_family != AF_INET6) af = sctp_get_af_specific(addr->sa.sa_family); else { - struct sock *sk; int type = ipv6_addr_type(&addr->v6.sin6_addr); - sk = sctp_opt2sk(opt); + struct net_device *dev; + if (type & IPV6_ADDR_LINKLOCAL) { - /* Note: Behavior similar to af_inet6.c: - * 1) Overrides previous bound_dev_if - * 2) Destructive even if bind isn't successful. 
- */ - - if (addr->v6.sin6_scope_id) - sk->sk_bound_dev_if = addr->v6.sin6_scope_id; - if (!sk->sk_bound_dev_if) + if (!addr->v6.sin6_scope_id) + return 0; + dev = dev_get_by_index(addr->v6.sin6_scope_id); + if (!dev) return 0; + dev_put(dev); } af = opt->pf->af; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index e42fd8c2916b..98d49ec9b74b 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -132,14 +132,25 @@ void sctp_snmp_proc_exit(void) static void sctp_seq_dump_local_addrs(struct seq_file *seq, struct sctp_ep_common *epb) { struct list_head *pos; + struct sctp_association *asoc; struct sctp_sockaddr_entry *laddr; - union sctp_addr *addr; + struct sctp_transport *peer; + union sctp_addr *addr, *primary = NULL; struct sctp_af *af; + if (epb->type == SCTP_EP_TYPE_ASSOCIATION) { + asoc = sctp_assoc(epb); + peer = asoc->peer.primary_path; + primary = &peer->saddr; + } + list_for_each(pos, &epb->bind_addr.address_list) { laddr = list_entry(pos, struct sctp_sockaddr_entry, list); addr = (union sctp_addr *)&laddr->a; af = sctp_get_af_specific(addr->sa.sa_family); + if (primary && af->cmp_addr(addr, primary)) { + seq_printf(seq, "*"); + } af->seq_dump_addr(seq, addr); } } @@ -149,17 +160,54 @@ static void sctp_seq_dump_remote_addrs(struct seq_file *seq, struct sctp_associa { struct list_head *pos; struct sctp_transport *transport; - union sctp_addr *addr; + union sctp_addr *addr, *primary; struct sctp_af *af; + primary = &(assoc->peer.primary_addr); list_for_each(pos, &assoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); addr = (union sctp_addr *)&transport->ipaddr; af = sctp_get_af_specific(addr->sa.sa_family); + if (af->cmp_addr(addr, primary)) { + seq_printf(seq, "*"); + } af->seq_dump_addr(seq, addr); } } +static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos > sctp_ep_hashsize) + return NULL; + + if (*pos < 0) + *pos = 0; + + if (*pos == 0) + seq_printf(seq, " ENDPT SOCK STY SST HBKT 
LPORT UID INODE LADDRS\n"); + + ++*pos; + + return (void *)pos; +} + +static void sctp_eps_seq_stop(struct seq_file *seq, void *v) +{ + return; +} + + +static void * sctp_eps_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + if (*pos > sctp_ep_hashsize) + return NULL; + + ++*pos; + + return pos; +} + + /* Display sctp endpoints (/proc/net/sctp/eps). */ static int sctp_eps_seq_show(struct seq_file *seq, void *v) { @@ -167,38 +215,50 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_endpoint *ep; struct sock *sk; - int hash; - - seq_printf(seq, " ENDPT SOCK STY SST HBKT LPORT LADDRS\n"); - for (hash = 0; hash < sctp_ep_hashsize; hash++) { - head = &sctp_ep_hashtable[hash]; - read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { - ep = sctp_ep(epb); - sk = epb->sk; - seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d ", ep, sk, - sctp_sk(sk)->type, sk->sk_state, hash, - epb->bind_addr.port); - sctp_seq_dump_local_addrs(seq, epb); - seq_printf(seq, "\n"); - } - read_unlock(&head->lock); + int hash = *(int *)v; + + if (hash > sctp_ep_hashsize) + return -ENOMEM; + + head = &sctp_ep_hashtable[hash-1]; + sctp_local_bh_disable(); + read_lock(&head->lock); + for (epb = head->chain; epb; epb = epb->next) { + ep = sctp_ep(epb); + sk = epb->sk; + seq_printf(seq, "%8p %8p %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, + sctp_sk(sk)->type, sk->sk_state, hash-1, + epb->bind_addr.port, + sock_i_uid(sk), sock_i_ino(sk)); + + sctp_seq_dump_local_addrs(seq, epb); + seq_printf(seq, "\n"); } + read_unlock(&head->lock); + sctp_local_bh_enable(); return 0; } +static struct seq_operations sctp_eps_ops = { + .start = sctp_eps_seq_start, + .next = sctp_eps_seq_next, + .stop = sctp_eps_seq_stop, + .show = sctp_eps_seq_show, +}; + + /* Initialize the seq file operations for 'eps' object. 
*/ static int sctp_eps_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sctp_eps_seq_show, NULL); + return seq_open(file, &sctp_eps_ops); } static struct file_operations sctp_eps_seq_fops = { .open = sctp_eps_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = seq_release, }; /* Set up the proc fs entry for 'eps' object. */ @@ -221,6 +281,40 @@ void sctp_eps_proc_exit(void) remove_proc_entry("eps", proc_net_sctp); } + +static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos) +{ + if (*pos > sctp_assoc_hashsize) + return NULL; + + if (*pos < 0) + *pos = 0; + + if (*pos == 0) + seq_printf(seq, " ASSOC SOCK STY SST ST HBKT ASSOC-ID TX_QUEUE RX_QUEUE UID INODE LPORT " + "RPORT LADDRS <-> RADDRS\n"); + + ++*pos; + + return (void *)pos; +} + +static void sctp_assocs_seq_stop(struct seq_file *seq, void *v) +{ + return; +} + + +static void * sctp_assocs_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + if (*pos > sctp_assoc_hashsize) + return NULL; + + ++*pos; + + return pos; +} + /* Display sctp associations (/proc/net/sctp/assocs). 
*/ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) { @@ -228,43 +322,57 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) struct sctp_ep_common *epb; struct sctp_association *assoc; struct sock *sk; - int hash; - - seq_printf(seq, " ASSOC SOCK STY SST ST HBKT LPORT RPORT " - "LADDRS <-> RADDRS\n"); - for (hash = 0; hash < sctp_assoc_hashsize; hash++) { - head = &sctp_assoc_hashtable[hash]; - read_lock(&head->lock); - for (epb = head->chain; epb; epb = epb->next) { - assoc = sctp_assoc(epb); - sk = epb->sk; - seq_printf(seq, - "%8p %8p %-3d %-3d %-2d %-4d %-5d %-5d ", - assoc, sk, sctp_sk(sk)->type, sk->sk_state, - assoc->state, hash, epb->bind_addr.port, - assoc->peer.port); - sctp_seq_dump_local_addrs(seq, epb); - seq_printf(seq, "<-> "); - sctp_seq_dump_remote_addrs(seq, assoc); - seq_printf(seq, "\n"); - } - read_unlock(&head->lock); + int hash = *(int *)v; + + if (hash > sctp_assoc_hashsize) + return -ENOMEM; + + head = &sctp_assoc_hashtable[hash-1]; + sctp_local_bh_disable(); + read_lock(&head->lock); + for (epb = head->chain; epb; epb = epb->next) { + assoc = sctp_assoc(epb); + sk = epb->sk; + seq_printf(seq, + "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", + assoc, sk, sctp_sk(sk)->type, sk->sk_state, + assoc->state, hash-1, assoc->assoc_id, + (sk->sk_rcvbuf - assoc->rwnd), + assoc->sndbuf_used, + sock_i_uid(sk), sock_i_ino(sk), + epb->bind_addr.port, + assoc->peer.port); + + seq_printf(seq, " "); + sctp_seq_dump_local_addrs(seq, epb); + seq_printf(seq, "<-> "); + sctp_seq_dump_remote_addrs(seq, assoc); + seq_printf(seq, "\n"); } + read_unlock(&head->lock); + sctp_local_bh_enable(); return 0; } +static struct seq_operations sctp_assoc_ops = { + .start = sctp_assocs_seq_start, + .next = sctp_assocs_seq_next, + .stop = sctp_assocs_seq_stop, + .show = sctp_assocs_seq_show, +}; + /* Initialize the seq file operations for 'assocs' object. 
*/ static int sctp_assocs_seq_open(struct inode *inode, struct file *file) { - return single_open(file, sctp_assocs_seq_show, NULL); + return seq_open(file, &sctp_assoc_ops); } static struct file_operations sctp_assocs_seq_fops = { .open = sctp_assocs_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = single_release, + .release = seq_release, }; /* Set up the proc fs entry for 'assocs' object. */ diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 2e1f9c3556f5..5135e1a25d25 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -378,10 +378,13 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { int ret = inet_addr_type(addr->v4.sin_addr.s_addr); - /* FIXME: ip_nonlocal_bind sysctl support. */ - if (addr->v4.sin_addr.s_addr != INADDR_ANY && ret != RTN_LOCAL) + if (addr->v4.sin_addr.s_addr != INADDR_ANY && + ret != RTN_LOCAL && + !sp->inet.freebind && + !sysctl_ip_nonlocal_bind) return 0; + return 1; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0b338eca6dc0..2a3c0e08a090 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4686,6 +4686,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_endpoint *newep = newsp->ep; struct sk_buff *skb, *tmp; struct sctp_ulpevent *event; + int flags = 0; /* Migrate socket buffer sizes and all the socket level options to the * new socket. 
@@ -4707,6 +4708,17 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_sk(newsk)->bind_hash = pp; inet_sk(newsk)->num = inet_sk(oldsk)->num; + /* Copy the bind_addr list from the original endpoint to the new + * endpoint so that we can handle restarts properly + */ + if (assoc->peer.ipv4_address) + flags |= SCTP_ADDR4_PEERSUPP; + if (assoc->peer.ipv6_address) + flags |= SCTP_ADDR6_PEERSUPP; + sctp_bind_addr_copy(&newsp->ep->base.bind_addr, + &oldsp->ep->base.bind_addr, + SCTP_SCOPE_GLOBAL, GFP_KERNEL, flags); + /* Move any messages in the old socket's receive queue that are for the * peeled off association to the new socket's receive queue. */ diff --git a/net/socket.c b/net/socket.c index 2cd44990d8d3..cec0cb38b9ce 100644 --- a/net/socket.c +++ b/net/socket.c @@ -4,7 +4,7 @@ * Version: @(#)socket.c 1.1.93 18/02/95 * * Authors: Orest Zborowski, <obz@Kodak.COM> - * Ross Biro, <bir7@leland.Stanford.Edu> + * Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * * Fixes: diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index c478fc8db776..c420eba4876b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -770,33 +770,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd); if (err) goto out_mknod_parent; - /* - * Yucky last component or no last component at all? - * (foo/., foo/.., /////) - */ - err = -EEXIST; - if (nd.last_type != LAST_NORM) - goto out_mknod; - /* - * Lock the directory. - */ - down(&nd.dentry->d_inode->i_sem); - /* - * Do the final lookup. - */ - dentry = lookup_hash(&nd.last, nd.dentry); + + dentry = lookup_create(&nd, 0); err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_mknod_unlock; - err = -ENOENT; - /* - * Special case - lookup gave negative, but... we had foo/bar/ - * From the vfs_mknod() POV we just have a negative dentry - - * all is fine. 
Let's be bastards - you had / on the end, you've - * been asking for (non-existent) directory. -ENOENT for you. - */ - if (nd.last.name[nd.last.len] && !dentry->d_inode) - goto out_mknod_dput; + /* * All right, let's create it. */ @@ -845,7 +824,6 @@ out_mknod_dput: dput(dentry); out_mknod_unlock: up(&nd.dentry->d_inode->i_sem); -out_mknod: path_release(&nd); out_mknod_parent: if (err==-EEXIST) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 080aae243ce0..2f4531fcaca2 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -698,7 +698,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return -ENOMEM; if (skb1->sk) - skb_set_owner_w(skb, skb1->sk); + skb_set_owner_w(skb2, skb1->sk); /* Looking around. Are we still alive? * OK, link new skb, drop old one */ diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 55ed979db144..d07f5ce31824 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1136,7 +1136,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) struct xfrm_dst *last; u32 mtu; - if (!dst_check(dst->path, 0) || + if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; @@ -1156,7 +1156,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) xdst->child_mtu_cached = mtu; } - if (!dst_check(xdst->route, 0)) + if (!dst_check(xdst->route, xdst->route_cookie)) return 0; mtu = dst_mtu(xdst->route); if (xdst->route_mtu_cached != mtu) { diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 5ddda2c98af9..97509011c274 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -34,14 +34,21 @@ static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { struct rtattr *rt = xfrma[type - 1]; struct xfrm_algo *algp; + int len; if (!rt) return 0; - if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp)) + len = (rt->rta_len - sizeof(*rt)) - sizeof(*algp); + if 
(len < 0) return -EINVAL; algp = RTA_DATA(rt); + + len -= (algp->alg_key_len + 7U) / 8; + if (len < 0) + return -EINVAL; + switch (type) { case XFRMA_ALG_AUTH: if (!algp->alg_key_len && @@ -162,6 +169,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct rtattr *rta = u_arg; struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; + int len; if (!rta) return 0; @@ -173,11 +181,12 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return -ENOSYS; *props = algo->desc.sadb_alg_id; - p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL); + len = sizeof(*ualg) + (ualg->alg_key_len + 7U) / 8; + p = kmalloc(len, GFP_KERNEL); if (!p) return -ENOMEM; - memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len); + memcpy(p, ualg, len); *algpp = p; return 0; } |