diff options
Diffstat (limited to 'include/net')
77 files changed, 1558 insertions, 554 deletions
diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 378d601258be..5f43f7a70fe6 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -59,6 +59,19 @@ struct in6_validator_info { struct netlink_ext_ack *extack; }; +struct ifa6_config { + const struct in6_addr *pfx; + unsigned int plen; + + const struct in6_addr *peer_pfx; + + u32 rt_priority; + u32 ifa_flags; + u32 preferred_lft; + u32 valid_lft; + u16 scope; +}; + int addrconf_init(void); void addrconf_cleanup(void); @@ -223,6 +236,22 @@ struct ipv6_stub { const struct in6_addr *addr); int (*ipv6_dst_lookup)(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); + + struct fib6_table *(*fib6_get_table)(struct net *net, u32 id); + struct fib6_info *(*fib6_lookup)(struct net *net, int oif, + struct flowi6 *fl6, int flags); + struct fib6_info *(*fib6_table_lookup)(struct net *net, + struct fib6_table *table, + int oif, struct flowi6 *fl6, + int flags); + struct fib6_info *(*fib6_multipath_select)(const struct net *net, + struct fib6_info *f6i, + struct flowi6 *fl6, int oif, + const struct sk_buff *skb, + int strict); + u32 (*ip6_mtu_from_fib6)(struct fib6_info *f6i, struct in6_addr *daddr, + struct in6_addr *saddr); + void (*udpv6_encap_enable)(void); void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, @@ -308,6 +337,20 @@ static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) } /** + * __in6_dev_get_safely - get inet6_dev pointer from netdevice + * @dev: network device + * + * This is a safer version of __in6_dev_get + */ +static inline struct inet6_dev *__in6_dev_get_safely(const struct net_device *dev) +{ + if (likely(dev)) + return rcu_dereference_rtnl(dev->ip6_ptr); + else + return NULL; +} + +/** * in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device * diff --git a/include/net/ax25.h b/include/net/ax25.h index c91bc87931c7..3f9aea8087e3 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -15,6 +15,7 @@ #include <linux/refcount.h> #include <net/neighbour.h> #include <net/sock.h> +#include <linux/seq_file.h> #define AX25_T1CLAMPLO 1 #define AX25_T1CLAMPHI (30 * HZ) @@ -399,7 +400,7 @@ int ax25_check_iframes_acked(ax25_cb *, unsigned short); /* ax25_route.c */ void ax25_rt_device_down(struct net_device *); int ax25_rt_ioctl(unsigned int, void __user *); -extern const struct file_operations ax25_route_fops; +extern const struct seq_operations ax25_rt_seqops; ax25_route *ax25_get_route(ax25_address *addr, struct net_device *dev); int ax25_rt_autobind(ax25_cb *, ax25_address *); struct sk_buff *ax25_rt_build_path(struct sk_buff *, ax25_address *, @@ -455,7 +456,7 @@ unsigned long ax25_display_timer(struct timer_list *); extern int ax25_uid_policy; ax25_uid_assoc *ax25_findbyuid(kuid_t); int __must_check ax25_uid_ioctl(int, struct sockaddr_ax25 *); -extern const struct file_operations ax25_uid_fops; +extern const struct seq_operations ax25_uid_seqops; void ax25_uid_free(void); /* sysctl_net_ax25.c */ diff --git a/include/net/ax88796.h b/include/net/ax88796.h index b9a3beca0ce4..84b3785d0e66 100644 --- a/include/net/ax88796.h +++ b/include/net/ax88796.h @@ -12,6 +12,10 @@ #ifndef __NET_AX88796_PLAT_H #define __NET_AX88796_PLAT_H +struct sk_buff; +struct net_device; +struct platform_device; + #define AXFLG_HAS_EEPROM (1<<0) #define AXFLG_MAC_FROMDEV (1<<1) /* device already has MAC */ #define AXFLG_HAS_93CX6 (1<<2) /* use eeprom_93cx6 driver */ @@ -26,6 +30,16 @@ struct ax_plat_data { u32 *reg_offsets; /* register offsets */ u8 *mac_addr; /* MAC addr (only used when AXFLG_MAC_FROMPLATFORM is used */ + + /* uses default ax88796 buffer if set to NULL */ + void (*block_output)(struct net_device *dev, int count, + const unsigned char *buf, int star_page); + void (*block_input)(struct net_device *dev, int count, + struct sk_buff *skb, int ring_offset); + /* returns nonzero if a pending interrupt request might by caused by + * the ax88786. Handles all interrupts if set to NULL + */ + int (*check_irq)(struct platform_device *pdev); }; #endif /* __NET_AX88796_PLAT_H */ diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index ec9d6bc65855..53ce8176c313 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -271,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); -__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait); +__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); int bt_sock_wait_ready(struct sock *sk, unsigned long flags); diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index b619a190ff12..893bbbb5d2fa 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1393,6 +1393,8 @@ struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u8 event, u32 timeout); +int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param); int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param); diff --git a/include/net/bonding.h b/include/net/bonding.h index b52235158836..808f1d167349 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -285,8 +285,15 @@ static inline bool bond_needs_speed_duplex(const struct bonding *bond) static inline bool bond_is_nondyn_tlb(const struct bonding *bond) { - return (BOND_MODE(bond) == BOND_MODE_TLB) && - (bond->params.tlb_dynamic_lb == 0); + return (bond_is_lb(bond) && bond->params.tlb_dynamic_lb == 0); +} + +static inline bool bond_mode_can_use_xmit_hash(const struct bonding *bond) +{ + return (BOND_MODE(bond) == BOND_MODE_8023AD || + BOND_MODE(bond) == BOND_MODE_XOR || + BOND_MODE(bond) == BOND_MODE_TLB || + BOND_MODE(bond) == BOND_MODE_ALB); } static inline bool bond_mode_uses_xmit_hash(const struct bonding *bond) diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index 71c72a939bf8..c5187438af38 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -121,6 +121,21 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock) #endif } +static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events) +{ + if (sk_can_busy_loop(sock->sk) && + events && (events & POLL_BUSY_LOOP)) { + /* once, only if requested by syscall */ + sk_busy_loop(sock->sk, 1); + } +} + +/* if this socket can poll_ll, tell the system call */ +static inline __poll_t sock_poll_busy_flag(struct socket *sock) +{ + return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0; +} + /* used in the NIC receive handler to mark the skb */ static inline void skb_mark_napi_id(struct sk_buff *skb, struct napi_struct *napi) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 250dac390806..5fbfe61f41c6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1080,6 +1080,37 @@ struct sta_bss_parameters { }; /** + * struct cfg80211_txq_stats - TXQ statistics for this TID + * @filled: bitmap of flags using the bits of &enum nl80211_txq_stats to + * indicate the relevant values in this struct are filled + * @backlog_bytes: total number of bytes currently backlogged + * @backlog_packets: total number of packets currently backlogged + * @flows: number of new flows seen + * @drops: total number of packets dropped + * @ecn_marks: total number of packets marked with ECN CE + * @overlimit: number of drops due to queue space overflow + * @overmemory: number of drops due to memory limit overflow + * @collisions: number of hash collisions + * @tx_bytes: total number of bytes dequeued + * @tx_packets: total number of packets dequeued + * @max_flows: maximum number of flows supported + */ +struct cfg80211_txq_stats { + u32 filled; + u32 backlog_bytes; + u32 backlog_packets; + u32 flows; + u32 drops; + u32 ecn_marks; + u32 overlimit; + u32 overmemory; + u32 collisions; + u32 tx_bytes; + u32 tx_packets; + u32 max_flows; +}; + +/** * struct cfg80211_tid_stats - per-TID statistics * @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to * indicate the relevant values in this struct are filled @@ -1088,6 +1119,7 @@ struct sta_bss_parameters { * @tx_msdu_retries: number of retries (not counting the first) for * transmitted MSDUs * @tx_msdu_failed: number of failed transmitted MSDUs + * @txq_stats: TXQ statistics */ struct cfg80211_tid_stats { u32 filled; @@ -1095,6 +1127,7 @@ struct cfg80211_tid_stats { u64 tx_msdu; u64 tx_msdu_retries; u64 tx_msdu_failed; + struct cfg80211_txq_stats txq_stats; }; #define IEEE80211_MAX_CHAINS 4 @@ -1151,7 +1184,10 @@ struct cfg80211_tid_stats { * @rx_duration: aggregate PPDU duration(usecs) for all the frames from a peer * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. + * Note that this doesn't use the @filled bit, but is used if non-NULL. * @ack_signal: signal strength (in dBm) of the last ACK frame. + * @avg_ack_signal: average rssi value of ack packet for the no of msdu's has + * been sent. */ struct station_info { u64 filled; @@ -1195,8 +1231,9 @@ struct station_info { u64 rx_beacon; u64 rx_duration; u8 rx_beacon_signal_avg; - struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1]; + struct cfg80211_tid_stats *pertid; s8 ack_signal; + s8 avg_ack_signal; }; #if IS_ENABLED(CONFIG_CFG80211) @@ -2188,9 +2225,14 @@ struct cfg80211_connect_params { * have to be updated as part of update_connect_params() call. * * @UPDATE_ASSOC_IES: Indicates whether association request IEs are updated + * @UPDATE_FILS_ERP_INFO: Indicates that FILS connection parameters (realm, + * username, erp sequence number and rrk) are updated + * @UPDATE_AUTH_TYPE: Indicates that authentication type is updated */ enum cfg80211_connect_params_changed { UPDATE_ASSOC_IES = BIT(0), + UPDATE_FILS_ERP_INFO = BIT(1), + UPDATE_AUTH_TYPE = BIT(2), }; /** @@ -2201,6 +2243,9 @@ enum cfg80211_connect_params_changed { * @WIPHY_PARAM_RTS_THRESHOLD: wiphy->rts_threshold has changed * @WIPHY_PARAM_COVERAGE_CLASS: coverage class changed * @WIPHY_PARAM_DYN_ACK: dynack has been enabled + * @WIPHY_PARAM_TXQ_LIMIT: TXQ packet limit has been changed + * @WIPHY_PARAM_TXQ_MEMORY_LIMIT: TXQ memory limit has been changed + * @WIPHY_PARAM_TXQ_QUANTUM: TXQ scheduler quantum */ enum wiphy_params_flags { WIPHY_PARAM_RETRY_SHORT = 1 << 0, @@ -2209,6 +2254,9 @@ enum wiphy_params_flags { WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, WIPHY_PARAM_COVERAGE_CLASS = 1 << 4, WIPHY_PARAM_DYN_ACK = 1 << 5, + WIPHY_PARAM_TXQ_LIMIT = 1 << 6, + WIPHY_PARAM_TXQ_MEMORY_LIMIT = 1 << 7, + WIPHY_PARAM_TXQ_QUANTUM = 1 << 8, }; /** @@ -2961,6 +3009,9 @@ struct cfg80211_external_auth_params { * * @set_multicast_to_unicast: configure multicast to unicast conversion for BSS * + * @get_txq_stats: Get TXQ stats for interface or phy. If wdev is %NULL, this + * function should return phy stats, and interface stats otherwise. + * * @set_pmk: configure the PMK to be used for offloaded 802.1X 4-Way handshake. * If not deleted through @del_pmk the PMK remains valid until disconnect * upon which the driver should clear it. @@ -3262,6 +3313,10 @@ struct cfg80211_ops { struct net_device *dev, const bool enabled); + int (*get_txq_stats)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_txq_stats *txqstats); + int (*set_pmk)(struct wiphy *wiphy, struct net_device *dev, const struct cfg80211_pmk_conf *conf); int (*del_pmk)(struct wiphy *wiphy, struct net_device *dev, @@ -3806,6 +3861,10 @@ struct wiphy_iftype_ext_capab { * bitmap of &enum nl80211_band values. For instance, for * NL80211_BAND_2GHZ, bit 0 would be set * (i.e. BIT(NL80211_BAND_2GHZ)). + * + * @txq_limit: configuration of internal TX queue frame limit + * @txq_memory_limit: configuration internal TX queue memory limit + * @txq_quantum: configuration of internal TX queue scheduler quantum */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3940,6 +3999,10 @@ struct wiphy { u8 nan_supported_bands; + u32 txq_limit; + u32 txq_memory_limit; + u32 txq_quantum; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -5363,6 +5426,30 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) #endif /** + * struct cfg80211_fils_resp_params - FILS connection response params + * @kek: KEK derived from a successful FILS connection (may be %NULL) + * @kek_len: Length of @fils_kek in octets + * @update_erp_next_seq_num: Boolean value to specify whether the value in + * @erp_next_seq_num is valid. + * @erp_next_seq_num: The next sequence number to use in ERP message in + * FILS Authentication. This value should be specified irrespective of the + * status for a FILS connection. + * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL). + * @pmk_len: Length of @pmk in octets + * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID + * used for this FILS connection (may be %NULL). + */ +struct cfg80211_fils_resp_params { + const u8 *kek; + size_t kek_len; + bool update_erp_next_seq_num; + u16 erp_next_seq_num; + const u8 *pmk; + size_t pmk_len; + const u8 *pmkid; +}; + +/** * struct cfg80211_connect_resp_params - Connection response params * @status: Status code, %WLAN_STATUS_SUCCESS for successful connection, use * %WLAN_STATUS_UNSPECIFIED_FAILURE if your device cannot give you @@ -5380,17 +5467,7 @@ static inline void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) * @req_ie_len: Association request IEs length * @resp_ie: Association response IEs (may be %NULL) * @resp_ie_len: Association response IEs length - * @fils_kek: KEK derived from a successful FILS connection (may be %NULL) - * @fils_kek_len: Length of @fils_kek in octets - * @update_erp_next_seq_num: Boolean value to specify whether the value in - * @fils_erp_next_seq_num is valid. - * @fils_erp_next_seq_num: The next sequence number to use in ERP message in - * FILS Authentication. This value should be specified irrespective of the - * status for a FILS connection. - * @pmk: A new PMK if derived from a successful FILS connection (may be %NULL). - * @pmk_len: Length of @pmk in octets - * @pmkid: A new PMKID if derived from a successful FILS connection or the PMKID - * used for this FILS connection (may be %NULL). + * @fils: FILS connection response parameters. * @timeout_reason: Reason for connection timeout. This is used when the * connection fails due to a timeout instead of an explicit rejection from * the AP. %NL80211_TIMEOUT_UNSPECIFIED is used when the timeout reason is @@ -5406,13 +5483,7 @@ struct cfg80211_connect_resp_params { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; - const u8 *fils_kek; - size_t fils_kek_len; - bool update_erp_next_seq_num; - u16 fils_erp_next_seq_num; - const u8 *pmk; - size_t pmk_len; - const u8 *pmkid; + struct cfg80211_fils_resp_params fils; enum nl80211_timeout_reason timeout_reason; }; @@ -5558,6 +5629,7 @@ cfg80211_connect_timeout(struct net_device *dev, const u8 *bssid, * @req_ie_len: association request IEs length * @resp_ie: association response IEs (may be %NULL) * @resp_ie_len: assoc response IEs length + * @fils: FILS related roaming information. */ struct cfg80211_roam_info { struct ieee80211_channel *channel; @@ -5567,6 +5639,7 @@ struct cfg80211_roam_info { size_t req_ie_len; const u8 *resp_ie; size_t resp_ie_len; + struct cfg80211_fils_resp_params fils; }; /** @@ -5648,6 +5721,26 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); +/** + * cfg80211_sinfo_alloc_tid_stats - allocate per-tid statistics. + * + * @sinfo: the station information + * @gfp: allocation flags + */ +int cfg80211_sinfo_alloc_tid_stats(struct station_info *sinfo, gfp_t gfp); + +/** + * cfg80211_sinfo_release_content - release contents of station info + * @sinfo: the station information + * + * Releases any potentially allocated sub-information of the station + * information, but not the struct itself (since it's typically on + * the stack.) + */ +static inline void cfg80211_sinfo_release_content(struct station_info *sinfo) +{ + kfree(sinfo->pertid); +} /** * cfg80211_new_sta - notify userspace about station diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 207d9ba1f92c..0e5e91be2d30 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -101,6 +101,10 @@ struct dcbnl_rtnl_ops { /* CEE peer */ int (*cee_peer_getpg) (struct net_device *, struct cee_pg *); int (*cee_peer_getpfc) (struct net_device *, struct cee_pfc *); + + /* buffer settings */ + int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *); + int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *); }; #endif /* __NET_DCBNL_H__ */ diff --git a/include/net/devlink.h b/include/net/devlink.h index 2e4f71e16e95..e336ea9c73df 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -35,6 +35,14 @@ struct devlink { char priv[0] __aligned(NETDEV_ALIGN); }; +struct devlink_port_attrs { + bool set; + enum devlink_port_flavour flavour; + u32 port_number; /* same value as "split group" */ + bool split; + u32 split_subport_number; +}; + struct devlink_port { struct list_head list; struct devlink *devlink; @@ -43,8 +51,7 @@ struct devlink_port { enum devlink_port_type type; enum devlink_port_type desired_type; void *type_dev; - bool split; - u32 split_group; + struct devlink_port_attrs attrs; }; struct devlink_sb_pool_info { @@ -289,12 +296,13 @@ struct devlink_resource { #define DEVLINK_RESOURCE_ID_PARENT_TOP 0 struct devlink_ops { - int (*reload)(struct devlink *devlink); + int (*reload)(struct devlink *devlink, struct netlink_ext_ack *extack); int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, - unsigned int count); - int (*port_unsplit)(struct devlink *devlink, unsigned int port_index); + unsigned int count, struct netlink_ext_ack *extack); + int (*port_unsplit)(struct devlink *devlink, unsigned int port_index, + struct netlink_ext_ack *extack); int (*sb_pool_get)(struct devlink *devlink, unsigned int sb_index, u16 pool_index, struct devlink_sb_pool_info *pool_info); @@ -367,8 +375,12 @@ void devlink_port_type_eth_set(struct devlink_port *devlink_port, void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); -void devlink_port_split_set(struct devlink_port *devlink_port, - u32 split_group); +void devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_subport_number); +int devlink_port_get_phys_port_name(struct devlink_port *devlink_port, + char *name, size_t len); int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, u16 egress_pools_count, u16 ingress_tc_count, @@ -466,11 +478,20 @@ static inline void devlink_port_type_clear(struct devlink_port *devlink_port) { } -static inline void devlink_port_split_set(struct devlink_port *devlink_port, - u32 split_group) +static inline void devlink_port_attrs_set(struct devlink_port *devlink_port, + enum devlink_port_flavour flavour, + u32 port_number, bool split, + u32 split_subport_number) { } +static inline int +devlink_port_get_phys_port_name(struct devlink_port *devlink_port, + char *name, size_t len) +{ + return -EOPNOTSUPP; +} + static inline int devlink_sb_register(struct devlink *devlink, unsigned int sb_index, u32 size, u16 ingress_pools_count, diff --git a/include/net/dsa.h b/include/net/dsa.h index 60fb4ec8ba61..fdbd6082945d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -20,12 +20,14 @@ #include <linux/of.h> #include <linux/ethtool.h> #include <linux/net_tstamp.h> +#include <linux/phy.h> #include <net/devlink.h> #include <net/switchdev.h> struct tc_action; struct phy_device; struct fixed_phy_status; +struct phylink_link_state; enum dsa_tag_protocol { DSA_TAG_PROTO_NONE = 0, @@ -199,6 +201,7 @@ struct dsa_port { u8 stp_state; struct net_device *bridge_dev; struct devlink_port devlink_port; + struct phylink *pl; /* * Original copy of the master netdev ethtool_ops */ @@ -354,12 +357,36 @@ struct dsa_switch_ops { struct fixed_phy_status *st); /* + * PHYLINK integration + */ + void (*phylink_validate)(struct dsa_switch *ds, int port, + unsigned long *supported, + struct phylink_link_state *state); + int (*phylink_mac_link_state)(struct dsa_switch *ds, int port, + struct phylink_link_state *state); + void (*phylink_mac_config)(struct dsa_switch *ds, int port, + unsigned int mode, + const struct phylink_link_state *state); + void (*phylink_mac_an_restart)(struct dsa_switch *ds, int port); + void (*phylink_mac_link_down)(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface); + void (*phylink_mac_link_up)(struct dsa_switch *ds, int port, + unsigned int mode, + phy_interface_t interface, + struct phy_device *phydev); + void (*phylink_fixed_state)(struct dsa_switch *ds, int port, + struct phylink_link_state *state); + /* * ethtool hardware statistics. */ - void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); + void (*get_strings)(struct dsa_switch *ds, int port, + u32 stringset, uint8_t *data); void (*get_ethtool_stats)(struct dsa_switch *ds, int port, uint64_t *data); - int (*get_sset_count)(struct dsa_switch *ds, int port); + int (*get_sset_count)(struct dsa_switch *ds, int port, int sset); + void (*get_ethtool_phy_stats)(struct dsa_switch *ds, + int port, uint64_t *data); /* * ethtool Wake-on-LAN @@ -588,4 +615,10 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev, #define BRCM_TAG_GET_PORT(v) ((v) >> 8) #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff) + +int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data); +int dsa_port_get_ethtool_phy_stats(struct dsa_port *dp, uint64_t *data); +int dsa_port_get_phy_sset_count(struct dsa_port *dp); +void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); + #endif diff --git a/include/net/erspan.h b/include/net/erspan.h index d044aa60cc76..b39643ef4c95 100644 --- a/include/net/erspan.h +++ b/include/net/erspan.h @@ -219,6 +219,33 @@ static inline __be32 erspan_get_timestamp(void) return htonl((u32)h_usecs); } +/* ERSPAN BSO (Bad/Short/Oversized), see RFC1757 + * 00b --> Good frame with no error, or unknown integrity + * 01b --> Payload is a Short Frame + * 10b --> Payload is an Oversized Frame + * 11b --> Payload is a Bad Frame with CRC or Alignment Error + */ +enum erspan_bso { + BSO_NOERROR = 0x0, + BSO_SHORT = 0x1, + BSO_OVERSIZED = 0x2, + BSO_BAD = 0x3, +}; + +static inline u8 erspan_detect_bso(struct sk_buff *skb) +{ + /* BSO_BAD is not handled because the frame CRC + * or alignment error information is in FCS. + */ + if (skb->len < ETH_ZLEN) + return BSO_SHORT; + + if (skb->len > ETH_FRAME_LEN) + return BSO_OVERSIZED; + + return BSO_NOERROR; +} + static inline void erspan_build_header_v2(struct sk_buff *skb, u32 id, u8 direction, u16 hwid, bool truncate, bool is_ipv4) @@ -248,6 +275,7 @@ static inline void erspan_build_header_v2(struct sk_buff *skb, vlan_tci = ntohs(qp->tci); } + bso = erspan_detect_bso(skb); skb_push(skb, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); ershdr = (struct erspan_base_hdr *)skb->data; memset(ershdr, 0, sizeof(*ershdr) + ERSPAN_V2_MDSIZE); diff --git a/include/net/failover.h b/include/net/failover.h new file mode 100644 index 000000000000..bb15438f39c7 --- /dev/null +++ b/include/net/failover.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _FAILOVER_H +#define _FAILOVER_H + +#include <linux/netdevice.h> + +struct failover_ops { + int (*slave_pre_register)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_register)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_pre_unregister)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_unregister)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_link_change)(struct net_device *slave_dev, + struct net_device *failover_dev); + int (*slave_name_change)(struct net_device *slave_dev, + struct net_device *failover_dev); + rx_handler_result_t (*slave_handle_frame)(struct sk_buff **pskb); +}; + +struct failover { + struct list_head list; + struct net_device __rcu *failover_dev; + struct failover_ops __rcu *ops; +}; + +struct failover *failover_register(struct net_device *dev, + struct failover_ops *ops); +void failover_unregister(struct failover *failover); +int failover_slave_unregister(struct net_device *slave_dev); + +#endif /* _FAILOVER_H */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e5cfcfc7dd93..b473df5b9512 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -75,7 +75,8 @@ struct fib_rules_ops { int (*configure)(struct fib_rule *, struct sk_buff *, struct fib_rule_hdr *, - struct nlattr **); + struct nlattr **, + struct netlink_ext_ack *); int (*delete)(struct fib_rule *); int (*compare)(struct fib_rule *, struct fib_rule_hdr *, diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d1fcf2442a42..adc24df56b90 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -226,6 +226,11 @@ struct flow_dissector { unsigned short int offset[FLOW_DISSECTOR_KEY_MAX]; }; +struct flow_keys_basic { + struct flow_dissector_key_control control; + struct flow_dissector_key_basic basic; +}; + struct flow_keys { struct flow_dissector_key_control control; #define FLOW_KEYS_HASH_START_FIELD basic @@ -244,7 +249,7 @@ __be32 flow_get_u32_src(const struct flow_keys *flow); __be32 flow_get_u32_dst(const struct flow_keys *flow); extern struct flow_dissector flow_keys_dissector; -extern struct flow_dissector flow_keys_buf_dissector; +extern struct flow_dissector flow_keys_basic_dissector; /* struct flow_keys_digest: * diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index d4088d1a688d..d7578cf49c3a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -42,6 +42,7 @@ enum { struct inet6_ifaddr { struct in6_addr addr; __u32 prefix_len; + __u32 rt_priority; /* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */ __u32 valid_lft; @@ -64,7 +65,7 @@ struct inet6_ifaddr { struct delayed_work dad_work; struct inet6_dev *idev; - struct rt6_info *rt; + struct fib6_info *rt; struct hlist_node addr_lst; struct list_head if_list; @@ -143,8 +144,7 @@ struct ipv6_ac_socklist { struct ifacaddr6 { struct in6_addr aca_addr; - struct inet6_dev *aca_idev; - struct rt6_info *aca_rt; + struct fib6_info *aca_rt; struct ifacaddr6 *aca_next; int aca_users; refcount_t aca_refcnt; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b68fea022a82..0a6c9e0f2b5a 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -23,8 +23,6 @@ #include <net/inet_sock.h> #include <net/request_sock.h> -#define INET_CSK_DEBUG 1 - /* Cancel timers, when they are not required. */ #undef INET_CSK_CLEAR_TIMERS @@ -77,6 +75,7 @@ struct inet_connection_sock_af_ops { * @icsk_af_ops Operations which are AF_INET{4,6} specific * @icsk_ulp_ops Pluggable ULP control hook * @icsk_ulp_data ULP private data + * @icsk_clean_acked Clean acked data hook * @icsk_listen_portaddr_node hash to the portaddr listener hashtable * @icsk_ca_state: Congestion control state * @icsk_retransmits: Number of unrecovered [RTO] timeouts @@ -102,6 +101,7 @@ struct inet_connection_sock { const struct inet_connection_sock_af_ops *icsk_af_ops; const struct tcp_ulp_ops *icsk_ulp_ops; void *icsk_ulp_data; + void (*icsk_clean_acked)(struct sock *sk, u32 acked_seq); struct hlist_node icsk_listen_portaddr_node; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); __u8 icsk_ca_state:6, @@ -194,10 +194,6 @@ static inline void inet_csk_delack_init(struct sock *sk) void inet_csk_delete_keepalive_timer(struct sock *sk); void inet_csk_reset_keepalive_timer(struct sock *sk, unsigned long timeout); -#ifdef INET_CSK_DEBUG -extern const char inet_csk_timer_bug_msg[]; -#endif - static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -212,12 +208,9 @@ static inline void inet_csk_clear_xmit_timer(struct sock *sk, const int what) #ifdef INET_CSK_CLEAR_TIMERS sk_stop_timer(sk, &icsk->icsk_delack_timer); #endif + } else { + pr_debug("inet_csk BUG: unknown timer value\n"); } -#ifdef INET_CSK_DEBUG - else { - pr_debug("%s", inet_csk_timer_bug_msg); - } -#endif } /* @@ -230,10 +223,8 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, struct inet_connection_sock *icsk = inet_csk(sk); if (when > max_when) { -#ifdef INET_CSK_DEBUG pr_debug("reset_xmit_timer: sk=%p %d when=0x%lx, caller=%p\n", sk, what, when, current_text_addr()); -#endif when = max_when; } @@ -247,12 +238,9 @@ static inline void inet_csk_reset_xmit_timer(struct sock *sk, const int what, icsk->icsk_ack.pending |= ICSK_ACK_TIMER; icsk->icsk_ack.timeout = jiffies + when; sk_reset_timer(sk, &icsk->icsk_delack_timer, icsk->icsk_ack.timeout); + } else { + pr_debug("inet_csk BUG: unknown timer value\n"); } -#ifdef INET_CSK_DEBUG - else { - pr_debug("%s", inet_csk_timer_bug_msg); - } -#endif } static inline unsigned long diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 0a671c32d6b9..83d5b3c2ac42 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -147,6 +147,7 @@ struct inet_cork { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; struct inet_cork_full { diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index c7be1ca8e562..78775038f011 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -61,7 +61,7 @@ struct inet_timewait_sock { #define tw_cookie __tw_common.skc_cookie #define tw_dr __tw_common.skc_tw_dr - int tw_timeout; + __u32 tw_mark; volatile unsigned char tw_substate; unsigned char tw_rcv_wscale; diff --git a/include/net/ip.h b/include/net/ip.h index ecffd843e7b8..0d2281b4b27a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -76,6 +76,7 @@ struct ipcm_cookie { __u8 ttl; __s16 tos; char priority; + __u16 gso_size; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) @@ -171,7 +172,7 @@ struct sk_buff *ip_make_skb(struct sock *sk, struct flowi4 *fl4, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, struct ipcm_cookie *ipc, struct rtable **rtp, - unsigned int flags); + struct inet_cork *cork, unsigned int flags); static inline struct sk_buff *ip_finish_skb(struct sock *sk, struct flowi4 *fl4) { @@ -396,6 +397,9 @@ static inline unsigned int ip_skb_dst_mtu(struct sock *sk, return min(READ_ONCE(skb_dst(skb)->dev->mtu), IP_MAX_MTU); } +int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len, + u32 *metrics); + u32 ip_idents_reserve(u32 hash, int segs); void __ip_select_ident(struct net *net, struct iphdr *iph, int segs); @@ -660,4 +664,7 @@ extern int sysctl_icmp_msgs_burst; int ip_misc_proc_init(void); #endif +int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, + struct netlink_ext_ack *extack); + #endif /* _IP_H */ diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 5e86fd9dc857..5cba71d2dc44 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -38,6 +38,7 @@ #endif struct rt6_info; +struct fib6_info; struct fib6_config { u32 fc_table; @@ -74,12 +75,12 @@ struct fib6_node { #ifdef CONFIG_IPV6_SUBTREES struct fib6_node __rcu *subtree; #endif - struct rt6_info __rcu *leaf; + struct fib6_info __rcu *leaf; __u16 fn_bit; /* bit key */ __u16 fn_flags; int fn_sernum; - struct rt6_info __rcu *rr_ptr; + struct fib6_info __rcu *rr_ptr; struct rcu_head rcu; }; @@ -94,11 +95,6 @@ struct fib6_gc_args { #define FIB6_SUBTREE(fn) (rcu_dereference_protected((fn)->subtree, 1)) #endif -struct mx6_config { - const u32 *mx; - DECLARE_BITMAP(mx_valid, RTAX_MAX); -}; - /* * routing information * @@ -127,92 +123,104 @@ struct rt6_exception { #define FIB6_EXCEPTION_BUCKET_SIZE (1 << FIB6_EXCEPTION_BUCKET_SIZE_SHIFT) #define FIB6_MAX_DEPTH 5 -struct rt6_info { - struct dst_entry dst; - struct rt6_info __rcu *rt6_next; - struct rt6_info *from; +struct fib6_nh { + struct in6_addr nh_gw; + struct net_device *nh_dev; + struct lwtunnel_state *nh_lwtstate; - /* - * Tail elements of dst_entry (__refcnt etc.) - * and these elements (rarely used in hot path) are in - * the same cache line. - */ - struct fib6_table *rt6i_table; - struct fib6_node __rcu *rt6i_node; + unsigned int nh_flags; + atomic_t nh_upper_bound; + int nh_weight; +}; - struct in6_addr rt6i_gateway; +struct fib6_info { + struct fib6_table *fib6_table; + struct fib6_info __rcu *fib6_next; + struct fib6_node __rcu *fib6_node; /* Multipath routes: - * siblings is a list of rt6_info that have the the same metric/weight, + * siblings is a list of fib6_info that have the the same metric/weight, * destination, but not the same gateway. nsiblings is just a cache * to speed up lookup. */ - struct list_head rt6i_siblings; - unsigned int rt6i_nsiblings; - atomic_t rt6i_nh_upper_bound; + struct list_head fib6_siblings; + unsigned int fib6_nsiblings; - atomic_t rt6i_ref; + atomic_t fib6_ref; + unsigned long expires; + struct dst_metrics *fib6_metrics; +#define fib6_pmtu fib6_metrics->metrics[RTAX_MTU-1] - unsigned int rt6i_nh_flags; + struct rt6key fib6_dst; + u32 fib6_flags; + struct rt6key fib6_src; + struct rt6key fib6_prefsrc; - /* These are in a separate cache line. */ - struct rt6key rt6i_dst ____cacheline_aligned_in_smp; - u32 rt6i_flags; + struct rt6_info * __percpu *rt6i_pcpu; + struct rt6_exception_bucket __rcu *rt6i_exception_bucket; + + u32 fib6_metric; + u8 fib6_protocol; + u8 fib6_type; + u8 exception_bucket_flushed:1, + should_flush:1, + dst_nocount:1, + dst_nopolicy:1, + dst_host:1, + unused:3; + + struct fib6_nh fib6_nh; +}; + +struct rt6_info { + struct dst_entry dst; + struct fib6_info __rcu *from; + + struct rt6key rt6i_dst; struct rt6key rt6i_src; + struct in6_addr rt6i_gateway; + struct inet6_dev *rt6i_idev; + u32 rt6i_flags; struct rt6key rt6i_prefsrc; struct list_head rt6i_uncached; struct uncached_list *rt6i_uncached_list; - struct inet6_dev *rt6i_idev; - struct rt6_info * __percpu *rt6i_pcpu; - struct rt6_exception_bucket __rcu *rt6i_exception_bucket; - - u32 rt6i_metric; - u32 rt6i_pmtu; /* more non-fragment space at head required */ - int rt6i_nh_weight; unsigned short rt6i_nfheader_len; - u8 rt6i_protocol; - u8 exception_bucket_flushed:1, - should_flush:1, - unused:6; }; #define for_each_fib6_node_rt_rcu(fn) \ for (rt = rcu_dereference((fn)->leaf); rt; \ - rt = rcu_dereference(rt->rt6_next)) + rt = rcu_dereference(rt->fib6_next)) #define for_each_fib6_walker_rt(w) \ for (rt = (w)->leaf; rt; \ - rt = rcu_dereference_protected(rt->rt6_next, 1)) + rt = rcu_dereference_protected(rt->fib6_next, 1)) static inline struct inet6_dev *ip6_dst_idev(struct dst_entry *dst) { return ((struct rt6_info *)dst)->rt6i_idev; } -static inline void rt6_clean_expires(struct rt6_info *rt) +static inline void fib6_clean_expires(struct fib6_info *f6i) { - rt->rt6i_flags &= ~RTF_EXPIRES; - rt->dst.expires = 0; + f6i->fib6_flags &= ~RTF_EXPIRES; + f6i->expires = 0; } -static inline void rt6_set_expires(struct rt6_info *rt, unsigned long expires) +static inline void fib6_set_expires(struct fib6_info *f6i, + unsigned long expires) { - rt->dst.expires = expires; - rt->rt6i_flags |= RTF_EXPIRES; + f6i->expires = expires; + f6i->fib6_flags |= RTF_EXPIRES; } -static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) +static inline bool fib6_check_expired(const struct fib6_info *f6i) { - struct rt6_info *rt; - - for (rt = rt0; rt && !(rt->rt6i_flags & RTF_EXPIRES); rt = rt->from); - if (rt && rt != rt0) - rt0->dst.expires = rt->dst.expires; - dst_set_expires(&rt0->dst, timeout); - rt0->rt6i_flags |= RTF_EXPIRES; + if (f6i->fib6_flags & RTF_EXPIRES) + return time_after(jiffies, f6i->expires); + return false; } /* Function to safely get fn->sernum for passed in rt @@ -220,14 +228,13 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) * Return true if we can get cookie safely * Return false if not */ -static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, - u32 *cookie) +static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, + u32 *cookie) { struct fib6_node *fn; bool status = false; - rcu_read_lock(); - fn = rcu_dereference(rt->rt6i_node); + fn = rcu_dereference(f6i->fib6_node); if (fn) { *cookie = fn->fn_sernum; @@ -236,19 +243,22 @@ static inline bool rt6_get_cookie_safe(const struct rt6_info *rt, status = true; } - rcu_read_unlock(); return status; } static inline u32 rt6_get_cookie(const struct rt6_info *rt) { + struct fib6_info *from; u32 cookie = 0; - if (rt->rt6i_flags & RTF_PCPU || - (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from)) - rt = rt->from; + rcu_read_lock(); - rt6_get_cookie_safe(rt, &cookie); + from = rcu_dereference(rt->from); + if (from && (rt->rt6i_flags & RTF_PCPU || + unlikely(!list_empty(&rt->rt6i_uncached)))) + fib6_get_cookie_safe(from, &cookie); + + rcu_read_unlock(); return cookie; } @@ -262,20 +272,18 @@ static inline void ip6_rt_put(struct rt6_info *rt) dst_release(&rt->dst); } -void rt6_free_pcpu(struct rt6_info *non_pcpu_rt); +struct fib6_info *fib6_info_alloc(gfp_t gfp_flags); +void fib6_info_destroy(struct fib6_info *f6i); -static inline void rt6_hold(struct rt6_info *rt) +static inline void fib6_info_hold(struct fib6_info *f6i) { - atomic_inc(&rt->rt6i_ref); + atomic_inc(&f6i->fib6_ref); } -static inline void rt6_release(struct rt6_info *rt) +static inline void fib6_info_release(struct fib6_info *f6i) { - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } + if (f6i && atomic_dec_and_test(&f6i->fib6_ref)) + fib6_info_destroy(f6i); } enum fib6_walk_state { @@ -291,7 +299,7 @@ enum fib6_walk_state { struct fib6_walker { struct list_head lh; struct fib6_node *root, *node; - struct rt6_info *leaf; + struct fib6_info *leaf; enum fib6_walk_state state; unsigned int skip; unsigned int count; @@ -355,7 +363,7 @@ typedef struct rt6_info *(*pol_lookup_t)(struct net *, struct fib6_entry_notifier_info { struct fib_notifier_info info; /* must be first */ - struct rt6_info *rt; + struct fib6_info *rt; }; /* @@ -368,24 +376,49 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, int flags, pol_lookup_t lookup); -struct fib6_node *fib6_lookup(struct fib6_node *root, - const struct in6_addr *daddr, - const struct in6_addr *saddr); +/* called with rcu lock held; can return error pointer + * caller needs to select path + */ +struct fib6_info *fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, + int flags); + +/* called with rcu lock held; caller needs to select path */ +struct fib6_info *fib6_table_lookup(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, int strict); + +struct fib6_info *fib6_multipath_select(const struct net *net, + struct fib6_info *match, + struct flowi6 *fl6, int oif, + const struct sk_buff *skb, int strict); + +struct fib6_node *fib6_node_lookup(struct fib6_node *root, + const struct in6_addr *daddr, + const struct in6_addr *saddr); struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, const struct in6_addr *saddr, int src_len, bool exact_match); -void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), +void fib6_clean_all(struct net *net, int (*func)(struct fib6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc, - struct netlink_ext_ack *extack); -int fib6_del(struct rt6_info *rt, struct nl_info *info); +int fib6_add(struct fib6_node *root, struct fib6_info *rt, + struct nl_info *info, struct netlink_ext_ack *extack); +int fib6_del(struct fib6_info *rt, struct nl_info *info); + +static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) +{ + return f6i->fib6_nh.nh_dev; +} + +static inline +struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i) +{ + return f6i->fib6_nh.nh_lwtstate; +} -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info, +void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, unsigned int flags); void fib6_run_gc(unsigned long expires, struct net *net, bool force); @@ -394,7 +427,15 @@ void fib6_gc_cleanup(void); int fib6_init(void); -int ipv6_route_open(struct inode *inode, struct file *file); +struct ipv6_route_iter { + struct seq_net_private p; + struct fib6_walker w; + loff_t skip; + struct fib6_table *tbl; + int sernum; +}; + +extern const struct seq_operations ipv6_route_seq_ops; int call_fib6_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, @@ -408,8 +449,14 @@ void __net_exit fib6_notifier_exit(struct net *net); unsigned int fib6_tables_seq_read(struct net *net); int fib6_tables_dump(struct net *net, struct notifier_block *nb); -void fib6_update_sernum(struct rt6_info *rt); -void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt); +void fib6_update_sernum(struct net *net, struct fib6_info *rt); +void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt); + +void fib6_metric_set(struct fib6_info *f6i, int metric, u32 val); +static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric) +{ + return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric)); +} #ifdef CONFIG_IPV6_MULTIPLE_TABLES int fib6_rules_init(void); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 08b132381984..59656fc580df 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -66,12 +66,6 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } -static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) -{ - return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; -} - void ip6_route_input(struct sk_buff *skb); struct dst_entry *ip6_route_input_lookup(struct net *net, struct net_device *dev, @@ -100,29 +94,29 @@ void ip6_route_cleanup(void); int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg); -int ip6_route_add(struct fib6_config *cfg, struct netlink_ext_ack *extack); -int ip6_ins_rt(struct rt6_info *); -int ip6_del_rt(struct rt6_info *); +int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +int ip6_ins_rt(struct net *net, struct fib6_info *f6i); +int ip6_del_rt(struct net *net, struct fib6_info *f6i); -void rt6_flush_exceptions(struct rt6_info *rt); -int rt6_remove_exception_rt(struct rt6_info *rt); -void rt6_age_exceptions(struct rt6_info *rt, struct fib6_gc_args *gc_args, +void rt6_flush_exceptions(struct fib6_info *f6i); +void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args, unsigned long now); -static inline int ip6_route_get_saddr(struct net *net, struct rt6_info *rt, +static inline int ip6_route_get_saddr(struct net *net, struct fib6_info *f6i, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = - rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; int err = 0; - if (rt && rt->rt6i_prefsrc.plen) - *saddr = rt->rt6i_prefsrc.addr; - else - err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, - daddr, prefs, saddr); + if (f6i && f6i->fib6_prefsrc.plen) { + *saddr = f6i->fib6_prefsrc.addr; + } else { + struct net_device *dev = f6i ? fib6_info_nh_dev(f6i) : NULL; + + err = ipv6_dev_get_saddr(net, dev, daddr, prefs, saddr); + } return err; } @@ -137,8 +131,9 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, struct flowi6 *fl6); void fib6_force_start_gc(struct net *net); -struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, - const struct in6_addr *addr, bool anycast); +struct fib6_info *addrconf_f6i_alloc(struct net *net, struct inet6_dev *idev, + const struct in6_addr *addr, bool anycast, + gfp_t gfp_flags); struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, int flags); @@ -147,9 +142,11 @@ struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, * support functions for ND * */ -struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, +struct fib6_info *rt6_get_dflt_router(struct net *net, + const struct in6_addr *addr, struct net_device *dev); -struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, +struct fib6_info *rt6_add_dflt_router(struct net *net, + const struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref); void rt6_purge_dflt_routers(struct net *net); @@ -174,14 +171,14 @@ struct rt6_rtnl_dump_arg { struct net *net; }; -int rt6_dump_route(struct rt6_info *rt, void *p_arg); +int rt6_dump_route(struct fib6_info *f6i, void *p_arg); void rt6_mtu_change(struct net_device *dev, unsigned int mtu); void rt6_remove_prefsrc(struct inet6_ifaddr *ifp); void rt6_clean_tohost(struct net *net, struct in6_addr *gateway); void rt6_sync_up(struct net_device *dev, unsigned int nh_flags); void rt6_disable_ip(struct net_device *dev, unsigned long event); void rt6_sync_down_dev(struct net_device *dev, unsigned long event); -void rt6_multipath_rebalance(struct rt6_info *rt); +void rt6_multipath_rebalance(struct fib6_info *f6i); void rt6_uncached_list_add(struct rt6_info *rt); void rt6_uncached_list_del(struct rt6_info *rt); @@ -269,12 +266,38 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, return daddr; } -static inline bool rt6_duplicate_nexthop(struct rt6_info *a, struct rt6_info *b) +static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->dst.dev == b->dst.dev && - a->rt6i_idev == b->rt6i_idev && - ipv6_addr_equal(&a->rt6i_gateway, &b->rt6i_gateway) && - !lwtunnel_cmp_encap(a->dst.lwtstate, b->dst.lwtstate); + return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && + ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && + !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); } +static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ + struct inet6_dev *idev; + unsigned int mtu; + + if (dst_metric_locked(dst, RTAX_MTU)) { + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + } + + mtu = IPV6_MIN_MTU; + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + +u32 ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr, + struct in6_addr *saddr); + +struct neighbour *ip6_neigh_lookup(const struct in6_addr *gw, + struct net_device *dev, struct sk_buff *skb, + const void *daddr); #endif diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 81d0f2107ff1..69c91d1934c1 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -449,4 +449,6 @@ static inline void fib_proc_exit(struct net *net) } #endif +u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); + #endif /* _NET_FIB_H */ diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 540a4b4417bf..90ff430f5e9d 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -379,6 +379,17 @@ static inline u8 ip_tunnel_get_dsfield(const struct iphdr *iph, return 0; } +static inline u8 ip_tunnel_get_ttl(const struct iphdr *iph, + const struct sk_buff *skb) +{ + if (skb->protocol == htons(ETH_P_IP)) + return iph->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + return ((const struct ipv6hdr *)iph)->hop_limit; + else + return 0; +} + /* Propogate ECN bits out */ static inline u8 ip_tunnel_ecn_encap(u8 tos, const struct iphdr *iph, const struct sk_buff *skb) @@ -466,12 +477,12 @@ static inline struct ip_tunnel_info *lwt_tun_info(struct lwtunnel_state *lwtstat return (struct ip_tunnel_info *)lwtstate->data; } -extern struct static_key ip_tunnel_metadata_cnt; +DECLARE_STATIC_KEY_FALSE(ip_tunnel_metadata_cnt); /* Returns > 0 if metadata should be collected */ static inline int ip_tunnel_collect_metadata(void) { - return static_key_false(&ip_tunnel_metadata_cnt); + return static_branch_unlikely(&ip_tunnel_metadata_cnt); } void __init ip_tunnel_core_init(void); diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index eb0bec043c96..a0bec23c6d5e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -41,18 +41,6 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) return net->ipvs; } -/* This one needed for single_open_net since net is stored directly in - * private not as a struct i.e. seq_file_net can't be used. - */ -static inline struct net *seq_file_single_net(struct seq_file *seq) -{ -#ifdef CONFIG_NET_NS - return (struct net *)seq->private; -#else - return &init_net; -#endif -} - /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -643,6 +631,7 @@ struct ip_vs_service { /* alternate persistence engine */ struct ip_vs_pe __rcu *pe; + int conntrack_afmask; struct rcu_head rcu_head; }; @@ -668,6 +657,7 @@ struct ip_vs_dest { volatile unsigned int flags; /* dest status flags */ atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ + atomic_t last_weight; /* server latest weight */ refcount_t refcnt; /* reference counter */ struct ip_vs_stats stats; /* statistics */ @@ -762,14 +752,14 @@ struct ip_vs_app { * 2=Mangled but checksum was not updated */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, - struct sk_buff *, int *diff); + struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh); /* input hook: Process packet in outin direction, diff set for TCP. * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, * 2=Mangled but checksum was not updated */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, - struct sk_buff *, int *diff); + struct sk_buff *, int *diff, struct ip_vs_iphdr *ipvsh); /* ip_vs_app initializer */ int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); @@ -1327,8 +1317,10 @@ int register_ip_vs_app_inc(struct netns_ipvs *ipvs, struct ip_vs_app *app, __u16 int ip_vs_app_inc_get(struct ip_vs_app *inc); void ip_vs_app_inc_put(struct ip_vs_app *inc); -int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); -int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); +int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb, + struct ip_vs_iphdr *ipvsh); +int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb, + struct ip_vs_iphdr *ipvsh); int register_ip_vs_pe(struct ip_vs_pe *pe); int unregister_ip_vs_pe(struct ip_vs_pe *pe); @@ -1620,6 +1612,35 @@ static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, return false; } +static inline int ip_vs_register_conntrack(struct ip_vs_service *svc) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + int afmask = (svc->af == AF_INET6) ? 2 : 1; + int ret = 0; + + if (!(svc->conntrack_afmask & afmask)) { + ret = nf_ct_netns_get(svc->ipvs->net, svc->af); + if (ret >= 0) + svc->conntrack_afmask |= afmask; + } + return ret; +#else + return 0; +#endif +} + +static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + int afmask = (svc->af == AF_INET6) ? 2 : 1; + + if (svc->conntrack_afmask & afmask) { + nf_ct_netns_put(svc->ipvs->net, svc->af); + svc->conntrack_afmask &= ~afmask; + } +#endif +} + static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 836f31af1369..16475c269749 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -298,6 +298,7 @@ struct ipcm6_cookie { __s16 tclass; __s8 dontfrag; struct ipv6_txoptions *opt; + __u16 gso_size; }; static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np) @@ -906,6 +907,11 @@ static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel) return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel; } +static inline __be32 flowi6_get_flowlabel(const struct flowi6 *fl6) +{ + return fl6->flowlabel & IPV6_FLOWLABEL_MASK; +} + /* * Prototypes exported by ipv6 */ @@ -950,6 +956,7 @@ struct sk_buff *ip6_make_skb(struct sock *sk, void *from, int length, int transhdrlen, struct ipcm6_cookie *ipc6, struct flowi6 *fl6, struct rt6_info *rt, unsigned int flags, + struct inet_cork_full *cork, const struct sockcm_cookie *sockc); static inline struct sk_buff *ip6_finish_skb(struct sock *sk) @@ -958,8 +965,6 @@ static inline struct sk_buff *ip6_finish_skb(struct sock *sk) &inet6_sk(sk)->cork); } -unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst); - int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, @@ -1044,8 +1049,6 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); int inet6_release(struct socket *sock); -int __inet6_bind(struct sock *sock, struct sockaddr *uaddr, int addr_len, - bool force_bind_address_no_port, bool with_lock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer); diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h index f4c21b5a1242..b0eaeb02d46d 100644 --- a/include/net/iucv/af_iucv.h +++ b/include/net/iucv/af_iucv.h @@ -153,8 +153,6 @@ struct iucv_sock_list { atomic_t autobind_name; }; -__poll_t iucv_sock_poll(struct file *file, struct socket *sock, - poll_table *wait); void iucv_sock_link(struct iucv_sock_list *l, struct sock *s); void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s); void iucv_accept_enqueue(struct sock *parent, struct sock *sk); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b2f3a0c018e7..851a5e19ae32 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3378,6 +3378,8 @@ enum ieee80211_reconfig_type { * frame in case that no beacon was heard from the AP/P2P GO. * The callback will be called before each transmission and upon return * mac80211 will transmit the frame right away. + * If duration is greater than zero, mac80211 hints to the driver the + * duration for which the operation is requested. * The callback is optional and can (should!) sleep. * * @mgd_protect_tdls_discover: Protect a TDLS discovery session. After sending @@ -3697,7 +3699,8 @@ struct ieee80211_ops { u32 sset, u8 *data); void (*mgd_prepare_tx)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif); + struct ieee80211_vif *vif, + u16 duration); void (*mgd_protect_tdls_discover)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); @@ -4450,6 +4453,19 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, u8 ieee80211_csa_update_counter(struct ieee80211_vif *vif); /** + * ieee80211_csa_set_counter - request mac80211 to set csa counter + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @counter: the new value for the counter + * + * The csa counter can be changed by the device, this API should be + * used by the device driver to update csa counter in mac80211. + * + * It should never be used together with ieee80211_csa_update_counter(), + * as it will cause a race condition around the counter value. + */ +void ieee80211_csa_set_counter(struct ieee80211_vif *vif, u8 counter); + +/** * ieee80211_csa_finish - notify mac80211 about channel switch * @vif: &struct ieee80211_vif pointer from the add_interface callback. * diff --git a/include/net/neighbour.h b/include/net/neighbour.h index e421f86af043..6c1eecd56a4d 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -246,6 +246,7 @@ static inline void *neighbour_priv(const struct neighbour *n) #define NEIGH_UPDATE_F_OVERRIDE 0x00000001 #define NEIGH_UPDATE_F_WEAK_OVERRIDE 0x00000002 #define NEIGH_UPDATE_F_OVERRIDE_ISROUTER 0x00000004 +#define NEIGH_UPDATE_F_EXT_LEARNED 0x20000000 #define NEIGH_UPDATE_F_ISROUTER 0x40000000 #define NEIGH_UPDATE_F_ADMIN 0x80000000 @@ -526,5 +527,21 @@ static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, } while (read_seqretry(&n->ha_lock, seq)); } - +static inline void neigh_update_ext_learned(struct neighbour *neigh, u32 flags, + int *notify) +{ + u8 ndm_flags = 0; + + if (!(flags & NEIGH_UPDATE_F_ADMIN)) + return; + + ndm_flags |= (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0; + if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) { + if (ndm_flags & NTF_EXT_LEARNED) + neigh->flags |= NTF_EXT_LEARNED; + else + neigh->flags &= ~NTF_EXT_LEARNED; + *notify = 1; + } +} #endif diff --git a/include/net/net_failover.h b/include/net/net_failover.h new file mode 100644 index 000000000000..b12a1c469d1c --- /dev/null +++ b/include/net/net_failover.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2018, Intel Corporation. */ + +#ifndef _NET_FAILOVER_H +#define _NET_FAILOVER_H + +#include <net/failover.h> + +/* failover state */ +struct net_failover_info { + /* primary netdev with same MAC */ + struct net_device __rcu *primary_dev; + + /* standby netdev */ + struct net_device __rcu *standby_dev; + + /* primary netdev stats */ + struct rtnl_link_stats64 primary_stats; + + /* standby netdev stats */ + struct rtnl_link_stats64 standby_stats; + + /* aggregated stats */ + struct rtnl_link_stats64 failover_stats; + + /* spinlock while updating stats */ + spinlock_t stats_lock; +}; + +struct failover *net_failover_create(struct net_device *standby_dev); +void net_failover_destroy(struct failover *failover); + +#define FAILOVER_VLAN_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ + NETIF_F_HIGHDMA | NETIF_F_LRO) + +#define FAILOVER_ENC_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG | \ + NETIF_F_RXCSUM | NETIF_F_ALL_TSO) + +#endif /* _NET_FAILOVER_H */ diff --git a/include/net/netfilter/ipv4/nf_nat_masquerade.h b/include/net/netfilter/ipv4/nf_nat_masquerade.h index ebd869473603..cd24be4c4a99 100644 --- a/include/net/netfilter/ipv4/nf_nat_masquerade.h +++ b/include/net/netfilter/ipv4/nf_nat_masquerade.h @@ -6,7 +6,7 @@ unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, - const struct nf_nat_range *range, + const struct nf_nat_range2 *range, const struct net_device *out); void nf_nat_masquerade_ipv4_register_notifier(void); diff --git a/include/net/netfilter/ipv6/nf_nat_masquerade.h b/include/net/netfilter/ipv6/nf_nat_masquerade.h index 1ed4f2631ed6..0c3b5ebf0bb8 100644 --- a/include/net/netfilter/ipv6/nf_nat_masquerade.h +++ b/include/net/netfilter/ipv6/nf_nat_masquerade.h @@ -3,7 +3,7 @@ #define _NF_NAT_MASQUERADE_IPV6_H_ unsigned int -nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, +nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out); void nf_nat_masquerade_ipv6_register_notifier(void); void nf_nat_masquerade_ipv6_unregister_notifier(void); diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index e61184fbfb71..3a188a0923a3 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -13,4 +13,16 @@ unsigned int nf_conncount_count(struct net *net, const u32 *key, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone); + +unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone, + bool *addit); + +bool nf_conncount_add(struct hlist_head *head, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); + +void nf_conncount_cache_free(struct hlist_head *hhead); + #endif diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 833752dd0c58..ba9fa4592f2b 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -6,6 +6,7 @@ #include <linux/netdevice.h> #include <linux/rhashtable.h> #include <linux/rcupdate.h> +#include <linux/netfilter/nf_conntrack_tuple_common.h> #include <net/dst.h> struct nf_flowtable; @@ -13,25 +14,24 @@ struct nf_flowtable; struct nf_flowtable_type { struct list_head list; int family; - void (*gc)(struct work_struct *work); + int (*init)(struct nf_flowtable *ft); void (*free)(struct nf_flowtable *ft); - const struct rhashtable_params *params; nf_hookfn *hook; struct module *owner; }; struct nf_flowtable { + struct list_head list; struct rhashtable rhashtable; const struct nf_flowtable_type *type; struct delayed_work gc_work; }; enum flow_offload_tuple_dir { - FLOW_OFFLOAD_DIR_ORIGINAL, - FLOW_OFFLOAD_DIR_REPLY, - __FLOW_OFFLOAD_DIR_MAX = FLOW_OFFLOAD_DIR_REPLY, + FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL, + FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY, + FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX }; -#define FLOW_OFFLOAD_DIR_MAX (__FLOW_OFFLOAD_DIR_MAX + 1) struct flow_offload_tuple { union { @@ -55,6 +55,8 @@ struct flow_offload_tuple { int oifidx; + u16 mtu; + struct dst_entry *dst_cache; }; @@ -66,6 +68,7 @@ struct flow_offload_tuple_rhash { #define FLOW_OFFLOAD_SNAT 0x1 #define FLOW_OFFLOAD_DNAT 0x2 #define FLOW_OFFLOAD_DYING 0x4 +#define FLOW_OFFLOAD_TEARDOWN 0x8 struct flow_offload { struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX]; @@ -98,11 +101,14 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table, void nf_flow_table_cleanup(struct net *net, struct net_device *dev); +int nf_flow_table_init(struct nf_flowtable *flow_table); void nf_flow_table_free(struct nf_flowtable *flow_table); -void nf_flow_offload_work_gc(struct work_struct *work); -extern const struct rhashtable_params nf_flow_offload_rhash_params; -void flow_offload_dead(struct flow_offload *flow); +void flow_offload_teardown(struct flow_offload *flow); +static inline void flow_offload_dead(struct flow_offload *flow) +{ + flow->flags |= FLOW_OFFLOAD_DYING; +} int nf_flow_snat_port(const struct flow_offload *flow, struct sk_buff *skb, unsigned int thoff, diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 207a467e7ca6..a17eb2f8d40e 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -39,7 +39,7 @@ struct nf_conn_nat { /* Set up the info structure to map into this range. */ unsigned int nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_range *range, + const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype); extern unsigned int nf_nat_alloc_null_binding(struct nf_conn *ct, @@ -75,4 +75,8 @@ static inline bool nf_nat_oif_changed(unsigned int hooknum, #endif } +int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops, + const struct nf_hook_ops *nat_ops, unsigned int ops_count); +void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops, + unsigned int ops_count); #endif diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h index 235bd0e9a5aa..dc7cd0440229 100644 --- a/include/net/netfilter/nf_nat_core.h +++ b/include/net/netfilter/nf_nat_core.h @@ -11,6 +11,10 @@ unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, struct sk_buff *skb); +unsigned int +nf_nat_inet_fn(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); + int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family); static inline int nf_nat_initialized(struct nf_conn *ct, @@ -22,11 +26,4 @@ static inline int nf_nat_initialized(struct nf_conn *ct, return ct->status & IPS_DST_NAT_DONE; } -struct nlattr; - -extern int -(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, - enum nf_nat_manip_type manip, - const struct nlattr *attr); - #endif /* _NF_NAT_CORE_H */ diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index ce7c2b4e64bb..d300b8f03972 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -7,7 +7,7 @@ struct nf_nat_l3proto { u8 l3proto; bool (*in_range)(const struct nf_conntrack_tuple *t, - const struct nf_nat_range *range); + const struct nf_nat_range2 *range); u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16); @@ -33,7 +33,7 @@ struct nf_nat_l3proto { struct flowi *fl); int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range *range); + struct nf_nat_range2 *range); }; int nf_nat_l3proto_register(const struct nf_nat_l3proto *); @@ -44,66 +44,14 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum); -unsigned int nf_nat_ipv4_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); - -unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); - -unsigned int nf_nat_ipv4_local_fn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); - -unsigned int nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); - int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, unsigned int hdrlen); -unsigned int nf_nat_ipv6_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); - -unsigned int nf_nat_ipv6_out(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); - -unsigned int nf_nat_ipv6_local_fn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); +int nf_nat_l3proto_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_l3proto_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops); -unsigned int nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state, - unsigned int (*do_chain)(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state, - struct nf_conn *ct)); +int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops); +void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops); #endif /* _NF_NAT_L3PROTO_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 67835ff8a2d9..b4d6b29bca62 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -34,12 +34,12 @@ struct nf_nat_l4proto { */ void (*unique_tuple)(const struct nf_nat_l3proto *l3proto, struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct); int (*nlattr_to_range)(struct nlattr *tb[], - struct nf_nat_range *range); + struct nf_nat_range2 *range); }; /* Protocol registration. */ @@ -72,11 +72,11 @@ bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_range2 *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, u16 *rover); int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range *range); + struct nf_nat_range2 *range); #endif /*_NF_NAT_L4PROTO_H*/ diff --git a/include/net/netfilter/nf_nat_redirect.h b/include/net/netfilter/nf_nat_redirect.h index 5ddabb08c472..c129aacc8ae8 100644 --- a/include/net/netfilter/nf_nat_redirect.h +++ b/include/net/netfilter/nf_nat_redirect.h @@ -7,7 +7,7 @@ nf_nat_redirect_ipv4(struct sk_buff *skb, const struct nf_nat_ipv4_multi_range_compat *mr, unsigned int hooknum); unsigned int -nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, +nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, unsigned int hooknum); #endif /* _NF_NAT_REDIRECT_H_ */ diff --git a/include/net/netfilter/nf_socket.h b/include/net/netfilter/nf_socket.h index 8230fefff9f5..f9d7bee9bd4e 100644 --- a/include/net/netfilter/nf_socket.h +++ b/include/net/netfilter/nf_socket.h @@ -2,22 +2,7 @@ #ifndef _NF_SOCK_H_ #define _NF_SOCK_H_ -struct net_device; -struct sk_buff; -struct sock; -struct net; - -static inline bool nf_sk_is_transparent(struct sock *sk) -{ - switch (sk->sk_state) { - case TCP_TIME_WAIT: - return inet_twsk(sk)->tw_transparent; - case TCP_NEW_SYN_RECV: - return inet_rsk(inet_reqsk(sk))->no_srccheck; - default: - return inet_sk(sk)->transparent; - } -} +#include <net/sock.h> struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index a1e28dd5d0bf..08c005ce56e9 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -9,6 +9,7 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> +#include <linux/rhashtable.h> #include <net/netfilter/nf_flow_table.h> #include <net/netlink.h> @@ -276,23 +277,6 @@ struct nft_set_estimate { enum nft_set_class space; }; -/** - * struct nft_set_type - nf_tables set type - * - * @select_ops: function to select nft_set_ops - * @ops: default ops, used when no select_ops functions is present - * @list: used internally - * @owner: module reference - */ -struct nft_set_type { - const struct nft_set_ops *(*select_ops)(const struct nft_ctx *, - const struct nft_set_desc *desc, - u32 flags); - const struct nft_set_ops *ops; - struct list_head list; - struct module *owner; -}; - struct nft_set_ext; struct nft_expr; @@ -311,7 +295,6 @@ struct nft_expr; * @init: initialize private data of new set instance * @destroy: destroy private data of set instance * @elemsize: element private size - * @features: features supported by the implementation */ struct nft_set_ops { bool (*lookup)(const struct net *net, @@ -360,11 +343,26 @@ struct nft_set_ops { const struct nft_set_desc *desc, const struct nlattr * const nla[]); void (*destroy)(const struct nft_set *set); + void (*gc_init)(const struct nft_set *set); unsigned int elemsize; +}; + +/** + * struct nft_set_type - nf_tables set type + * + * @ops: set ops for this type + * @list: used internally + * @owner: module reference + * @features: features supported by the implementation + */ +struct nft_set_type { + const struct nft_set_ops ops; + struct list_head list; + struct module *owner; u32 features; - const struct nft_set_type *type; }; +#define to_set_type(o) container_of(o, struct nft_set_type, ops) int nft_register_set(struct nft_set_type *type); void nft_unregister_set(struct nft_set_type *type); @@ -374,6 +372,8 @@ void nft_unregister_set(struct nft_set_type *type); * * @list: table set list node * @bindings: list of set bindings + * @table: table this set belongs to + * @net: netnamespace this set belongs to * @name: name of the set * @handle: unique handle of the set * @ktype: key type (numeric type defined by userspace, not used in the kernel) @@ -397,6 +397,8 @@ void nft_unregister_set(struct nft_set_type *type); struct nft_set { struct list_head list; struct list_head bindings; + struct nft_table *table; + possible_net_t net; char *name; u64 handle; u32 ktype; @@ -590,7 +592,7 @@ static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext) return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT); } -static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext) +static inline u64 *nft_set_ext_expiration(const struct nft_set_ext *ext) { return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION); } @@ -608,7 +610,7 @@ static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext) static inline bool nft_set_elem_expired(const struct nft_set_ext *ext) { return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) && - time_is_before_eq_jiffies(*nft_set_ext_expiration(ext)); + time_is_before_eq_jiffies64(*nft_set_ext_expiration(ext)); } static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set, @@ -712,6 +714,7 @@ struct nft_expr_type { }; #define NFT_EXPR_STATEFUL 0x1 +#define NFT_EXPR_GC 0x2 /** * struct nft_expr_ops - nf_tables expression operations @@ -743,11 +746,15 @@ struct nft_expr_ops { const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); + void (*destroy_clone)(const struct nft_ctx *ctx, + const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); + bool (*gc)(struct net *net, + const struct nft_expr *expr); const struct nft_expr_type *type; void *data; }; @@ -854,6 +861,7 @@ enum nft_chain_flags { * * @rules: list of rules in the chain * @list: used internally + * @rhlhead: used internally * @table: table that this chain belongs to * @handle: chain handle * @use: number of jump references to this chain @@ -862,8 +870,11 @@ enum nft_chain_flags { * @name: name of the chain */ struct nft_chain { + struct nft_rule *__rcu *rules_gen_0; + struct nft_rule *__rcu *rules_gen_1; struct list_head rules; struct list_head list; + struct rhlist_head rhlhead; struct nft_table *table; u64 handle; u32 use; @@ -871,8 +882,13 @@ struct nft_chain { u8 flags:6, genmask:2; char *name; + + /* Only used during control plane commit phase: */ + struct nft_rule **rules_next; }; +int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain); + enum nft_chain_types { NFT_CHAIN_T_DEFAULT = 0, NFT_CHAIN_T_ROUTE, @@ -889,8 +905,8 @@ enum nft_chain_types { * @owner: module owner * @hook_mask: mask of valid hooks * @hooks: array of hook functions - * @init: chain initialization function - * @free: chain release function + * @ops_register: base chain register function + * @ops_unregister: base chain unregister function */ struct nft_chain_type { const char *name; @@ -899,8 +915,8 @@ struct nft_chain_type { struct module *owner; unsigned int hook_mask; nf_hookfn *hooks[NF_MAX_HOOKS]; - int (*init)(struct nft_ctx *ctx); - void (*free)(struct nft_ctx *ctx); + int (*ops_register)(struct net *net, const struct nf_hook_ops *ops); + void (*ops_unregister)(struct net *net, const struct nf_hook_ops *ops); }; int nft_chain_validate_dependency(const struct nft_chain *chain, @@ -952,7 +968,8 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); * struct nft_table - nf_tables table * * @list: used internally - * @chains: chains in the table + * @chains_ht: chains in the table + * @chains: same, for stable walks * @sets: sets in the table * @objects: stateful objects in the table * @flowtables: flow tables in the table @@ -966,6 +983,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt, void *priv); */ struct nft_table { struct list_head list; + struct rhltable chains_ht; struct list_head chains; struct list_head sets; struct list_head objects; @@ -1020,9 +1038,9 @@ static inline void *nft_obj_data(const struct nft_object *obj) #define nft_expr_obj(expr) *((struct nft_object **)nft_expr_priv(expr)) -struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, - const struct nlattr *nla, u32 objtype, - u8 genmask); +struct nft_object *nft_obj_lookup(const struct nft_table *table, + const struct nlattr *nla, u32 objtype, + u8 genmask); void nft_obj_notify(struct net *net, struct nft_table *table, struct nft_object *obj, u32 portid, u32 seq, @@ -1067,7 +1085,8 @@ struct nft_object_ops { int (*init)(const struct nft_ctx *ctx, const struct nlattr *const tb[], struct nft_object *obj); - void (*destroy)(struct nft_object *obj); + void (*destroy)(const struct nft_ctx *ctx, + struct nft_object *obj); int (*dump)(struct sk_buff *skb, struct nft_object *obj, bool reset); @@ -1111,12 +1130,9 @@ struct nft_flowtable { struct nf_flowtable data; }; -struct nft_flowtable *nf_tables_flowtable_lookup(const struct nft_table *table, - const struct nlattr *nla, - u8 genmask); -void nft_flow_table_iterate(struct net *net, - void (*iter)(struct nf_flowtable *flowtable, void *data), - void *data); +struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table, + const struct nlattr *nla, + u8 genmask); void nft_register_flowtable_type(struct nf_flowtable_type *type); void nft_unregister_flowtable_type(struct nf_flowtable_type *type); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index ea5aab568be8..e0c0c2558ec4 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -2,6 +2,8 @@ #ifndef _NET_NF_TABLES_CORE_H #define _NET_NF_TABLES_CORE_H +#include <net/netfilter/nf_tables.h> + extern struct nft_expr_type nft_imm_type; extern struct nft_expr_type nft_cmp_type; extern struct nft_expr_type nft_lookup_type; @@ -10,6 +12,9 @@ extern struct nft_expr_type nft_byteorder_type; extern struct nft_expr_type nft_payload_type; extern struct nft_expr_type nft_dynset_type; extern struct nft_expr_type nft_range_type; +extern struct nft_expr_type nft_meta_type; +extern struct nft_expr_type nft_rt_type; +extern struct nft_expr_type nft_exthdr_type; int nf_tables_core_module_init(void); void nf_tables_core_module_exit(void); @@ -20,6 +25,12 @@ struct nft_cmp_fast_expr { u8 len; }; +struct nft_immediate_expr { + struct nft_data data; + enum nft_registers dreg:8; + u8 dlen; +}; + /* Calculate the mask for the nft_cmp_fast expression. On big endian the * mask needs to include the *upper* bytes when interpreting that data as * something smaller than the full u32, therefore a cpu_to_le32 is done. diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h new file mode 100644 index 000000000000..9754a50ecde9 --- /dev/null +++ b/include/net/netfilter/nf_tproxy.h @@ -0,0 +1,113 @@ +#ifndef _NF_TPROXY_H_ +#define _NF_TPROXY_H_ + +#include <net/tcp.h> + +enum nf_tproxy_lookup_t { + NF_TPROXY_LOOKUP_LISTENER, + NF_TPROXY_LOOKUP_ESTABLISHED, +}; + +static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) +{ + if (inet_sk_transparent(sk)) + return true; + + sock_gen_put(sk); + return false; +} + +__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr); + +/** + * nf_tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @laddr: IPv4 address to redirect to or zero. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * nf_tproxy_handle_time_wait4() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +struct sock * +nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + __be32 laddr, __be16 lport, struct sock *sk); + +/* + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +struct sock * +nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, + const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type); + +const struct in6_addr * +nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, + const struct in6_addr *daddr); + +/** + * nf_tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @tproto: Transport protocol. + * @thoff: Transport protocol header offset. + * @net: Network namespace. + * @laddr: IPv6 address to redirect to. + * @lport: TCP port to redirect to or zero. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * nf_tproxy_handle_time_wait6() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +struct sock * +nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + struct net *net, + const struct in6_addr *laddr, + const __be16 lport, + struct sock *sk); + +struct sock * +nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, + const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, + const enum nf_tproxy_lookup_t lookup_type); + +#endif /* _NF_TPROXY_H_ */ diff --git a/include/net/netfilter/nfnetlink_log.h b/include/net/netfilter/nfnetlink_log.h index 612cfb63ac68..ea32a7d3cf1b 100644 --- a/include/net/netfilter/nfnetlink_log.h +++ b/include/net/netfilter/nfnetlink_log.h @@ -1,18 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _KER_NFNETLINK_LOG_H -#define _KER_NFNETLINK_LOG_H - -void -nfulnl_log_packet(struct net *net, - u_int8_t pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *li_user, - const char *prefix); - -#define NFULNL_COPY_DISABLED 0xff - -#endif /* _KER_NFNETLINK_LOG_H */ - diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h deleted file mode 100644 index 4d9d512984b2..000000000000 --- a/include/net/netfilter/nft_dup.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_DUP_H_ -#define _NFT_DUP_H_ - -struct nft_dup_inet { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; -}; - -#endif /* _NFT_DUP_H_ */ diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h deleted file mode 100644 index 5c69e9b09388..000000000000 --- a/include/net/netfilter/nft_meta.h +++ /dev/null @@ -1,44 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_META_H_ -#define _NFT_META_H_ - -struct nft_meta { - enum nft_meta_keys key:8; - union { - enum nft_registers dreg:8; - enum nft_registers sreg:8; - }; -}; - -extern const struct nla_policy nft_meta_policy[]; - -int nft_meta_get_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]); - -int nft_meta_set_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]); - -int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr); - -int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr); - -void nft_meta_get_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt); - -void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt); - -void nft_meta_set_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr); - -int nft_meta_set_validate(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nft_data **data); - -#endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 8491bc9c86b1..661348f23ea5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -160,6 +160,8 @@ struct netns_ipv4 { int sysctl_tcp_pacing_ca_ratio; int sysctl_tcp_wmem[3]; int sysctl_tcp_rmem[3]; + int sysctl_tcp_comp_sack_nr; + unsigned long sysctl_tcp_comp_sack_delay_ns; struct inet_timewait_death_row tcp_death_row; int sysctl_max_syn_backlog; int sysctl_tcp_fastopen; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index c29f09cfc9d7..c978a31b0f84 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -43,6 +43,7 @@ struct netns_sysctl_ipv6 { int max_hbh_opts_cnt; int max_dst_opts_len; int max_hbh_opts_len; + int seg6_flowlabel; }; struct netns_ipv6 { @@ -60,7 +61,8 @@ struct netns_ipv6 { #endif struct xt_table *ip6table_nat; #endif - struct rt6_info *ip6_null_entry; + struct fib6_info *fib6_null_entry; + struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; struct timer_list ip6_fib_timer; struct hlist_head *fib_table_hash; diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h index 48134353411d..94767ea3a490 100644 --- a/include/net/netns/nftables.h +++ b/include/net/netns/nftables.h @@ -4,13 +4,12 @@ #include <linux/list.h> -struct nft_af_info; - struct netns_nftables { struct list_head tables; struct list_head commit_list; unsigned int base_seq; u8 gencursor; + u8 validate_state; }; #endif diff --git a/include/net/netrom.h b/include/net/netrom.h index 0dad2dd5f9d7..5a0714ff500f 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -13,6 +13,7 @@ #include <linux/slab.h> #include <net/sock.h> #include <linux/refcount.h> +#include <linux/seq_file.h> #define NR_NETWORK_LEN 15 #define NR_TRANSPORT_LEN 5 @@ -216,8 +217,8 @@ struct net_device *nr_dev_get(ax25_address *); int nr_rt_ioctl(unsigned int, void __user *); void nr_link_failed(ax25_cb *, int); int nr_route_frame(struct sk_buff *, ax25_cb *); -extern const struct file_operations nr_nodes_fops; -extern const struct file_operations nr_neigh_fops; +extern const struct seq_operations nr_node_seqops; +extern const struct seq_operations nr_neigh_seqops; void nr_rt_free(void); /* nr_subr.c */ diff --git a/include/net/page_pool.h b/include/net/page_pool.h new file mode 100644 index 000000000000..694d055e01ef --- /dev/null +++ b/include/net/page_pool.h @@ -0,0 +1,144 @@ +/* SPDX-License-Identifier: GPL-2.0 + * + * page_pool.h + * Author: Jesper Dangaard Brouer <netoptimizer@brouer.com> + * Copyright (C) 2016 Red Hat, Inc. + */ + +/** + * DOC: page_pool allocator + * + * This page_pool allocator is optimized for the XDP mode that + * uses one-frame-per-page, but have fallbacks that act like the + * regular page allocator APIs. + * + * Basic use involve replacing alloc_pages() calls with the + * page_pool_alloc_pages() call. Drivers should likely use + * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). + * + * If page_pool handles DMA mapping (use page->private), then API user + * is responsible for invoking page_pool_put_page() once. In-case of + * elevated refcnt, the DMA state is released, assuming other users of + * the page will eventually call put_page(). + * + * If no DMA mapping is done, then it can act as shim-layer that + * fall-through to alloc_page. As no state is kept on the page, the + * regular put_page() call is sufficient. + */ +#ifndef _NET_PAGE_POOL_H +#define _NET_PAGE_POOL_H + +#include <linux/mm.h> /* Needed by ptr_ring */ +#include <linux/ptr_ring.h> +#include <linux/dma-direction.h> + +#define PP_FLAG_DMA_MAP 1 /* Should page_pool do the DMA map/unmap */ +#define PP_FLAG_ALL PP_FLAG_DMA_MAP + +/* + * Fast allocation side cache array/stack + * + * The cache size and refill watermark is related to the network + * use-case. The NAPI budget is 64 packets. After a NAPI poll the RX + * ring is usually refilled and the max consumed elements will be 64, + * thus a natural max size of objects needed in the cache. + * + * Keeping room for more objects, is due to XDP_DROP use-case. As + * XDP_DROP allows the opportunity to recycle objects directly into + * this array, as it shares the same softirq/NAPI protection. If + * cache is already full (or partly full) then the XDP_DROP recycles + * would have to take a slower code path. + */ +#define PP_ALLOC_CACHE_SIZE 128 +#define PP_ALLOC_CACHE_REFILL 64 +struct pp_alloc_cache { + u32 count; + void *cache[PP_ALLOC_CACHE_SIZE]; +}; + +struct page_pool_params { + unsigned int flags; + unsigned int order; + unsigned int pool_size; + int nid; /* Numa node id to allocate from pages from */ + struct device *dev; /* device, for DMA pre-mapping purposes */ + enum dma_data_direction dma_dir; /* DMA mapping direction */ +}; + +struct page_pool { + struct rcu_head rcu; + struct page_pool_params p; + + /* + * Data structure for allocation side + * + * Drivers allocation side usually already perform some kind + * of resource protection. Piggyback on this protection, and + * require driver to protect allocation side. + * + * For NIC drivers this means, allocate a page_pool per + * RX-queue. As the RX-queue is already protected by + * Softirq/BH scheduling and napi_schedule. NAPI schedule + * guarantee that a single napi_struct will only be scheduled + * on a single CPU (see napi_schedule). + */ + struct pp_alloc_cache alloc ____cacheline_aligned_in_smp; + + /* Data structure for storing recycled pages. + * + * Returning/freeing pages is more complicated synchronization + * wise, because free's can happen on remote CPUs, with no + * association with allocation resource. + * + * Use ptr_ring, as it separates consumer and producer + * effeciently, it a way that doesn't bounce cache-lines. + * + * TODO: Implement bulk return pages into this structure. + */ + struct ptr_ring ring; +}; + +struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); + +static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) +{ + gfp_t gfp = (GFP_ATOMIC | __GFP_NOWARN); + + return page_pool_alloc_pages(pool, gfp); +} + +struct page_pool *page_pool_create(const struct page_pool_params *params); + +void page_pool_destroy(struct page_pool *pool); + +/* Never call this directly, use helpers below */ +void __page_pool_put_page(struct page_pool *pool, + struct page *page, bool allow_direct); + +static inline void page_pool_put_page(struct page_pool *pool, + struct page *page, bool allow_direct) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + __page_pool_put_page(pool, page, allow_direct); +#endif +} +/* Very limited use-cases allow recycle direct */ +static inline void page_pool_recycle_direct(struct page_pool *pool, + struct page *page) +{ + __page_pool_put_page(pool, page, true); +} + +static inline bool is_page_pool_compiled_in(void) +{ +#ifdef CONFIG_PAGE_POOL + return true; +#else + return false; +#endif +} + +#endif /* _NET_PAGE_POOL_H */ diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 8639de5750f6..cbee32be1d9c 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -56,7 +56,7 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff -extern const struct file_operations pn_sock_seq_fops; -extern const struct file_operations pn_res_seq_fops; +extern const struct seq_operations pn_sock_seq_ops; +extern const struct seq_operations pn_res_seq_ops; #endif diff --git a/include/net/ping.h b/include/net/ping.h index 4cd90d6b5c25..fd080e043a6e 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -83,20 +83,9 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); bool ping_rcv(struct sk_buff *skb); #ifdef CONFIG_PROC_FS -struct ping_seq_afinfo { - char *name; - sa_family_t family; - const struct file_operations *seq_fops; - const struct seq_operations seq_ops; -}; - -extern const struct file_operations ping_seq_fops; - void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family); void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos); void ping_seq_stop(struct seq_file *seq, void *v); -int ping_proc_register(struct net *net, struct ping_seq_afinfo *afinfo); -void ping_proc_unregister(struct net *net, struct ping_seq_afinfo *afinfo); int __init ping_proc_init(void); void ping_proc_exit(void); diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e828d31be5da..a3c1a2c47cd4 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -33,7 +33,7 @@ struct tcf_block_ext_info { }; struct tcf_block_cb; -bool tcf_queue_work(struct work_struct *work); +bool tcf_queue_work(struct rcu_work *rwork, work_func_t func); #ifdef CONFIG_NET_CLS struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -683,9 +683,11 @@ static inline bool tc_skip_sw(u32 flags) /* SKIP_HW and SKIP_SW are mutually exclusive flags. */ static inline bool tc_flags_valid(u32 flags) { - if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW)) + if (flags & ~(TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW | + TCA_CLS_FLAGS_VERBOSE)) return false; + flags &= TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW; if (!(flags ^ (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW))) return false; @@ -705,7 +707,7 @@ tc_cls_common_offload_init(struct tc_cls_common_offload *cls_common, cls_common->chain_index = tp->chain->index; cls_common->protocol = tp->protocol; cls_common->prio = tp->prio; - if (tc_skip_sw(flags)) + if (tc_skip_sw(flags) || flags & TCA_CLS_FLAGS_VERBOSE) cls_common->extack = extack; } @@ -776,6 +778,18 @@ struct tc_qopt_offload_stats { struct gnet_stats_queue *qstats; }; +enum tc_mq_command { + TC_MQ_CREATE, + TC_MQ_DESTROY, + TC_MQ_STATS, +}; + +struct tc_mq_qopt_offload { + enum tc_mq_command command; + u32 handle; + struct tc_qopt_offload_stats stats; +}; + enum tc_red_command { TC_RED_REPLACE, TC_RED_DESTROY, diff --git a/include/net/raw.h b/include/net/raw.h index 99d26d0c4a19..9c9fa98a91a4 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -48,7 +48,6 @@ void raw_proc_exit(void); struct raw_iter_state { struct seq_net_private p; int bucket; - struct raw_hashinfo *h; }; static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) @@ -58,9 +57,6 @@ static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -int raw_seq_open(struct inode *ino, struct file *file, - struct raw_hashinfo *h, const struct seq_operations *ops); - #endif int raw_hash_sk(struct sock *sk); diff --git a/include/net/rose.h b/include/net/rose.h index 04b72681f2ab..cf517d306a28 100644 --- a/include/net/rose.h +++ b/include/net/rose.h @@ -200,9 +200,9 @@ void rose_enquiry_response(struct sock *); /* rose_route.c */ extern struct rose_neigh *rose_loopback_neigh; -extern const struct file_operations rose_neigh_fops; -extern const struct file_operations rose_nodes_fops; -extern const struct file_operations rose_routes_fops; +extern const struct seq_operations rose_neigh_seqops; +extern const struct seq_operations rose_node_seqops; +extern struct seq_operations rose_route_seqops; void rose_add_loopback_neigh(void); int __must_check rose_add_loopback_node(rose_address *); diff --git a/include/net/route.h b/include/net/route.h index dbb032d5921b..bb53cdba38dc 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -225,6 +225,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev, struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *); +void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric); void rt_add_uncached_list(struct rtable *rt); void rt_del_uncached_list(struct rtable *rt); diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index 14b6b3af8918..0bbaa5488423 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -64,7 +64,7 @@ struct rtnl_link_ops { size_t priv_size; void (*setup)(struct net_device *dev); - int maxtype; + unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], struct nlattr *data[], @@ -92,7 +92,7 @@ struct rtnl_link_ops { unsigned int (*get_num_tx_queues)(void); unsigned int (*get_num_rx_queues)(void); - int slave_maxtype; + unsigned int slave_maxtype; const struct nla_policy *slave_policy; int (*slave_changelink)(struct net_device *dev, struct net_device *slave_dev, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 5154c8300262..6488daa32f82 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -30,7 +30,6 @@ struct qdisc_rate_table { enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, - __QDISC_STATE_RUNNING, }; struct qdisc_size_table { @@ -86,6 +85,8 @@ struct Qdisc { struct net_rate_estimator __rcu *rate_est; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; + int padded; + refcount_t refcnt; /* * For performance sake on SMP, we put highly modified fields at the end @@ -98,10 +99,9 @@ struct Qdisc { unsigned long state; struct Qdisc *next_sched; struct sk_buff_head skb_bad_txq; - int padded; - refcount_t refcnt; spinlock_t busylock ____cacheline_aligned_in_smp; + spinlock_t seqlock; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) @@ -111,15 +111,21 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } -static inline bool qdisc_is_running(const struct Qdisc *qdisc) +static inline bool qdisc_is_running(struct Qdisc *qdisc) { + if (qdisc->flags & TCQ_F_NOLOCK) + return spin_is_locked(&qdisc->seqlock); return (raw_read_seqcount(&qdisc->running) & 1) ? true : false; } static inline bool qdisc_run_begin(struct Qdisc *qdisc) { - if (qdisc_is_running(qdisc)) + if (qdisc->flags & TCQ_F_NOLOCK) { + if (!spin_trylock(&qdisc->seqlock)) + return false; + } else if (qdisc_is_running(qdisc)) { return false; + } /* Variant of write_seqcount_begin() telling lockdep a trylock * was attempted. */ @@ -131,6 +137,8 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running); + if (qdisc->flags & TCQ_F_NOLOCK) + spin_unlock(&qdisc->seqlock); } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) @@ -342,14 +350,14 @@ static inline int qdisc_qlen(const struct Qdisc *q) static inline int qdisc_qlen_sum(const struct Qdisc *q) { - __u32 qlen = 0; + __u32 qlen = q->qstats.qlen; int i; if (q->flags & TCQ_F_NOLOCK) { for_each_possible_cpu(i) qlen += per_cpu_ptr(q->cpu_qstats, i)->qlen; } else { - qlen = q->q.qlen; + qlen += q->q.qlen; } return qlen; diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 20ff237c5eb2..86f034b524d4 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -254,11 +254,10 @@ enum { SCTP_ARBITRARY_COOKIE_ECHO_LEN = 200 }; #define SCTP_TSN_MAP_SIZE 4096 /* We will not record more than this many duplicate TSNs between two - * SACKs. The minimum PMTU is 576. Remove all the headers and there - * is enough room for 131 duplicate reports. Round down to the + * SACKs. The minimum PMTU is 512. Remove all the headers and there + * is enough room for 117 duplicate reports. Round down to the * nearest power of 2. */ -enum { SCTP_MIN_PMTU = 576 }; enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 35498e613ff5..30b3e2fe240a 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -109,8 +109,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); void sctp_data_ready(struct sock *sk); -__poll_t sctp_poll(struct file *file, struct socket *sock, - poll_table *wait); +__poll_t sctp_poll_mask(struct socket *sock, __poll_t events); void sctp_sock_rfree(struct sk_buff *skb); void sctp_copy_sock(struct sock *newsk, struct sock *sk, struct sctp_association *asoc); @@ -430,32 +429,6 @@ static inline int sctp_list_single_entry(struct list_head *head) return (head->next != head) && (head->next == head->prev); } -/* Break down data chunks at this point. */ -static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) -{ - struct sctp_sock *sp = sctp_sk(asoc->base.sk); - struct sctp_af *af = sp->pf->af; - int frag = pmtu; - - frag -= af->ip_options_len(asoc->base.sk); - frag -= af->net_header_len; - frag -= sizeof(struct sctphdr) + sctp_datachk_len(&asoc->stream); - - if (asoc->user_frag) - frag = min_t(int, frag, asoc->user_frag); - - frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN - - sctp_datachk_len(&asoc->stream))); - - return frag; -} - -static inline void sctp_assoc_pending_pmtu(struct sctp_association *asoc) -{ - sctp_assoc_sync_pmtu(asoc); - asoc->pmtu_pending = 0; -} - static inline bool sctp_chunk_pending(const struct sctp_chunk *chunk) { return !list_empty(&chunk->list); @@ -609,17 +582,29 @@ static inline struct dst_entry *sctp_transport_dst_check(struct sctp_transport * return t->dst; } -static inline bool sctp_transport_pmtu_check(struct sctp_transport *t) +/* Calculate max payload size given a MTU, or the total overhead if + * given MTU is zero + */ +static inline __u32 sctp_mtu_payload(const struct sctp_sock *sp, + __u32 mtu, __u32 extra) { - __u32 pmtu = max_t(size_t, SCTP_TRUNC4(dst_mtu(t->dst)), - SCTP_DEFAULT_MINSEGMENT); + __u32 overhead = sizeof(struct sctphdr) + extra; - if (t->pathmtu == pmtu) - return true; + if (sp) + overhead += sp->pf->af->net_header_len; + else + overhead += sizeof(struct ipv6hdr); - t->pathmtu = pmtu; + if (WARN_ON_ONCE(mtu && mtu <= overhead)) + mtu = overhead; - return false; + return mtu ? mtu - overhead : overhead; +} + +static inline __u32 sctp_dst_mtu(const struct dst_entry *dst) +{ + return SCTP_TRUNC4(max_t(__u32, dst_mtu(dst), + SCTP_DEFAULT_MINSEGMENT)); } #endif /* __net_sctp_h__ */ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 2d0e782c9055..5ef1bad81ef5 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -207,7 +207,7 @@ struct sctp_chunk *sctp_make_datafrag_empty(const struct sctp_association *asoc, int len, __u8 flags, gfp_t gfp); struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, const __u32 lowest_tsn); -struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc); +struct sctp_chunk *sctp_make_sack(struct sctp_association *asoc); struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk); struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, @@ -215,7 +215,7 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, struct sctp_chunk *sctp_make_shutdown_complete( const struct sctp_association *asoc, const struct sctp_chunk *chunk); -void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); +int sctp_init_cause(struct sctp_chunk *chunk, __be16 cause, size_t paylen); struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, const struct sctp_chunk *chunk, const size_t hint); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index a0ec462bc1a9..dbe1b911a24d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1133,6 +1133,11 @@ struct sctp_input_cb { }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) +struct sctp_output_cb { + struct sk_buff *last; +}; +#define SCTP_OUTPUT_CB(__skb) ((struct sctp_output_cb *)&((__skb)->cb[0])) + static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) { const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; @@ -2091,16 +2096,14 @@ void sctp_assoc_control_transport(struct sctp_association *asoc, enum sctp_transport_cmd command, sctp_sn_error_t error); struct sctp_transport *sctp_assoc_lookup_tsn(struct sctp_association *, __u32); -struct sctp_transport *sctp_assoc_is_match(struct sctp_association *, - struct net *, - const union sctp_addr *, - const union sctp_addr *); void sctp_assoc_migrate(struct sctp_association *, struct sock *); int sctp_assoc_update(struct sctp_association *old, struct sctp_association *new); __u32 sctp_association_get_next_tsn(struct sctp_association *); +void sctp_assoc_update_frag_point(struct sctp_association *asoc); +void sctp_assoc_set_pmtu(struct sctp_association *asoc, __u32 pmtu); void sctp_assoc_sync_pmtu(struct sctp_association *asoc); void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int); void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int); diff --git a/include/net/seg6.h b/include/net/seg6.h index 099bad59dc90..e029e301faa5 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -49,7 +49,11 @@ struct seg6_pernet_data { static inline struct seg6_pernet_data *seg6_pernet(struct net *net) { +#if IS_ENABLED(CONFIG_IPV6) return net->ipv6.seg6_data; +#else + return NULL; +#endif } extern int seg6_init(void); @@ -63,5 +67,6 @@ extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len); extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto); extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh); - +extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, + u32 tbl_id); #endif diff --git a/include/net/seg6_local.h b/include/net/seg6_local.h new file mode 100644 index 000000000000..661fd5b4d3e0 --- /dev/null +++ b/include/net/seg6_local.h @@ -0,0 +1,32 @@ +/* + * SR-IPv6 implementation + * + * Authors: + * David Lebrun <david.lebrun@uclouvain.be> + * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _NET_SEG6_LOCAL_H +#define _NET_SEG6_LOCAL_H + +#include <linux/percpu.h> +#include <linux/net.h> +#include <linux/ipv6.h> + +extern int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, + u32 tbl_id); + +struct seg6_bpf_srh_state { + bool valid; + u16 hdrlen; +}; + +DECLARE_PER_CPU(struct seg6_bpf_srh_state, seg6_bpf_srh_states); + +#endif diff --git a/include/net/sock.h b/include/net/sock.h index 74d725fdbe0f..b3b75419eafe 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -481,6 +481,11 @@ struct sock { void (*sk_error_report)(struct sock *sk); int (*sk_backlog_rcv)(struct sock *sk, struct sk_buff *skb); +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sk_buff* (*sk_validate_xmit_skb)(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); +#endif void (*sk_destruct)(struct sock *sk); struct sock_reuseport __rcu *sk_reuseport_cb; struct rcu_head sk_rcu; @@ -803,10 +808,10 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) } #ifdef CONFIG_NET -extern struct static_key memalloc_socks; +DECLARE_STATIC_KEY_FALSE(memalloc_socks_key); static inline int sk_memalloc_socks(void) { - return static_key_false(&memalloc_socks); + return static_branch_unlikely(&memalloc_socks_key); } #else @@ -1591,8 +1596,6 @@ int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); int sock_no_accept(struct socket *, struct socket *, int, bool); int sock_no_getname(struct socket *, struct sockaddr *, int); -__poll_t sock_no_poll(struct file *, struct socket *, - struct poll_table_struct *); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); int sock_no_shutdown(struct socket *, int); @@ -2332,6 +2335,22 @@ static inline bool sk_fullsock(const struct sock *sk) return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV); } +/* Checks if this SKB belongs to an HW offloaded socket + * and whether any SW fallbacks are required based on dev. + */ +static inline struct sk_buff *sk_validate_xmit_skb(struct sk_buff *skb, + struct net_device *dev) +{ +#ifdef CONFIG_SOCK_VALIDATE_XMIT + struct sock *sk = skb->sk; + + if (sk && sk_fullsock(sk) && sk->sk_validate_xmit_skb) + skb = sk->sk_validate_xmit_skb(sk, dev, skb); +#endif + + return skb; +} + /* This helper checks if a socket is a LISTEN or NEW_SYN_RECV * SYNACK messages can be attached to either ones (depending on SYNCOOKIE) */ diff --git a/include/net/strparser.h b/include/net/strparser.h index d96b59f45eba..f177c87ce38b 100644 --- a/include/net/strparser.h +++ b/include/net/strparser.h @@ -90,6 +90,8 @@ static inline void strp_pause(struct strparser *strp) /* May be called without holding lock for attached socket */ void strp_unpause(struct strparser *strp); +/* Must be called with process lock held (lock_sock) */ +void __strp_unpause(struct strparser *strp); static inline void save_strp_stats(struct strparser *strp, struct strp_aggr_stats *agg_stats) diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 39bc855d7fee..d574ce63bf22 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -155,6 +155,7 @@ struct switchdev_notifier_fdb_info { struct switchdev_notifier_info info; /* must be first */ const unsigned char *addr; u16 vid; + bool added_by_user; }; static inline struct net_device * diff --git a/include/net/tcp.h b/include/net/tcp.h index 9c9b3768b350..0448e7c5d2b4 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -245,6 +245,7 @@ extern long sysctl_tcp_mem[3]; #define TCP_RACK_LOSS_DETECTION 0x1 /* Use RACK to detect losses */ #define TCP_RACK_STATIC_REO_WND 0x2 /* Use static RACK reo wnd */ +#define TCP_RACK_NO_DUPTHRESH 0x4 /* Do not use DUPACK threshold in RACK */ extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -333,8 +334,7 @@ void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); -void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th); +void tcp_rcv_established(struct sock *sk, struct sk_buff *skb); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); @@ -388,8 +388,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); void tcp_init_transfer(struct sock *sk, int bpf_op); -__poll_t tcp_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); +__poll_t tcp_poll_mask(struct socket *sock, __poll_t events); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, @@ -402,6 +401,10 @@ void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); +int tcp_set_rcvlowat(struct sock *sk, int val); +void tcp_data_ready(struct sock *sk); +int tcp_mmap(struct file *file, struct socket *sock, + struct vm_area_struct *vma); void tcp_parse_options(const struct net *net, const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); @@ -553,7 +556,12 @@ void tcp_fin(struct sock *sk); void tcp_init_xmit_timers(struct sock *); static inline void tcp_clear_xmit_timers(struct sock *sk) { - hrtimer_cancel(&tcp_sk(sk)->pacing_timer); + if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1) + __sock_put(sk); + + if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1) + __sock_put(sk); + inet_csk_clear_xmit_timers(sk); } @@ -810,9 +818,8 @@ struct tcp_skb_cb { #endif } header; /* For incoming skbs */ struct { - __u32 key; __u32 flags; - struct bpf_map *map; + struct sock *sk_redir; void *data_end; } bpf; }; @@ -1747,27 +1754,22 @@ enum tcp_seq_states { TCP_SEQ_STATE_ESTABLISHED, }; -int tcp_seq_open(struct inode *inode, struct file *file); +void *tcp_seq_start(struct seq_file *seq, loff_t *pos); +void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos); +void tcp_seq_stop(struct seq_file *seq, void *v); struct tcp_seq_afinfo { - char *name; sa_family_t family; - const struct file_operations *seq_fops; - struct seq_operations seq_ops; }; struct tcp_iter_state { struct seq_net_private p; - sa_family_t family; enum tcp_seq_states state; struct sock *syn_wait_sk; int bucket, offset, sbucket, num; loff_t last_pos; }; -int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo); -void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo); - extern struct request_sock_ops tcp_request_sock_ops; extern struct request_sock_ops tcp6_request_sock_ops; @@ -1871,6 +1873,10 @@ void tcp_v4_init(void); void tcp_init(void); /* tcp_recovery.c */ +void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb); +void tcp_newreno_mark_lost(struct sock *sk, bool snd_una_advanced); +extern s32 tcp_rack_skb_timeout(struct tcp_sock *tp, struct sk_buff *skb, + u32 reo_wnd); extern void tcp_rack_mark_lost(struct sock *sk); extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, u64 xmit_time); @@ -2101,4 +2107,12 @@ static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk) #if IS_ENABLED(CONFIG_SMC) extern struct static_key_false tcp_have_smc; #endif + +#if IS_ENABLED(CONFIG_TLS_DEVICE) +void clean_acked_data_enable(struct inet_connection_sock *icsk, + void (*cad)(struct sock *sk, u32 ack_seq)); +void clean_acked_data_disable(struct inet_connection_sock *icsk); + +#endif + #endif /* _TCP_H */ diff --git a/include/net/tipc.h b/include/net/tipc.h index 07670ec022a7..f0e7e6bc1bef 100644 --- a/include/net/tipc.h +++ b/include/net/tipc.h @@ -44,11 +44,11 @@ struct tipc_basic_hdr { __be32 w[4]; }; -static inline u32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) +static inline __be32 tipc_hdr_rps_key(struct tipc_basic_hdr *hdr) { u32 w0 = ntohl(hdr->w[0]); bool keepalive_msg = (w0 & KEEPALIVE_MSG_MASK) == KEEPALIVE_MSG_MASK; - int key; + __be32 key; /* Return source node identity as key */ if (likely(!keepalive_msg)) diff --git a/include/net/tls.h b/include/net/tls.h index f5fb16da3860..7f84ea3e217c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -83,24 +83,10 @@ struct tls_device { void (*unhash)(struct tls_device *device, struct sock *sk); }; -struct tls_sw_context { +struct tls_sw_context_tx { struct crypto_aead *aead_send; - struct crypto_aead *aead_recv; struct crypto_wait async_wait; - /* Receive context */ - struct strparser strp; - void (*saved_data_ready)(struct sock *sk); - unsigned int (*sk_poll)(struct file *file, struct socket *sock, - struct poll_table_struct *wait); - struct sk_buff *recv_pkt; - u8 control; - bool decrypted; - - char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE]; - char rx_aad_plaintext[TLS_AAD_SPACE_SIZE]; - - /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; unsigned int sg_plaintext_size; @@ -117,6 +103,53 @@ struct tls_sw_context { struct scatterlist sg_aead_out[2]; }; +struct tls_sw_context_rx { + struct crypto_aead *aead_recv; + struct crypto_wait async_wait; + + struct strparser strp; + void (*saved_data_ready)(struct sock *sk); + __poll_t (*sk_poll_mask)(struct socket *sock, __poll_t events); + struct sk_buff *recv_pkt; + u8 control; + bool decrypted; + + char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE]; + char rx_aad_plaintext[TLS_AAD_SPACE_SIZE]; + +}; + +struct tls_record_info { + struct list_head list; + u32 end_seq; + int len; + int num_frags; + skb_frag_t frags[MAX_SKB_FRAGS]; +}; + +struct tls_offload_context { + struct crypto_aead *aead_send; + spinlock_t lock; /* protects records list */ + struct list_head records_list; + struct tls_record_info *open_record; + struct tls_record_info *retransmit_hint; + u64 hint_record_sn; + u64 unacked_record_sn; + + struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; + void (*sk_destruct)(struct sock *sk); + u8 driver_state[]; + /* The TLS layer reserves room for driver specific state + * Currently the belief is that there is not enough + * driver specific state to justify another layer of indirection + */ +#define TLS_DRIVER_STATE_SIZE (max_t(size_t, 8, sizeof(void *))) +}; + +#define TLS_OFFLOAD_CONTEXT_SIZE \ + (ALIGN(sizeof(struct tls_offload_context), sizeof(void *)) + \ + TLS_DRIVER_STATE_SIZE) + enum { TLS_PENDING_CLOSED_RECORD }; @@ -141,9 +174,15 @@ struct tls_context { struct tls12_crypto_info_aes_gcm_128 crypto_recv_aes_gcm_128; }; - void *priv_ctx; + struct list_head list; + struct net_device *netdev; + refcount_t refcount; + + void *priv_ctx_tx; + void *priv_ctx_rx; - u8 conf:3; + u8 tx_conf:3; + u8 rx_conf:3; struct cipher_context tx; struct cipher_context rx; @@ -181,18 +220,37 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tls_sw_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tls_sw_close(struct sock *sk, long timeout); -void tls_sw_free_resources(struct sock *sk); +void tls_sw_free_resources_tx(struct sock *sk); +void tls_sw_free_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -unsigned int tls_sw_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); +__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); -void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); -void tls_icsk_clean_acked(struct sock *sk); +int tls_set_device_offload(struct sock *sk, struct tls_context *ctx); +int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); +int tls_device_sendpage(struct sock *sk, struct page *page, + int offset, size_t size, int flags); +void tls_device_sk_destruct(struct sock *sk); +void tls_device_init(void); +void tls_device_cleanup(void); + +struct tls_record_info *tls_get_record(struct tls_offload_context *context, + u32 seq, u64 *p_record_sn); + +static inline bool tls_record_is_start_marker(struct tls_record_info *rec) +{ + return rec->len == 0; +} +static inline u32 tls_record_start_seq(struct tls_record_info *rec) +{ + return rec->end_seq - rec->len; +} + +void tls_sk_destruct(struct sock *sk, struct tls_context *ctx); int tls_push_sg(struct sock *sk, struct tls_context *ctx, struct scatterlist *sg, u16 first_offset, int flags); @@ -229,6 +287,13 @@ static inline bool tls_is_pending_open_record(struct tls_context *tls_ctx) return tls_ctx->pending_open_record_frags; } +static inline bool tls_is_sk_tx_device_offloaded(struct sock *sk) +{ + return sk_fullsock(sk) && + /* matches smp_store_release in tls_set_device_offload */ + smp_load_acquire(&sk->sk_destruct) == &tls_device_sk_destruct; +} + static inline void tls_err_abort(struct sock *sk, int err) { sk->sk_err = err; @@ -301,16 +366,22 @@ static inline struct tls_context *tls_get_ctx(const struct sock *sk) return icsk->icsk_ulp_data; } -static inline struct tls_sw_context *tls_sw_ctx( +static inline struct tls_sw_context_rx *tls_sw_ctx_rx( const struct tls_context *tls_ctx) { - return (struct tls_sw_context *)tls_ctx->priv_ctx; + return (struct tls_sw_context_rx *)tls_ctx->priv_ctx_rx; +} + +static inline struct tls_sw_context_tx *tls_sw_ctx_tx( + const struct tls_context *tls_ctx) +{ + return (struct tls_sw_context_tx *)tls_ctx->priv_ctx_tx; } static inline struct tls_offload_context *tls_offload_ctx( const struct tls_context *tls_ctx) { - return (struct tls_offload_context *)tls_ctx->priv_ctx; + return (struct tls_offload_context *)tls_ctx->priv_ctx_tx; } int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, @@ -318,4 +389,12 @@ int tls_proccess_cmsg(struct sock *sk, struct msghdr *msg, void tls_register_device(struct tls_device *device); void tls_unregister_device(struct tls_device *device); +struct sk_buff *tls_validate_xmit_skb(struct sock *sk, + struct net_device *dev, + struct sk_buff *skb); + +int tls_sw_fallback_init(struct sock *sk, + struct tls_offload_context *offload_ctx, + struct tls_crypto_info *crypto_info); + #endif /* _TLS_OFFLOAD_H */ diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index c4f5caaf3778..f6a3543e5247 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -45,8 +45,15 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, struct msghdr *msg, struct flowi6 *fl6, struct ipcm6_cookie *ipc6, struct sockcm_cookie *sockc); -void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, - __u16 srcp, __u16 destp, int bucket); +void __ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, + __u16 srcp, __u16 destp, int rqueue, int bucket); +static inline void +ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, + __u16 destp, int bucket) +{ + __ip6_dgram_sock_seq_show(seq, sp, srcp, destp, sk_rmem_alloc_get(sp), + bucket); +} #define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006) diff --git a/include/net/udp.h b/include/net/udp.h index 0676b272f6ac..b1ea8b0f5e6a 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -174,6 +174,9 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, struct udphdr *uh, udp_lookup_t lookup); int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup); +struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, + netdev_features_t features); + static inline struct udphdr *udp_gro_udphdr(struct sk_buff *skb) { struct udphdr *uh; @@ -244,6 +247,11 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb, return htons((((u64) hash * (max - min)) >> 32) + min); } +static inline int udp_rqueue_get(struct sock *sk) +{ + return sk_rmem_alloc_get(sk) - READ_ONCE(udp_sk(sk)->forward_deficit); +} + /* net/ipv4/udp.c */ void udp_destruct_sock(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); @@ -269,6 +277,7 @@ int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); +int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size); void udp4_hwcsum(struct sk_buff *skb, __be32 src, __be32 dst); int udp_rcv(struct sk_buff *skb); int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); @@ -276,7 +285,7 @@ int udp_init_sock(struct sock *sk); int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int __udp_disconnect(struct sock *sk, int flags); int udp_disconnect(struct sock *sk, int flags); -__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait); +__poll_t udp_poll_mask(struct socket *sock, __poll_t events); struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6); @@ -408,31 +417,27 @@ do { \ #define __UDPX_INC_STATS(sk, field) __UDP_INC_STATS(sock_net(sk), field, 0) #endif -/* /proc */ -int udp_seq_open(struct inode *inode, struct file *file); - +#ifdef CONFIG_PROC_FS struct udp_seq_afinfo { - char *name; sa_family_t family; struct udp_table *udp_table; - const struct file_operations *seq_fops; - struct seq_operations seq_ops; }; struct udp_iter_state { struct seq_net_private p; - sa_family_t family; int bucket; - struct udp_table *udp_table; }; -#ifdef CONFIG_PROC_FS -int udp_proc_register(struct net *net, struct udp_seq_afinfo *afinfo); -void udp_proc_unregister(struct net *net, struct udp_seq_afinfo *afinfo); +void *udp_seq_start(struct seq_file *seq, loff_t *pos); +void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos); +void udp_seq_stop(struct seq_file *seq, void *v); + +extern const struct seq_operations udp_seq_ops; +extern const struct seq_operations udp6_seq_ops; int udp4_proc_init(void); void udp4_proc_exit(void); -#endif +#endif /* CONFIG_PROC_FS */ int udpv4_offload_init(void); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index ad73d8b3fcc2..b99a02ae3934 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -262,6 +262,7 @@ struct vxlan_dev { #define VXLAN_F_COLLECT_METADATA 0x2000 #define VXLAN_F_GPE 0x4000 #define VXLAN_F_IPV6_LINKLOCAL 0x8000 +#define VXLAN_F_TTL_INHERIT 0x10000 /* Flags that are used in the receive path. These flags must match in * order for a socket to be shareable diff --git a/include/net/xdp.h b/include/net/xdp.h index b2362ddfa694..2deea7166a34 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -33,16 +33,115 @@ * also mandatory during RX-ring setup. */ +enum xdp_mem_type { + MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */ + MEM_TYPE_PAGE_ORDER0, /* Orig XDP full page model */ + MEM_TYPE_PAGE_POOL, + MEM_TYPE_ZERO_COPY, + MEM_TYPE_MAX, +}; + +/* XDP flags for ndo_xdp_xmit */ +#define XDP_XMIT_FLUSH (1U << 0) /* doorbell signal consumer */ +#define XDP_XMIT_FLAGS_MASK XDP_XMIT_FLUSH + +struct xdp_mem_info { + u32 type; /* enum xdp_mem_type, but known size type */ + u32 id; +}; + +struct page_pool; + +struct zero_copy_allocator { + void (*free)(struct zero_copy_allocator *zca, unsigned long handle); +}; + struct xdp_rxq_info { struct net_device *dev; u32 queue_index; u32 reg_state; + struct xdp_mem_info mem; } ____cacheline_aligned; /* perf critical, avoid false-sharing */ +struct xdp_buff { + void *data; + void *data_end; + void *data_meta; + void *data_hard_start; + unsigned long handle; + struct xdp_rxq_info *rxq; +}; + +struct xdp_frame { + void *data; + u16 len; + u16 headroom; + u16 metasize; + /* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time, + * while mem info is valid on remote CPU. + */ + struct xdp_mem_info mem; + struct net_device *dev_rx; /* used by cpumap */ +}; + +/* Convert xdp_buff to xdp_frame */ +static inline +struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) +{ + struct xdp_frame *xdp_frame; + int metasize; + int headroom; + + /* TODO: implement clone, copy, use "native" MEM_TYPE */ + if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) + return NULL; + + /* Assure headroom is available for storing info */ + headroom = xdp->data - xdp->data_hard_start; + metasize = xdp->data - xdp->data_meta; + metasize = metasize > 0 ? metasize : 0; + if (unlikely((headroom - metasize) < sizeof(*xdp_frame))) + return NULL; + + /* Store info in top of packet */ + xdp_frame = xdp->data_hard_start; + + xdp_frame->data = xdp->data; + xdp_frame->len = xdp->data_end - xdp->data; + xdp_frame->headroom = headroom - sizeof(*xdp_frame); + xdp_frame->metasize = metasize; + + /* rxq only valid until napi_schedule ends, convert to xdp_mem_info */ + xdp_frame->mem = xdp->rxq->mem; + + return xdp_frame; +} + +void xdp_return_frame(struct xdp_frame *xdpf); +void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); +void xdp_return_buff(struct xdp_buff *xdp); + int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); +int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, + enum xdp_mem_type type, void *allocator); + +/* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. + */ +static __always_inline void +xdp_set_data_meta_invalid(struct xdp_buff *xdp) +{ + xdp->data_meta = xdp->data + 1; +} + +static __always_inline bool +xdp_data_meta_unsupported(const struct xdp_buff *xdp) +{ + return unlikely(xdp->data_meta > xdp->data); +} #endif /* __LINUX_NET_XDP_H__ */ diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h new file mode 100644 index 000000000000..9fe472f2ac95 --- /dev/null +++ b/include/net/xdp_sock.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* AF_XDP internal functions + * Copyright(c) 2018 Intel Corporation. + */ + +#ifndef _LINUX_XDP_SOCK_H +#define _LINUX_XDP_SOCK_H + +#include <linux/workqueue.h> +#include <linux/if_xdp.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <net/sock.h> + +struct net_device; +struct xsk_queue; + +struct xdp_umem_props { + u64 chunk_mask; + u64 size; +}; + +struct xdp_umem_page { + void *addr; + dma_addr_t dma; +}; + +struct xdp_umem { + struct xsk_queue *fq; + struct xsk_queue *cq; + struct xdp_umem_page *pages; + struct xdp_umem_props props; + u32 headroom; + u32 chunk_size_nohr; + struct user_struct *user; + struct pid *pid; + unsigned long address; + refcount_t users; + struct work_struct work; + struct page **pgs; + u32 npgs; + struct net_device *dev; + u16 queue_id; + bool zc; + spinlock_t xsk_list_lock; + struct list_head xsk_list; +}; + +struct xdp_sock { + /* struct sock must be the first member of struct xdp_sock */ + struct sock sk; + struct xsk_queue *rx; + struct net_device *dev; + struct xdp_umem *umem; + struct list_head flush_node; + u16 queue_id; + struct xsk_queue *tx ____cacheline_aligned_in_smp; + struct list_head list; + bool zc; + /* Protects multiple processes in the control path */ + struct mutex mutex; + u64 rx_dropped; +}; + +struct xdp_buff; +#ifdef CONFIG_XDP_SOCKETS +int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); +int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); +void xsk_flush(struct xdp_sock *xs); +bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs); +/* Used from netdev driver */ +u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr); +void xsk_umem_discard_addr(struct xdp_umem *umem); +void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); +bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); +void xsk_umem_consume_tx_done(struct xdp_umem *umem); +#else +static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) +{ + return -ENOTSUPP; +} + +static inline int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) +{ + return -ENOTSUPP; +} + +static inline void xsk_flush(struct xdp_sock *xs) +{ +} + +static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) +{ + return false; +} +#endif /* CONFIG_XDP_SOCKETS */ + +#endif /* _LINUX_XDP_SOCK_H */ diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 45e75c36b738..557122846e0e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -737,7 +737,7 @@ static inline struct audit_buffer *xfrm_audit_start(const char *op) if (audit_enabled == 0) return NULL; - audit_buf = audit_log_start(current->audit_context, GFP_ATOMIC, + audit_buf = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_MAC_IPSEC_EVENT); if (audit_buf == NULL) return NULL; @@ -752,7 +752,7 @@ static inline void xfrm_audit_helper_usrinfo(bool task_valid, audit_get_loginuid(current) : INVALID_UID); const unsigned int ses = task_valid ? audit_get_sessionid(current) : - (unsigned int) -1; + AUDIT_SID_UNSET; audit_log_format(audit_buf, " auid=%u ses=%u", auid, ses); audit_log_task_context(audit_buf); |