From 0b5c0305e57ca940713bcb2b202fd2b412c62f31 Mon Sep 17 00:00:00 2001 From: Arend Van Spriel Date: Tue, 3 Apr 2018 10:18:15 +0200 Subject: brcmfmac: fix firmware request processing if nvram load fails When nvram loading fails a double free occurred. Fix this and reorg the code a little. Fixes: d09ae51a4b67 ("brcmfmac: pass struct in brcmf_fw_get_firmwares()") Reported-by: Dan Carpenter Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- .../broadcom/brcm80211/brcmfmac/firmware.c | 36 ++++++++++++---------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c index 9277f4c2bfeb..94e177d7c9b5 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/firmware.c @@ -459,7 +459,7 @@ static void brcmf_fw_free_request(struct brcmf_fw_request *req) kfree(req); } -static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) +static int brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) { struct brcmf_fw *fwctx = ctx; struct brcmf_fw_item *cur; @@ -498,13 +498,10 @@ static void brcmf_fw_request_nvram_done(const struct firmware *fw, void *ctx) brcmf_dbg(TRACE, "nvram %p len %d\n", nvram, nvram_length); cur->nv_data.data = nvram; cur->nv_data.len = nvram_length; - return; + return 0; fail: - brcmf_dbg(TRACE, "failed: dev=%s\n", dev_name(fwctx->dev)); - fwctx->done(fwctx->dev, -ENOENT, NULL); - brcmf_fw_free_request(fwctx->req); - kfree(fwctx); + return -ENOENT; } static int brcmf_fw_request_next_item(struct brcmf_fw *fwctx, bool async) @@ -553,20 +550,27 @@ static void brcmf_fw_request_done(const struct firmware *fw, void *ctx) brcmf_dbg(TRACE, "enter: firmware %s %sfound\n", cur->path, fw ? "" : "not "); - if (fw) { - if (cur->type == BRCMF_FW_TYPE_BINARY) - cur->binary = fw; - else if (cur->type == BRCMF_FW_TYPE_NVRAM) - brcmf_fw_request_nvram_done(fw, fwctx); - else - release_firmware(fw); - } else if (cur->type == BRCMF_FW_TYPE_NVRAM) { - brcmf_fw_request_nvram_done(NULL, fwctx); - } else if (!(cur->flags & BRCMF_FW_REQF_OPTIONAL)) { + if (!fw) ret = -ENOENT; + + switch (cur->type) { + case BRCMF_FW_TYPE_NVRAM: + ret = brcmf_fw_request_nvram_done(fw, fwctx); + break; + case BRCMF_FW_TYPE_BINARY: + cur->binary = fw; + break; + default: + /* something fishy here so bail out early */ + brcmf_err("unknown fw type: %d\n", cur->type); + release_firmware(fw); + ret = -EINVAL; goto fail; } + if (ret < 0 && !(cur->flags & BRCMF_FW_REQF_OPTIONAL)) + goto fail; + do { if (++fwctx->curpos == fwctx->req->n_items) { ret = 0; -- cgit v1.2.3 From 77e30e10ee28a53c8af95809866ee8493583e29a Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Wed, 4 Apr 2018 09:23:48 +0300 Subject: iwlwifi: mvm: query regdb for wmm rule if needed Since our device is regulatory self managed it maintains its regulatory rules by its own. However the wmm_rules values can't be set by the device itself but only the indication about the need to set it. In case the device set wmm indication, proactively query the regulatory data base to get these values Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 111 ++++++++++++++++++--- drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h | 6 +- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +- 3 files changed, 101 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 8928613e033e..ca0174680af9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -76,6 +76,7 @@ #include "iwl-io.h" #include "iwl-csr.h" #include "fw/acpi.h" +#include "fw/api/nvm-reg.h" /* NVM offsets (in words) definitions */ enum nvm_offsets { @@ -146,8 +147,8 @@ static const u8 iwl_ext_nvm_channels[] = { 149, 153, 157, 161, 165, 169, 173, 177, 181 }; -#define IWL_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels) -#define IWL_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels) +#define IWL_NVM_NUM_CHANNELS ARRAY_SIZE(iwl_nvm_channels) +#define IWL_NVM_NUM_CHANNELS_EXT ARRAY_SIZE(iwl_ext_nvm_channels) #define NUM_2GHZ_CHANNELS 14 #define NUM_2GHZ_CHANNELS_EXT 14 #define FIRST_2GHZ_HT_MINUS 5 @@ -301,11 +302,11 @@ static int iwl_init_channel_map(struct device *dev, const struct iwl_cfg *cfg, const u8 *nvm_chan; if (cfg->nvm_type != IWL_NVM_EXT) { - num_of_ch = IWL_NUM_CHANNELS; + num_of_ch = IWL_NVM_NUM_CHANNELS; nvm_chan = &iwl_nvm_channels[0]; num_2ghz_channels = NUM_2GHZ_CHANNELS; } else { - num_of_ch = IWL_NUM_CHANNELS_EXT; + num_of_ch = IWL_NVM_NUM_CHANNELS_EXT; nvm_chan = &iwl_ext_nvm_channels[0]; num_2ghz_channels = NUM_2GHZ_CHANNELS_EXT; } @@ -720,12 +721,12 @@ iwl_parse_nvm_data(struct iwl_trans *trans, const struct iwl_cfg *cfg, if (cfg->nvm_type != IWL_NVM_EXT) data = kzalloc(sizeof(*data) + sizeof(struct ieee80211_channel) * - IWL_NUM_CHANNELS, + IWL_NVM_NUM_CHANNELS, GFP_KERNEL); else data = kzalloc(sizeof(*data) + sizeof(struct ieee80211_channel) * - IWL_NUM_CHANNELS_EXT, + IWL_NVM_NUM_CHANNELS_EXT, GFP_KERNEL); if (!data) return NULL; @@ -842,24 +843,34 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u8 *nvm_chan, return flags; } +struct regdb_ptrs { + struct ieee80211_wmm_rule *rule; + u32 token; +}; + struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, - int num_of_ch, __le32 *channels, u16 fw_mcc) + int num_of_ch, __le32 *channels, u16 fw_mcc, + u16 geo_info) { int ch_idx; u16 ch_flags; u32 reg_rule_flags, prev_reg_rule_flags = 0; const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ? iwl_ext_nvm_channels : iwl_nvm_channels; - struct ieee80211_regdomain *regd; - int size_of_regd; + struct ieee80211_regdomain *regd, *copy_rd; + int size_of_regd, regd_to_copy, wmms_to_copy; + int size_of_wmms = 0; struct ieee80211_reg_rule *rule; + struct ieee80211_wmm_rule *wmm_rule, *d_wmm, *s_wmm; + struct regdb_ptrs *regdb_ptrs; enum nl80211_band band; int center_freq, prev_center_freq = 0; - int valid_rules = 0; + int valid_rules = 0, n_wmms = 0; + int i; bool new_rule; int max_num_ch = cfg->nvm_type == IWL_NVM_EXT ? - IWL_NUM_CHANNELS_EXT : IWL_NUM_CHANNELS; + IWL_NVM_NUM_CHANNELS_EXT : IWL_NVM_NUM_CHANNELS; if (WARN_ON_ONCE(num_of_ch > NL80211_MAX_SUPP_REG_RULES)) return ERR_PTR(-EINVAL); @@ -875,10 +886,26 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, sizeof(struct ieee80211_regdomain) + num_of_ch * sizeof(struct ieee80211_reg_rule); - regd = kzalloc(size_of_regd, GFP_KERNEL); + if (geo_info & GEO_WMM_ETSI_5GHZ_INFO) + size_of_wmms = + num_of_ch * sizeof(struct ieee80211_wmm_rule); + + regd = kzalloc(size_of_regd + size_of_wmms, GFP_KERNEL); if (!regd) return ERR_PTR(-ENOMEM); + regdb_ptrs = kcalloc(num_of_ch, sizeof(*regdb_ptrs), GFP_KERNEL); + if (!regdb_ptrs) { + copy_rd = ERR_PTR(-ENOMEM); + goto out; + } + + /* set alpha2 from FW. */ + regd->alpha2[0] = fw_mcc >> 8; + regd->alpha2[1] = fw_mcc & 0xff; + + wmm_rule = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); + for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) { ch_flags = (u16)__le32_to_cpup(channels + ch_idx); band = (ch_idx < NUM_2GHZ_CHANNELS) ? @@ -927,14 +954,66 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, iwl_nvm_print_channel_flags(dev, IWL_DL_LAR, nvm_chan[ch_idx], ch_flags); + + if (!(geo_info & GEO_WMM_ETSI_5GHZ_INFO) || + band == NL80211_BAND_2GHZ) + continue; + + if (!reg_query_regdb_wmm(regd->alpha2, center_freq, + ®db_ptrs[n_wmms].token, wmm_rule)) { + /* Add only new rules */ + for (i = 0; i < n_wmms; i++) { + if (regdb_ptrs[i].token == + regdb_ptrs[n_wmms].token) { + rule->wmm_rule = regdb_ptrs[i].rule; + break; + } + } + if (i == n_wmms) { + rule->wmm_rule = wmm_rule; + regdb_ptrs[n_wmms++].rule = wmm_rule; + wmm_rule++; + } + } } regd->n_reg_rules = valid_rules; + regd->n_wmm_rules = n_wmms; - /* set alpha2 from FW. */ - regd->alpha2[0] = fw_mcc >> 8; - regd->alpha2[1] = fw_mcc & 0xff; + /* + * Narrow down regdom for unused regulatory rules to prevent hole + * between reg rules to wmm rules. + */ + regd_to_copy = sizeof(struct ieee80211_regdomain) + + valid_rules * sizeof(struct ieee80211_reg_rule); + + wmms_to_copy = sizeof(struct ieee80211_wmm_rule) * n_wmms; + + copy_rd = kzalloc(regd_to_copy + wmms_to_copy, GFP_KERNEL); + if (!copy_rd) { + copy_rd = ERR_PTR(-ENOMEM); + goto out; + } + + memcpy(copy_rd, regd, regd_to_copy); + memcpy((u8 *)copy_rd + regd_to_copy, (u8 *)regd + size_of_regd, + wmms_to_copy); + + d_wmm = (struct ieee80211_wmm_rule *)((u8 *)copy_rd + regd_to_copy); + s_wmm = (struct ieee80211_wmm_rule *)((u8 *)regd + size_of_regd); + + for (i = 0; i < regd->n_reg_rules; i++) { + if (!regd->reg_rules[i].wmm_rule) + continue; + + copy_rd->reg_rules[i].wmm_rule = d_wmm + + (regd->reg_rules[i].wmm_rule - s_wmm) / + sizeof(struct ieee80211_wmm_rule); + } - return regd; +out: + kfree(regdb_ptrs); + kfree(regd); + return copy_rd; } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index 306736c7a042..3071a23b7606 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -101,12 +101,14 @@ void iwl_init_sbands(struct device *dev, const struct iwl_cfg *cfg, * * This function parses the regulatory channel data received as a * MCC_UPDATE_CMD command. It returns a newly allocation regulatory domain, - * to be fed into the regulatory core. An ERR_PTR is returned on error. + * to be fed into the regulatory core. In case the geo_info is set handle + * accordingly. An ERR_PTR is returned on error. * If not given to the regulatory core, the user is responsible for freeing * the regdomain returned here with kfree. */ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, - int num_of_ch, __le32 *channels, u16 fw_mcc); + int num_of_ch, __le32 *channels, u16 fw_mcc, + u16 geo_info); #endif /* __iwl_nvm_parse_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 51b30424575b..90f8c89ea59c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -311,7 +311,8 @@ struct ieee80211_regdomain *iwl_mvm_get_regdomain(struct wiphy *wiphy, regd = iwl_parse_nvm_mcc_info(mvm->trans->dev, mvm->cfg, __le32_to_cpu(resp->n_channels), resp->channels, - __le16_to_cpu(resp->mcc)); + __le16_to_cpu(resp->mcc), + __le16_to_cpu(resp->geo_info)); /* Store the return source id */ src_id = resp->source_id; kfree(resp); -- cgit v1.2.3 From 6899b32b5b2dee358936b82b8363b716607a138f Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 23 Apr 2018 18:09:21 +0100 Subject: bpf: disable and restore preemption in __BPF_PROG_RUN_ARRAY Running bpf programs requires disabled preemption, however at least some* of the BPF_PROG_RUN_ARRAY users do not follow this rule. To fix this bug, and also to make it not happen in the future, let's add explicit preemption disabling/re-enabling to the __BPF_PROG_RUN_ARRAY code. * for example: [ 17.624472] RIP: 0010:__cgroup_bpf_run_filter_sk+0x1c4/0x1d0 ... [ 17.640890] inet6_create+0x3eb/0x520 [ 17.641405] __sock_create+0x242/0x340 [ 17.641939] __sys_socket+0x57/0xe0 [ 17.642370] ? trace_hardirqs_off_thunk+0x1a/0x1c [ 17.642944] SyS_socket+0xa/0x10 [ 17.643357] do_syscall_64+0x79/0x220 [ 17.643879] entry_SYSCALL_64_after_hwframe+0x42/0xb7 Signed-off-by: Roman Gushchin Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 486e65e3db26..dc586cc64bc2 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -351,6 +351,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, struct bpf_prog **_prog, *__prog; \ struct bpf_prog_array *_array; \ u32 _ret = 1; \ + preempt_disable(); \ rcu_read_lock(); \ _array = rcu_dereference(array); \ if (unlikely(check_non_null && !_array))\ @@ -362,6 +363,7 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, } \ _out: \ rcu_read_unlock(); \ + preempt_enable_no_resched(); \ _ret; \ }) -- cgit v1.2.3 From 514d6c1959f9b396f1b51850925900adedffb951 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 12:11:02 -0700 Subject: bpf: Document sockmap '-target bpf' requirement for PROG_TYPE_SK_MSG BPF_PROG_TYPE_SK_MSG programs use a 'void *' for both data and the data_end pointers. Additionally, the verifier ensures that every accesses into the values is a __u64 read. This correctly maps on to the BPF 64-bit architecture. However, to ensure that when building on 32bit architectures that clang uses correct types the '-target bpf' option _must_ be specified. To make this clear add a note to the Documentation. Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- Documentation/bpf/bpf_devel_QA.txt | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/bpf/bpf_devel_QA.txt b/Documentation/bpf/bpf_devel_QA.txt index 1a0b704e1a38..da57601153a0 100644 --- a/Documentation/bpf/bpf_devel_QA.txt +++ b/Documentation/bpf/bpf_devel_QA.txt @@ -557,6 +557,14 @@ A: Although LLVM IR generation and optimization try to stay architecture pulls in some header files containing file scope host assembly codes. - You can add "-fno-jump-tables" to work around the switch table issue. - Otherwise, you can use bpf target. + Otherwise, you can use bpf target. Additionally, you _must_ use bpf target + when: + + - Your program uses data structures with pointer or long / unsigned long + types that interface with BPF helpers or context data structures. Access + into these structures is verified by the BPF verifier and may result + in verification failures if the native architecture is not aligned with + the BPF architecture, e.g. 64-bit. An example of this is + BPF_PROG_TYPE_SK_MSG require '-target bpf' Happy BPF hacking! -- cgit v1.2.3 From 4dfe1bb95235c553e216222cf0c377faf191dacd Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 12:11:08 -0700 Subject: bpf: sockmap sample use clang flag, -target bpf Per Documentation/bpf/bpf_devel_QA.txt add the -target flag to the sockmap Makefile. Relevant text quoted here, Otherwise, you can use bpf target. Additionally, you _must_ use bpf target when: - Your program uses data structures with pointer or long / unsigned long types that interface with BPF helpers or context data structures. Access into these structures is verified by the BPF verifier and may result in verification failures if the native architecture is not aligned with the BPF architecture, e.g. 64-bit. An example of this is BPF_PROG_TYPE_SK_MSG require '-target bpf' Fixes: 69e8cc134bcb ("bpf: sockmap sample program") Signed-off-by: John Fastabend Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- samples/sockmap/Makefile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/samples/sockmap/Makefile b/samples/sockmap/Makefile index 9bf2881bd11b..fa53f4d77834 100644 --- a/samples/sockmap/Makefile +++ b/samples/sockmap/Makefile @@ -65,11 +65,14 @@ $(src)/*.c: verify_target_bpf # asm/sysreg.h - inline assembly used by it is incompatible with llvm. # But, there is no easy way to fix it, so just exclude it since it is # useless for BPF samples. +# +# -target bpf option required with SK_MSG programs, this is to ensure +# reading 'void *' data types for data and data_end are __u64 reads. $(obj)/%.o: $(src)/%.c $(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -D__KERNEL__ -D__ASM_SYSREG_H -Wno-unused-value -Wno-pointer-sign \ -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ - -Wno-unknown-warning-option \ - -O2 -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ + -Wno-unknown-warning-option -O2 -target bpf \ + -emit-llvm -c $< -o -| $(LLC) -march=bpf -filetype=obj -o $@ -- cgit v1.2.3 From ba6b8de423f8d0dee48d6030288ed81c03ddf9f0 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 15:39:23 -0700 Subject: bpf: sockmap, map_release does not hold refcnt for pinned maps Relying on map_release hook to decrement the reference counts when a map is removed only works if the map is not being pinned. In the pinned case the ref is decremented immediately and the BPF programs released. After this BPF programs may not be in-use which is not what the user would expect. This patch moves the release logic into bpf_map_put_uref() and brings sockmap in-line with how a similar case is handled in prog array maps. Fixes: 3d9e952697de ("bpf: sockmap, fix leaking maps with attached but not detached progs") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 +- kernel/bpf/arraymap.c | 3 ++- kernel/bpf/sockmap.c | 4 ++-- kernel/bpf/syscall.c | 4 ++-- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index dc586cc64bc2..469b20e1dd7e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -31,6 +31,7 @@ struct bpf_map_ops { void (*map_release)(struct bpf_map *map, struct file *map_file); void (*map_free)(struct bpf_map *map); int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); + void (*map_release_uref)(struct bpf_map *map); /* funcs callable from userspace and from eBPF programs */ void *(*map_lookup_elem)(struct bpf_map *map, void *key); @@ -436,7 +437,6 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); -void bpf_fd_array_map_clear(struct bpf_map *map); int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 14750e7c5ee4..027107f4be53 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr) } /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_fd_array_map_clear(struct bpf_map *map) +static void bpf_fd_array_map_clear(struct bpf_map *map) { struct bpf_array *array = container_of(map, struct bpf_array, map); int i; @@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = { .map_fd_get_ptr = prog_fd_array_get_ptr, .map_fd_put_ptr = prog_fd_array_put_ptr, .map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, + .map_release_uref = bpf_fd_array_map_clear, }; static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index a3b21385e947..a73d484b6e4c 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1831,7 +1831,7 @@ static int sock_map_update_elem(struct bpf_map *map, return err; } -static void sock_map_release(struct bpf_map *map, struct file *map_file) +static void sock_map_release(struct bpf_map *map) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); struct bpf_prog *orig; @@ -1855,7 +1855,7 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, - .map_release = sock_map_release, + .map_release_uref = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4ca46df19c9a..ebfe9f29dae8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -257,8 +257,8 @@ static void bpf_map_free_deferred(struct work_struct *work) static void bpf_map_put_uref(struct bpf_map *map) { if (atomic_dec_and_test(&map->usercnt)) { - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) - bpf_fd_array_map_clear(map); + if (map->ops->map_release_uref) + map->ops->map_release_uref(map); } } -- cgit v1.2.3 From e20f7334837ae47341d8ec4e3170d0b4336a3676 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 15:39:28 -0700 Subject: bpf: sockmap, sk_wait_event needed to handle blocking cases In the recvmsg handler we need to add a wait event to support the blocking use cases. Without this we return zero and may confuse user applications. In the wait event any data received on the sk either via sk_receive_queue or the psock ingress list will wake up the sock. Fixes: fa246693a111 ("bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index a73d484b6e4c..aaf50ec77c94 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -43,6 +43,7 @@ #include #include #include +#include #define SOCK_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) @@ -732,6 +733,26 @@ out_err: return err; } +static int bpf_wait_data(struct sock *sk, + struct smap_psock *psk, int flags, + long timeo, int *err) +{ + int rc; + + DEFINE_WAIT_FUNC(wait, woken_wake_function); + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + rc = sk_wait_event(sk, &timeo, + !list_empty(&psk->ingress) || + !skb_queue_empty(&sk->sk_receive_queue), + &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + + return rc; +} + static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { @@ -755,6 +776,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); lock_sock(sk); +bytes_ready: while (copied != len) { struct scatterlist *sg; struct sk_msg_buff *md; @@ -809,6 +831,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, } } + if (!copied) { + long timeo; + int data; + int err = 0; + + timeo = sock_rcvtimeo(sk, nonblock); + data = bpf_wait_data(sk, psock, flags, timeo, &err); + + if (data) { + if (!skb_queue_empty(&sk->sk_receive_queue)) { + release_sock(sk); + smap_release_sock(psock, sk); + copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + return copied; + } + goto bytes_ready; + } + + if (err) + copied = err; + } + release_sock(sk); smap_release_sock(psock, sk); return copied; -- cgit v1.2.3 From 4fcfdfb83391c74e62683469289db42a143440ac Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 23 Apr 2018 15:39:33 -0700 Subject: bpf: sockmap, fix double page_put on ENOMEM error in redirect path In the case where the socket memory boundary is hit the redirect path returns an ENOMEM error. However, before checking for this condition the redirect scatterlist buffer is setup with a valid page and length. This is never unwound so when the buffers are released latter in the error path we do a put_page() and clear the scatterlist fields. But, because the initial error happens before completing the scatterlist buffer we end up with both the original buffer and the redirect buffer pointing to the same page resulting in duplicate put_page() calls. To fix this simply move the initial configuration of the redirect scatterlist buffer below the sock memory check. Found this while running TCP_STREAM test with netperf using Cilium. Fixes: fa246693a111 ("bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- kernel/bpf/sockmap.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index aaf50ec77c94..634415c7fbcd 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -524,8 +524,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, i = md->sg_start; do { - r->sg_data[i] = md->sg_data[i]; - size = (apply && apply_bytes < md->sg_data[i].length) ? apply_bytes : md->sg_data[i].length; @@ -536,6 +534,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes, } sk_mem_charge(sk, size); + r->sg_data[i] = md->sg_data[i]; r->sg_data[i].length = size; md->sg_data[i].length -= size; md->sg_data[i].offset += size; -- cgit v1.2.3 From a083429e133df63bf2e618f51e4061649fb3c65e Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 23 Apr 2018 16:01:31 +0300 Subject: iwlwifi: mvm: fix old scan version sizes When version 8 of the scan command API was introduced, only the size of version 7 was updated, causing older versions of the firmware to throw BAD_COMMAND errors. Calculating the old version based on the size of the latest version got too complicated and the size of the older versions will never change anyway, so it's better to just hardcoded the sizes. Fixes: 66fa2424df16 ("iwlwifi: fw api: support the new scan request FW API version") Reported-by: Scott Register Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/fw/api/scan.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h index 7af3a0f51b77..a17c4a79b8d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h @@ -8,6 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH + * Copyright(c) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -30,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 Intel Corporation + * Copyright(c) 2018 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -749,13 +750,9 @@ struct iwl_scan_req_umac { } __packed; #define IWL_SCAN_REQ_UMAC_SIZE_V8 sizeof(struct iwl_scan_req_umac) -#define IWL_SCAN_REQ_UMAC_SIZE_V7 (sizeof(struct iwl_scan_req_umac) - \ - 4 * sizeof(u8)) -#define IWL_SCAN_REQ_UMAC_SIZE_V6 (sizeof(struct iwl_scan_req_umac) - \ - 2 * sizeof(u8) - sizeof(__le16)) -#define IWL_SCAN_REQ_UMAC_SIZE_V1 (sizeof(struct iwl_scan_req_umac) - \ - 2 * sizeof(__le32) - 2 * sizeof(u8) - \ - sizeof(__le16)) +#define IWL_SCAN_REQ_UMAC_SIZE_V7 48 +#define IWL_SCAN_REQ_UMAC_SIZE_V6 44 +#define IWL_SCAN_REQ_UMAC_SIZE_V1 36 /** * struct iwl_umac_scan_abort -- cgit v1.2.3 From af8a41cccf8f469165c6debc8fe07c5fd2ca501a Mon Sep 17 00:00:00 2001 From: Ping-Ke Shih Date: Fri, 20 Apr 2018 10:30:09 +0800 Subject: rtlwifi: cleanup 8723be ant_sel definition Some HP laptops have only a single wifi antenna. This would not be a problem except that they were shipped with an incorrectly encoded EFUSE. It should have been possible to open the computer and transfer the antenna connection to the other terminal except that such action might void the warranty, and moving the antenna broke the Windows driver. The fix was to add a module option that would override the EFUSE encoding. That was done with commit c18d8f509571 ("rtlwifi: rtl8723be: Add antenna select module parameter"). There was still a problem with Bluetooth coexistence, which was addressed with commit baa170229095 ("rtlwifi: btcoexist: Implement antenna selection"). There were still problems, thus there were commit 0ff78adeef11 ("rtlwifi: rtl8723be: fix ant_sel code") and commit 6d6226928369 ("rtlwifi: btcoexist: Fix antenna selection code"). Despite all these attempts at fixing the problem, the code is not yet right. A proper fix is important as there are now instances of laptops having RTL8723DE chips with the same problem. The module parameter ant_sel is used to control antenna number and path. At present enum ANT_{X2,X1} is used to define the antenna number, but this choice is not intuitive, thus change to a new enum ANT_{MAIN,AUX} to make it more readable. This change showed examples where incorrect values were used. It was also possible to remove a workaround in halbtcoutsrc.c. The experimental results with single antenna connected to specific path are now as follows: ant_sel ANT_MAIN(#1) ANT_AUX(#2) 0 -8 -62 1 -62 -10 2 -6 -60 Signed-off-by: Ping-Ke Shih Fixes: c18d8f509571 ("rtlwifi: rtl8723be: Add antenna select module parameter") Fixes: baa170229095 ("rtlwifi: btcoexist: Implement antenna selection") Fixes: 0ff78adeef11 ("rtlwifi: rtl8723be: fix ant_sel code") Fixes: 6d6226928369 ("rtlwifi: btcoexist: Fix antenna selection code") Cc: Stable # 4.7+ Reviewed-by: Larry Finger Signed-off-by: Kalle Valo --- .../net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c | 15 --------------- drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c | 11 +++++++---- drivers/net/wireless/realtek/rtlwifi/wifi.h | 5 +++++ 3 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c index 8b6b07a936f5..b026e80940a4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c +++ b/drivers/net/wireless/realtek/rtlwifi/btcoexist/halbtcoutsrc.c @@ -158,16 +158,6 @@ static u8 halbtc_get_wifi_central_chnl(struct btc_coexist *btcoexist) static u8 rtl_get_hwpg_single_ant_path(struct rtl_priv *rtlpriv) { - struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params; - - /* override ant_num / ant_path */ - if (mod_params->ant_sel) { - rtlpriv->btcoexist.btc_info.ant_num = - (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1); - - rtlpriv->btcoexist.btc_info.single_ant_path = - (mod_params->ant_sel == 1 ? 0 : 1); - } return rtlpriv->btcoexist.btc_info.single_ant_path; } @@ -178,7 +168,6 @@ static u8 rtl_get_hwpg_bt_type(struct rtl_priv *rtlpriv) static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv) { - struct rtl_mod_params *mod_params = rtlpriv->cfg->mod_params; u8 num; if (rtlpriv->btcoexist.btc_info.ant_num == ANT_X2) @@ -186,10 +175,6 @@ static u8 rtl_get_hwpg_ant_num(struct rtl_priv *rtlpriv) else num = 1; - /* override ant_num / ant_path */ - if (mod_params->ant_sel) - num = (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1) + 1; - return num; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index e7bbbc95cdb1..b4f3f91b590e 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -848,6 +848,9 @@ static bool _rtl8723be_init_mac(struct ieee80211_hw *hw) return false; } + if (rtlpriv->cfg->ops->get_btc_status()) + rtlpriv->btcoexist.btc_ops->btc_power_on_setting(rtlpriv); + bytetmp = rtl_read_byte(rtlpriv, REG_MULTI_FUNC_CTRL); rtl_write_byte(rtlpriv, REG_MULTI_FUNC_CTRL, bytetmp | BIT(3)); @@ -2696,21 +2699,21 @@ void rtl8723be_read_bt_coexist_info_from_hwpg(struct ieee80211_hw *hw, rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B; rtlpriv->btcoexist.btc_info.ant_num = (value & 0x1); rtlpriv->btcoexist.btc_info.single_ant_path = - (value & 0x40); /*0xc3[6]*/ + (value & 0x40 ? ANT_AUX : ANT_MAIN); /*0xc3[6]*/ } else { rtlpriv->btcoexist.btc_info.btcoexist = 0; rtlpriv->btcoexist.btc_info.bt_type = BT_RTL8723B; rtlpriv->btcoexist.btc_info.ant_num = ANT_X2; - rtlpriv->btcoexist.btc_info.single_ant_path = 0; + rtlpriv->btcoexist.btc_info.single_ant_path = ANT_MAIN; } /* override ant_num / ant_path */ if (mod_params->ant_sel) { rtlpriv->btcoexist.btc_info.ant_num = - (mod_params->ant_sel == 1 ? ANT_X2 : ANT_X1); + (mod_params->ant_sel == 1 ? ANT_X1 : ANT_X2); rtlpriv->btcoexist.btc_info.single_ant_path = - (mod_params->ant_sel == 1 ? 0 : 1); + (mod_params->ant_sel == 1 ? ANT_AUX : ANT_MAIN); } } diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index d27e33960e77..ce1754054a07 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -2823,6 +2823,11 @@ enum bt_ant_num { ANT_X1 = 1, }; +enum bt_ant_path { + ANT_MAIN = 0, + ANT_AUX = 1, +}; + enum bt_co_type { BT_2WIRE = 0, BT_ISSC_3WIRE = 1, -- cgit v1.2.3 From 5540fbf43845868defcb599ec91c678275a20671 Mon Sep 17 00:00:00 2001 From: William Tu Date: Tue, 24 Apr 2018 23:46:59 -0700 Subject: bpf: clear the ip_tunnel_info. The percpu metadata_dst might carry the stale ip_tunnel_info and cause incorrect behavior. When mixing tests using ipv4/ipv6 bpf vxlan and geneve tunnel, the ipv6 tunnel info incorrectly uses ipv4's src ip addr as its ipv6 src address, because the previous tunnel info does not clean up. The patch zeros the fields in ip_tunnel_info. Signed-off-by: William Tu Reported-by: Yifeng Sun Signed-off-by: Daniel Borkmann --- net/core/filter.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/filter.c b/net/core/filter.c index d31aff93270d..e77c30ca491d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3240,6 +3240,7 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, skb_dst_set(skb, (struct dst_entry *) md); info = &md->u.tun_info; + memset(info, 0, sizeof(*info)); info->mode = IP_TUNNEL_INFO_TX; info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE; -- cgit v1.2.3 From 1612a981b76688c598dc944bbfbe29a2b33e3973 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 25 Apr 2018 05:42:16 +0000 Subject: bpf, x64: fix JIT emission for dead code Commit 2a5418a13fcf ("bpf: improve dead code sanitizing") replaced dead code with a series of ja-1 instructions, for safety. That made JIT compilation much more complex for some BPF programs. One instance of such programs is, for example: bool flag = false ... /* A bunch of other code */ ... if (flag) do_something() In some cases llvm is not able to remove at compile time the code for do_something(), so the generated BPF program ends up with a large amount of dead instructions. In one specific real life example, there are two series of ~500 and ~1000 dead instructions in the program. When the verifier replaces them with a series of ja-1 instructions, it causes an interesting behavior at JIT time. During the first pass, since all the instructions are estimated at 64 bytes, the ja-1 instructions end up being translated as 5 bytes JMP instructions (0xE9), since the jump offsets become increasingly large (> 127) as each instruction gets discovered to be 5 bytes instead of the estimated 64. Starting from the second pass, the first N instructions of the ja-1 sequence get translated into 2 bytes JMPs (0xEB) because the jump offsets become <= 127 this time. In particular, N is defined as roughly 127 / (5 - 2) ~= 42. So, each further pass will make the subsequent N JMP instructions shrink from 5 to 2 bytes, making the image shrink every time. This means that in order to have the entire program converge, there need to be, in the real example above, at least ~1000 / 42 ~= 24 passes just for translating the dead code. If we add this number to the passes needed to translate the other non dead code, it brings such program to 40+ passes, and JIT doesn't complete. Ultimately the userspace loader fails because such BPF program was supposed to be part of a prog array owner being JITed. While it is certainly possible to try to refactor such programs to help the compiler remove dead code, the behavior is not really intuitive and it puts further burden on the BPF developer who is not expecting such behavior. To make things worse, such programs are working just fine in all the kernel releases prior to the ja-1 fix. A possible approach to mitigate this behavior consists into noticing that for ja-1 instructions we don't really need to rely on the estimated size of the previous and current instructions, we know that a -1 BPF jump offset can be safely translated into a 0xEB instruction with a jump offset of -2. Such fix brings the BPF program in the previous example to complete again in ~9 passes. Fixes: 2a5418a13fcf ("bpf: improve dead code sanitizing") Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- arch/x86/net/bpf_jit_comp.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b725154182cc..abce27ceb411 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1027,7 +1027,17 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ break; case BPF_JMP | BPF_JA: - jmp_offset = addrs[i + insn->off] - addrs[i]; + if (insn->off == -1) + /* -1 jmp instructions will always jump + * backwards two bytes. Explicitly handling + * this case avoids wasting too many passes + * when there are long sequences of replaced + * dead code. + */ + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (!jmp_offset) /* optimize out nop jumps */ break; -- cgit v1.2.3 From 02a6efcab675fe32815d824837784c3f42a7d892 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Tue, 24 Apr 2018 18:09:04 +0200 Subject: net: phy: allow scanning busses with missing phys Some MDIO busses will error out when trying to read a phy address with no phy present at that address. In that case, probing the bus will fail because __mdiobus_register() is scanning the bus for all possible phys addresses. In case MII_PHYSID1 returns -EIO or -ENODEV, consider there is no phy at this address and set the phy ID to 0xffffffff which is then properly handled in get_phy_device(). Suggested-by: Andrew Lunn Signed-off-by: Alexandre Belloni Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index ac23322a32e1..9e4ba8e80a18 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -535,8 +535,17 @@ static int get_phy_id(struct mii_bus *bus, int addr, u32 *phy_id, /* Grab the bits from PHYIR1, and put them in the upper half */ phy_reg = mdiobus_read(bus, addr, MII_PHYSID1); - if (phy_reg < 0) + if (phy_reg < 0) { + /* if there is no device, return without an error so scanning + * the bus works properly + */ + if (phy_reg == -EIO || phy_reg == -ENODEV) { + *phy_id = 0xffffffff; + return 0; + } + return -EIO; + } *phy_id = (phy_reg & 0xffff) << 16; -- cgit v1.2.3 From 070204a34884110ac5e19c1e2e036fcfd033f8e3 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Apr 2018 12:48:58 +0200 Subject: net/smc: keep clcsock reference in smc_tcp_listen_work() The internal CLC socket should exist till the SMC-socket is released. Function tcp_listen_worker() releases the internal CLC socket of a listen socket, if an smc_close_active() is called. This function is called for the final release(), but it is called for shutdown SHUT_RDWR as well. This opens a door for protection faults, if socket calls using the internal CLC socket are called for a shutdown listen socket. With the changes of commit 3d502067599f ("net/smc: simplify wait when closing listen socket") there is no need anymore to release the internal CLC socket in function tcp_listen_worker((). It is sufficient to release it in smc_release(). Fixes: 127f49705823 ("net/smc: release clcsock from tcp_listen_worker") Signed-off-by: Ursula Braun Reported-by: syzbot+9045fc589fcd196ef522@syzkaller.appspotmail.com Reported-by: syzbot+28a2c86cf19c81d871fa@syzkaller.appspotmail.com Reported-by: syzbot+9605e6cace1b5efd4a0a@syzkaller.appspotmail.com Reported-by: syzbot+cf9012c597c8379d535c@syzkaller.appspotmail.com Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index f5d4b69dbabc..4470501374bf 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -978,10 +978,6 @@ static void smc_tcp_listen_work(struct work_struct *work) } out: - if (lsmc->clcsock) { - sock_release(lsmc->clcsock); - lsmc->clcsock = NULL; - } release_sock(lsk); sock_put(&lsmc->sk); /* sock_hold in smc_listen */ } -- cgit v1.2.3 From 91a825290ca4eae88603bc811bf74a45f94a3f46 Mon Sep 17 00:00:00 2001 From: Dag Moxnes Date: Wed, 25 Apr 2018 13:22:01 +0200 Subject: rds: ib: Fix missing call to rds_ib_dev_put in rds_ib_setup_qp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function rds_ib_setup_qp is calling rds_ib_get_client_data and should correspondingly call rds_ib_dev_put. This call was lost in the non-error path with the introduction of error handling done in commit 3b12f73a5c29 ("rds: ib: add error handle") Signed-off-by: Dag Moxnes Reviewed-by: Håkon Bugge Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index eea1d8611b20..13b38ad0fa4a 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -547,7 +547,7 @@ static int rds_ib_setup_qp(struct rds_connection *conn) rdsdebug("conn %p pd %p cq %p %p\n", conn, ic->i_pd, ic->i_send_cq, ic->i_recv_cq); - return ret; + goto out; sends_out: vfree(ic->i_sends); @@ -572,6 +572,7 @@ send_cq_out: ic->i_send_cq = NULL; rds_ibdev_out: rds_ib_remove_conn(rds_ibdev, conn); +out: rds_ib_dev_put(rds_ibdev); return ret; -- cgit v1.2.3 From 9c299a32ede98dc9faafb267034ed830a15304db Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 25 Apr 2018 14:22:45 -0700 Subject: bpf: fix for lex/yacc build error with gcc-5 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix build error found with Ubuntu shipped gcc-5 ~/git/bpf/tools/bpf$ make all Auto-detecting system features: ... libbfd: [ OFF ] ... disassembler-four-args: [ OFF ] CC bpf_jit_disasm.o LINK bpf_jit_disasm CC bpf_dbg.o /home/john/git/bpf/tools/bpf/bpf_dbg.c: In function ‘cmd_load’: /home/john/git/bpf/tools/bpf/bpf_dbg.c:1077:13: warning: ‘cont’ may be used uninitialized in this function [-Wmaybe-uninitialized] } else if (matches(subcmd, "pcap") == 0) { ^ LINK bpf_dbg CC bpf_asm.o make: *** No rule to make target `bpf_exp.yacc.o', needed by `bpf_asm'. Stop. Fixes: 5a8997f20715 ("tools: bpf: respect output directory during build") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/bpf/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 1ea545965ee3..53b60ad452f5 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -76,6 +76,8 @@ $(OUTPUT)bpf_asm: $(OUTPUT)bpf_asm.o $(OUTPUT)bpf_exp.yacc.o $(OUTPUT)bpf_exp.le $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $^ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c +$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c +$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c clean: bpftool_clean $(call QUIET_CLEAN, bpf-progs) -- cgit v1.2.3 From 1ccef350db2f13715040a10df77ae672206004cf Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 27 Mar 2018 09:22:16 +0000 Subject: net/mlx5e: Allow offloading ipv4 header re-write for icmp For ICMPv4, the checksum is calculated from the ICMP headers and data. Since the ICMPv4 checksum doesn't cover the IP header, we can allow to do L3 header re-write for this protocol. Fixes: bdd66ac0aeed ('net/mlx5e: Disallow TC offloading of unsupported match/action combinations') Signed-off-by: Jianbo Liu Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 4197001f9801..3c534fc43400 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -1864,7 +1864,8 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec, } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); - if (modify_ip_header && ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP) { + if (modify_ip_header && ip_proto != IPPROTO_TCP && + ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) { pr_info("can't offload re-write of ip proto %d\n", ip_proto); return false; } -- cgit v1.2.3 From 35f80acb24cd53dabd65e0660e46afdf5c45991d Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 21 Dec 2017 15:15:24 -0600 Subject: net/mlx5e: DCBNL fix min inline header size for dscp When the trust state is set to dscp and the netdev is down, the inline header size is not updated. When netdev is up, the inline header size stays at L2 instead of IP. Fix this issue by updating the private parameter when the netdev is in down so that when netdev is up, it picks up the right header size. Fixes: fbcb127e89ba ("net/mlx5e: Support DSCP trust state ...") Signed-off-by: Huy Nguyen Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index 3d46ef48d5b8..c641d5656b2d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1007,12 +1007,14 @@ static void mlx5e_trust_update_sq_inline_mode(struct mlx5e_priv *priv) mutex_lock(&priv->state_lock); - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) - goto out; - new_channels.params = priv->channels.params; mlx5e_trust_update_tx_min_inline_mode(priv, &new_channels.params); + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + /* Skip if tx_min_inline is the same */ if (new_channels.params.tx_min_inline_mode == priv->channels.params.tx_min_inline_mode) -- cgit v1.2.3 From 6082d9c9c94a408d7409b5f2e4e42ac9e8b16d0d Mon Sep 17 00:00:00 2001 From: Israel Rukshin Date: Thu, 12 Apr 2018 09:49:11 +0000 Subject: net/mlx5: Fix mlx5_get_vector_affinity function Adding the vector offset when calling to mlx5_vector2eqn() is wrong. This is because mlx5_vector2eqn() checks if EQ index is equal to vector number and the fact that the internal completion vectors that mlx5 allocates don't get an EQ index. The second problem here is that using effective_affinity_mask gives the same CPU for different vectors. This leads to unmapped queues when calling it from blk_mq_rdma_map_queues(). This doesn't happen when using affinity_hint mask. Fixes: 2572cf57d75a ("mlx5: fix mlx5_get_vector_affinity to start from completion vector 0") Fixes: 05e0cc84e00c ("net/mlx5: Fix get vector affinity helper function") Signed-off-by: Israel Rukshin Reviewed-by: Max Gurtovoy Reviewed-by: Sagi Grimberg --- drivers/infiniband/hw/mlx5/main.c | 2 +- include/linux/mlx5/driver.h | 12 +++--------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index daa919e5a442..241cf4ff9901 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4757,7 +4757,7 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) { struct mlx5_ib_dev *dev = to_mdev(ibdev); - return mlx5_get_vector_affinity(dev->mdev, comp_vector); + return mlx5_get_vector_affinity_hint(dev->mdev, comp_vector); } /* The mlx5_ib_multiport_mutex should be held when calling this function */ diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 767d193c269a..2a156c5dfadd 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1284,25 +1284,19 @@ enum { }; static inline const struct cpumask * -mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) +mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - const struct cpumask *mask; struct irq_desc *desc; unsigned int irq; int eqn; int err; - err = mlx5_vector2eqn(dev, MLX5_EQ_VEC_COMP_BASE + vector, &eqn, &irq); + err = mlx5_vector2eqn(dev, vector, &eqn, &irq); if (err) return NULL; desc = irq_to_desc(irq); -#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK - mask = irq_data_get_effective_affinity_mask(&desc->irq_data); -#else - mask = desc->irq_common_data.affinity; -#endif - return mask; + return desc->affinity_hint; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From d9a96ec362e3da878c378854e25321c85bac52c2 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 20 Mar 2018 18:17:25 +0200 Subject: net/mlx5e: TX, Use correct counter in dma_map error flow In case of a dma_mapping_error, do not use wi->num_dma as a parameter for dma unmap function because it's yet to be set, and holds an out-of-date value. Use actual value (local variable num_dma) instead. Fixes: 34802a42b352 ("net/mlx5e: Do not modify the TX SKB") Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 20297108528a..5532aa3675c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -255,7 +255,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dma_addr = dma_map_single(sq->pdev, skb_data, headlen, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - return -ENOMEM; + goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; @@ -273,7 +273,7 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(sq->pdev, dma_addr))) - return -ENOMEM; + goto dma_unmap_wqe_err; dseg->addr = cpu_to_be64(dma_addr); dseg->lkey = sq->mkey_be; @@ -285,6 +285,10 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb, } return num_dma; + +dma_unmap_wqe_err: + mlx5e_dma_unmap_wqe_err(sq, num_dma); + return -ENOMEM; } static inline void @@ -380,17 +384,15 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, (struct mlx5_wqe_data_seg *)cseg + ds_cnt); if (unlikely(num_dma < 0)) - goto dma_unmap_wqe_err; + goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, num_bytes, num_dma, wi, cseg); return NETDEV_TX_OK; -dma_unmap_wqe_err: +err_drop: sq->stats.dropped++; - mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -645,17 +647,15 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb, num_dma = mlx5e_txwqe_build_dsegs(sq, skb, skb_data, headlen, (struct mlx5_wqe_data_seg *)cseg + ds_cnt); if (unlikely(num_dma < 0)) - goto dma_unmap_wqe_err; + goto err_drop; mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt + num_dma, num_bytes, num_dma, wi, cseg); return NETDEV_TX_OK; -dma_unmap_wqe_err: +err_drop: sq->stats.dropped++; - mlx5e_dma_unmap_wqe_err(sq, wi->num_dma); - dev_kfree_skb_any(skb); return NETDEV_TX_OK; -- cgit v1.2.3 From 9c26f5f89d01ca21560c6b8a8e4054c271cc3a9c Mon Sep 17 00:00:00 2001 From: Talat Batheesh Date: Sun, 15 Apr 2018 11:26:19 +0300 Subject: net/mlx5: Avoid cleaning flow steering table twice during error flow When we fail to initialize the RX root namespace, we need to clean only that and not the entire flow steering. Currently the code may try to clean the flow steering twice on error witch leads to null pointer deference. Make sure we clean correctly. Fixes: fba53f7b5719 ("net/mlx5: Introduce mlx5_flow_steering structure") Signed-off-by: Talat Batheesh Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index de51e7c39bc8..2595c67ea39e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -187,6 +187,7 @@ static void del_sw_ns(struct fs_node *node); static void del_sw_hw_rule(struct fs_node *node); static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1, struct mlx5_flow_destination *d2); +static void cleanup_root_ns(struct mlx5_flow_root_namespace *root_ns); static struct mlx5_flow_rule * find_flow_rule(struct fs_fte *fte, struct mlx5_flow_destination *dest); @@ -2351,23 +2352,27 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering) static int init_root_ns(struct mlx5_flow_steering *steering) { + int err; + steering->root_ns = create_root_ns(steering, FS_FT_NIC_RX); if (!steering->root_ns) - goto cleanup; + return -ENOMEM; - if (init_root_tree(steering, &root_fs, &steering->root_ns->ns.node)) - goto cleanup; + err = init_root_tree(steering, &root_fs, &steering->root_ns->ns.node); + if (err) + goto out_err; set_prio_attrs(steering->root_ns); - - if (create_anchor_flow_table(steering)) - goto cleanup; + err = create_anchor_flow_table(steering); + if (err) + goto out_err; return 0; -cleanup: - mlx5_cleanup_fs(steering->dev); - return -ENOMEM; +out_err: + cleanup_root_ns(steering->root_ns); + steering->root_ns = NULL; + return err; } static void clean_tree(struct fs_node *node) -- cgit v1.2.3 From 99beaa22f11152e128861f1c681744ca4749e9f8 Mon Sep 17 00:00:00 2001 From: Shahar Klein Date: Sun, 8 Apr 2018 09:50:53 +0300 Subject: net/mlx5e: Fix traffic between VF and representor After the cited commit, WQE RQ size is calculated based on sw_mtu but it was not set for representors. This commit fixes that. Fixes: 472a1e44b349 ("net/mlx5e: Save MTU in channels params") Signed-off-by: Shahar Klein Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index d8f68e4d1018..876c3e4c6193 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -877,13 +877,14 @@ static const struct net_device_ops mlx5e_netdev_ops_rep = { }; static void mlx5e_build_rep_params(struct mlx5_core_dev *mdev, - struct mlx5e_params *params) + struct mlx5e_params *params, u16 mtu) { u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; params->hard_mtu = MLX5E_ETH_HARD_MTU; + params->sw_mtu = mtu; params->log_sq_size = MLX5E_REP_PARAMS_LOG_SQ_SIZE; params->rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; params->log_rq_mtu_frames = MLX5E_REP_PARAMS_LOG_RQ_SIZE; @@ -931,7 +932,7 @@ static void mlx5e_init_rep(struct mlx5_core_dev *mdev, priv->channels.params.num_channels = profile->max_nch(mdev); - mlx5e_build_rep_params(mdev, &priv->channels.params); + mlx5e_build_rep_params(mdev, &priv->channels.params, netdev->mtu); mlx5e_build_rep_netdev(netdev); mlx5e_timestamp_init(priv); -- cgit v1.2.3 From 202854e9f4df99df1f79962a9e8f94a7de602f7b Mon Sep 17 00:00:00 2001 From: Chris Mi Date: Mon, 16 Apr 2018 10:36:51 +0900 Subject: net/mlx5: Properly deal with flow counters when deleting rules When deleting a flow counter, the modify mask should be the action and the flow counter. Otherwise the flow counter is not deleted and we'll get a firmware warning when deleting the remaining destinations on the same FTE. It only happens in the presence of flow counter and multiple vport destinations. If there is only one vport destination, there is no need to update the FTE when deleting the only vport destination, we just delete the FTE. Fixes: ae05831424ed ("net/mlx5: Add option to add fwd rule with counter") Signed-off-by: Chris Mi Signed-off-by: Jianbo Liu Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 2595c67ea39e..c39c1692e674 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -482,7 +482,8 @@ static void del_sw_hw_rule(struct fs_node *node) if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION); + modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; update_fte = true; goto out; -- cgit v1.2.3 From 815425567dea6c54494e85050631d6bdda907c5d Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 25 Apr 2018 15:08:53 -0700 Subject: bpf: fix uninitialized variable in bpf tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here the variable cont is used as the saved_pointer for a call to strtok_r(). It is safe to use the value uninitialized in this context however and the later reference is only ever used if the strtok_r is successful. But, 'gcc-5' at least doesn't have all this knowledge so initialize cont to NULL. Additionally, do the natural NULL check before accessing just for completness. The warning is the following: ./bpf/tools/bpf/bpf_dbg.c: In function ‘cmd_load’: ./bpf/tools/bpf/bpf_dbg.c:1077:13: warning: ‘cont’ may be used uninitialized in this function [-Wmaybe-uninitialized] } else if (matches(subcmd, "pcap") == 0) { Fixes: fd981e3c321a "filter: bpf_dbg: add minimal bpf debugger" Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann --- tools/bpf/bpf_dbg.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c index 4f254bcc4423..61b9aa5d6415 100644 --- a/tools/bpf/bpf_dbg.c +++ b/tools/bpf/bpf_dbg.c @@ -1063,7 +1063,7 @@ static int cmd_load_pcap(char *file) static int cmd_load(char *arg) { - char *subcmd, *cont, *tmp = strdup(arg); + char *subcmd, *cont = NULL, *tmp = strdup(arg); int ret = CMD_OK; subcmd = strtok_r(tmp, " ", &cont); @@ -1073,7 +1073,10 @@ static int cmd_load(char *arg) bpf_reset(); bpf_reset_breakpoints(); - ret = cmd_load_bpf(cont); + if (!cont) + ret = CMD_ERR; + else + ret = cmd_load_bpf(cont); } else if (matches(subcmd, "pcap") == 0) { ret = cmd_load_pcap(cont); } else { -- cgit v1.2.3 From 7dbc73e6124ce4d0cfbdd6166de388e9367c47ad Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 25 Apr 2018 18:29:25 +0200 Subject: tipc: fix bug in function tipc_nl_node_dump_monitor Commit 36a50a989ee8 ("tipc: fix infinite loop when dumping link monitor summary") intended to fix a problem with user tool looping when max number of bearers are enabled. Unfortunately, the wrong version of the commit was posted, so the problem was not solved at all. This commit adds the missing part. Fixes: 36a50a989ee8 ("tipc: fix infinite loop when dumping link monitor summary") Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 6f98b56dd48e..baaf93f12cbd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2244,7 +2244,7 @@ int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) rtnl_lock(); for (bearer_id = prev_bearer; bearer_id < MAX_BEARERS; bearer_id++) { - err = __tipc_nl_add_monitor(net, &msg, prev_bearer); + err = __tipc_nl_add_monitor(net, &msg, bearer_id); if (err) break; } -- cgit v1.2.3 From c55ca688ed99a9cb79367aee2ed2ff6cb80fc039 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 25 Apr 2018 11:21:08 -0700 Subject: nfp: don't depend on eth_tbl being available For very very old generation of the management FW Ethernet port information table may theoretically not be available. This in turn will cause the nfp_port structures to not be allocated. Make sure we don't crash the kernel when there is no eth_tbl: RIP: 0010:nfp_net_pci_probe+0xf2/0xb40 [nfp] ... Call Trace: nfp_pci_probe+0x6de/0xab0 [nfp] local_pci_probe+0x47/0xa0 work_for_cpu_fn+0x1a/0x30 process_one_work+0x1de/0x3e0 Found while working with broken/development version of management FW. Fixes: a5950182c00e ("nfp: map mac_stats and vf_cfg BARs") Fixes: 93da7d9660ee ("nfp: provide nfp_port to of nfp_net_get_mac_addr()") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/main.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_app_nic.c | 2 +- drivers/net/ethernet/netronome/nfp/nfp_main.h | 4 ++- drivers/net/ethernet/netronome/nfp/nfp_net_main.c | 31 +++++++++++++---------- 4 files changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c index ad02592a82b7..a997e34bcec2 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/main.c +++ b/drivers/net/ethernet/netronome/nfp/flower/main.c @@ -360,7 +360,7 @@ nfp_flower_spawn_phy_reprs(struct nfp_app *app, struct nfp_flower_priv *priv) } SET_NETDEV_DEV(repr, &priv->nn->pdev->dev); - nfp_net_get_mac_addr(app->pf, port); + nfp_net_get_mac_addr(app->pf, repr, port); cmsg_port_id = nfp_flower_cmsg_phys_port(phys_port); err = nfp_repr_init(app, repr, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c index 2a2f2fbc8850..b9618c37403f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_app_nic.c @@ -69,7 +69,7 @@ int nfp_app_nic_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, if (err) return err < 0 ? err : 0; - nfp_net_get_mac_addr(app->pf, nn->port); + nfp_net_get_mac_addr(app->pf, nn->dp.netdev, nn->port); return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.h b/drivers/net/ethernet/netronome/nfp/nfp_main.h index add46e28212b..42211083b51f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.h @@ -171,7 +171,9 @@ void nfp_net_pci_remove(struct nfp_pf *pf); int nfp_hwmon_register(struct nfp_pf *pf); void nfp_hwmon_unregister(struct nfp_pf *pf); -void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port); +void +nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, + struct nfp_port *port); bool nfp_ctrl_tx(struct nfp_net *nn, struct sk_buff *skb); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c index 15fa47f622aa..45cd2092e498 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_main.c @@ -67,23 +67,26 @@ /** * nfp_net_get_mac_addr() - Get the MAC address. * @pf: NFP PF handle + * @netdev: net_device to set MAC address on * @port: NFP port structure * * First try to get the MAC address from NSP ETH table. If that * fails generate a random address. */ -void nfp_net_get_mac_addr(struct nfp_pf *pf, struct nfp_port *port) +void +nfp_net_get_mac_addr(struct nfp_pf *pf, struct net_device *netdev, + struct nfp_port *port) { struct nfp_eth_table_port *eth_port; eth_port = __nfp_port_get_eth_port(port); if (!eth_port) { - eth_hw_addr_random(port->netdev); + eth_hw_addr_random(netdev); return; } - ether_addr_copy(port->netdev->dev_addr, eth_port->mac_addr); - ether_addr_copy(port->netdev->perm_addr, eth_port->mac_addr); + ether_addr_copy(netdev->dev_addr, eth_port->mac_addr); + ether_addr_copy(netdev->perm_addr, eth_port->mac_addr); } static struct nfp_eth_table_port * @@ -511,16 +514,18 @@ static int nfp_net_pci_map_mem(struct nfp_pf *pf) return PTR_ERR(mem); } - min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1); - pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats", - "net.macstats", min_size, - &pf->mac_stats_bar); - if (IS_ERR(pf->mac_stats_mem)) { - if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) { - err = PTR_ERR(pf->mac_stats_mem); - goto err_unmap_ctrl; + if (pf->eth_tbl) { + min_size = NFP_MAC_STATS_SIZE * (pf->eth_tbl->max_index + 1); + pf->mac_stats_mem = nfp_rtsym_map(pf->rtbl, "_mac_stats", + "net.macstats", min_size, + &pf->mac_stats_bar); + if (IS_ERR(pf->mac_stats_mem)) { + if (PTR_ERR(pf->mac_stats_mem) != -ENOENT) { + err = PTR_ERR(pf->mac_stats_mem); + goto err_unmap_ctrl; + } + pf->mac_stats_mem = NULL; } - pf->mac_stats_mem = NULL; } pf->vf_cfg_mem = nfp_net_pf_map_rtsym(pf, "net.vfcfg", -- cgit v1.2.3 From 45f972adb7f4db2d7f02af728ccd104113336074 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 25 Apr 2018 20:21:16 +0200 Subject: net: mvpp2: Fix clk error path in mvpp2_probe When clk_prepare_enable fails for the axi_clk, the mg_clk isn't properly cleaned up. Add another jump label to handle that case, and make sure we jump to it in the later error cases. Fixes: 4792ea04bcd0 ("net: mvpp2: Fix clock resource by adding an optional bus clock") Signed-off-by: Maxime Chevallier Acked-by: Gregory CLEMENT Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 4202f9b5b966..0c2f04813d42 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -8774,12 +8774,12 @@ static int mvpp2_probe(struct platform_device *pdev) if (IS_ERR(priv->axi_clk)) { err = PTR_ERR(priv->axi_clk); if (err == -EPROBE_DEFER) - goto err_gop_clk; + goto err_mg_clk; priv->axi_clk = NULL; } else { err = clk_prepare_enable(priv->axi_clk); if (err < 0) - goto err_gop_clk; + goto err_mg_clk; } /* Get system's tclk rate */ @@ -8793,7 +8793,7 @@ static int mvpp2_probe(struct platform_device *pdev) if (priv->hw_version == MVPP22) { err = dma_set_mask(&pdev->dev, MVPP2_DESC_DMA_MASK); if (err) - goto err_mg_clk; + goto err_axi_clk; /* Sadly, the BM pools all share the same register to * store the high 32 bits of their address. So they * must all have the same high 32 bits, which forces @@ -8801,14 +8801,14 @@ static int mvpp2_probe(struct platform_device *pdev) */ err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32)); if (err) - goto err_mg_clk; + goto err_axi_clk; } /* Initialize network controller */ err = mvpp2_init(pdev, priv); if (err < 0) { dev_err(&pdev->dev, "failed to initialize controller\n"); - goto err_mg_clk; + goto err_axi_clk; } /* Initialize ports */ @@ -8821,7 +8821,7 @@ static int mvpp2_probe(struct platform_device *pdev) if (priv->port_count == 0) { dev_err(&pdev->dev, "no ports enabled\n"); err = -ENODEV; - goto err_mg_clk; + goto err_axi_clk; } /* Statistics must be gathered regularly because some of them (like @@ -8849,8 +8849,9 @@ err_port_probe: mvpp2_port_remove(priv->port_list[i]); i++; } -err_mg_clk: +err_axi_clk: clk_disable_unprepare(priv->axi_clk); +err_mg_clk: if (priv->hw_version == MVPP22) clk_disable_unprepare(priv->mg_clk); err_gop_clk: -- cgit v1.2.3 From 9af771ced473f92b5e57d086a0c2453fc0cb149c Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Wed, 25 Apr 2018 20:21:17 +0200 Subject: net: mvpp2: Fix clock resource by adding missing mg_core_clk Marvell's PPv2.2 IP needs an additional clock named "MG Core clock". This is required on Armada 7K and 8K. This commit adds the required clock in mvpp2, making sure it's only used on PPv2.2. Fixes: c7e92def1ef4 ("clk: mvebu: cp110: Fix clock tree representation") Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/mvpp2.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c index 0c2f04813d42..6f410235987c 100644 --- a/drivers/net/ethernet/marvell/mvpp2.c +++ b/drivers/net/ethernet/marvell/mvpp2.c @@ -942,6 +942,7 @@ struct mvpp2 { struct clk *pp_clk; struct clk *gop_clk; struct clk *mg_clk; + struct clk *mg_core_clk; struct clk *axi_clk; /* List of pointers to port structures */ @@ -8768,18 +8769,27 @@ static int mvpp2_probe(struct platform_device *pdev) err = clk_prepare_enable(priv->mg_clk); if (err < 0) goto err_gop_clk; + + priv->mg_core_clk = devm_clk_get(&pdev->dev, "mg_core_clk"); + if (IS_ERR(priv->mg_core_clk)) { + priv->mg_core_clk = NULL; + } else { + err = clk_prepare_enable(priv->mg_core_clk); + if (err < 0) + goto err_mg_clk; + } } priv->axi_clk = devm_clk_get(&pdev->dev, "axi_clk"); if (IS_ERR(priv->axi_clk)) { err = PTR_ERR(priv->axi_clk); if (err == -EPROBE_DEFER) - goto err_mg_clk; + goto err_mg_core_clk; priv->axi_clk = NULL; } else { err = clk_prepare_enable(priv->axi_clk); if (err < 0) - goto err_mg_clk; + goto err_mg_core_clk; } /* Get system's tclk rate */ @@ -8851,6 +8861,10 @@ err_port_probe: } err_axi_clk: clk_disable_unprepare(priv->axi_clk); + +err_mg_core_clk: + if (priv->hw_version == MVPP22) + clk_disable_unprepare(priv->mg_core_clk); err_mg_clk: if (priv->hw_version == MVPP22) clk_disable_unprepare(priv->mg_clk); @@ -8898,6 +8912,7 @@ static int mvpp2_remove(struct platform_device *pdev) return 0; clk_disable_unprepare(priv->axi_clk); + clk_disable_unprepare(priv->mg_core_clk); clk_disable_unprepare(priv->mg_clk); clk_disable_unprepare(priv->pp_clk); clk_disable_unprepare(priv->gop_clk); -- cgit v1.2.3 From 16ae6aa1705299789f71fdea59bfb119c1fbd9c0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 25 Apr 2018 11:33:08 -0700 Subject: tcp: ignore Fast Open on repair mode The TCP repair sequence of operation is to first set the socket in repair mode, then inject the TCP stats into the socket with repair socket options, then call connect() to re-activate the socket. The connect syscall simply returns and set state to ESTABLISHED mode. As a result Fast Open is meaningless for TCP repair. However allowing sendto() system call with MSG_FASTOPEN flag half-way during the repair operation could unexpectedly cause data to be sent, before the operation finishes changing the internal TCP stats (e.g. MSS). This in turn triggers TCP warnings on inconsistent packet accounting. The fix is to simply disallow Fast Open operation once the socket is in the repair mode. Reported-by: syzbot Signed-off-by: Yuchung Cheng Reviewed-by: Neal Cardwell Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9ce1c726185e..4b18ad41d4df 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1204,7 +1204,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) uarg->zerocopy = 0; } - if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { + if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect) && + !tp->repair) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; -- cgit v1.2.3 From 9faedd643fd9f3a53f10ca270d3bbd436b908766 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 25 Apr 2018 23:32:06 +0200 Subject: selftests: net: add in_netns.sh TEST_GEN_PROGS_EXTENDED Script in_netns.sh is a utility function and not its own test so it shouldn't be part of the TEST_PROGS. The in_netns.sh get used by run_afpackettests. To install in_netns.sh without being added to the main run_kselftest.sh script use the TEST_GEN_PROGS_EXTENDED variable. Fixes: 5ff9c1a3dd92 ("selftests: net: add in_netns.sh to TEST_PROGS") Signed-off-by: Anders Roxell Signed-off-by: David S. Miller --- tools/testing/selftests/net/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8f1e13d2e547..daf5effec3f0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -5,7 +5,8 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh in_netns.sh pmtu.sh +TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh +TEST_GEN_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -- cgit v1.2.3 From 1f3ccc3c3fc26468be00392ef0b2c215f9c9d054 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 25 Apr 2018 16:21:51 -0700 Subject: net: systemport: Correclty disambiguate driver instances While adding the DSA notifier, we will be sending DSA notifications with info->master that is going to point to a particular net_device instance. Our logic in bcm_sysport_map_queues() correctly disambiguates net_device instances that are not covered by our own driver, but it will not make sure that info->master points to a particular driver instance that we are interested in. In a system where e.g: two or more SYSTEMPORT instances are registered, this would lead in programming two or more times the queue mapping, completely messing with the logic which does the queue/port allocation and tracking. Fix this by looking at the notifier_block pointer which is unique per instance and allows us to go back to our driver private structure, and in turn to the backing net_device instance. Fixes: d156576362c0 ("net: systemport: Establish lower/upper queue mapping") Signed-off-by: Florian Fainelli Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index f9a3c1a76d5d..0c2b0fab81cf 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2144,14 +2144,21 @@ static const struct net_device_ops bcm_sysport_netdev_ops = { .ndo_select_queue = bcm_sysport_select_queue, }; -static int bcm_sysport_map_queues(struct net_device *dev, +static int bcm_sysport_map_queues(struct notifier_block *nb, struct dsa_notifier_register_info *info) { - struct bcm_sysport_priv *priv = netdev_priv(dev); struct bcm_sysport_tx_ring *ring; + struct bcm_sysport_priv *priv; struct net_device *slave_dev; unsigned int num_tx_queues; unsigned int q, start, port; + struct net_device *dev; + + priv = container_of(nb, struct bcm_sysport_priv, dsa_notifier); + if (priv->netdev != info->master) + return 0; + + dev = info->master; /* We can't be setting up queue inspection for non directly attached * switches @@ -2174,6 +2181,7 @@ static int bcm_sysport_map_queues(struct net_device *dev, if (priv->is_lite) netif_set_real_num_tx_queues(slave_dev, slave_dev->num_tx_queues / 2); + num_tx_queues = slave_dev->real_num_tx_queues; if (priv->per_port_num_tx_queues && @@ -2201,7 +2209,7 @@ static int bcm_sysport_map_queues(struct net_device *dev, return 0; } -static int bcm_sysport_dsa_notifier(struct notifier_block *unused, +static int bcm_sysport_dsa_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { struct dsa_notifier_register_info *info; @@ -2211,7 +2219,7 @@ static int bcm_sysport_dsa_notifier(struct notifier_block *unused, info = ptr; - return notifier_from_errno(bcm_sysport_map_queues(info->master, info)); + return notifier_from_errno(bcm_sysport_map_queues(nb, info)); } #define REV_FMT "v%2x.%02x" -- cgit v1.2.3 From d625329b06e46bd20baf9ee40847d11982569204 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Apr 2018 14:13:57 +0800 Subject: sctp: handle two v4 addrs comparison in sctp_inet6_cmp_addr Since sctp ipv6 socket also supports v4 addrs, it's possible to compare two v4 addrs in pf v6 .cmp_addr, sctp_inet6_cmp_addr. However after Commit 1071ec9d453a ("sctp: do not check port in sctp_inet6_cmp_addr"), it no longer calls af1->cmp_addr, which in this case is sctp_v4_cmp_addr, but calls __sctp_v6_cmp_addr where it handles them as two v6 addrs. It would cause a out of bounds crash. syzbot found this crash when trying to bind two v4 addrs to a v6 socket. This patch fixes it by adding the process for two v4 addrs in sctp_inet6_cmp_addr. Fixes: 1071ec9d453a ("sctp: do not check port in sctp_inet6_cmp_addr") Reported-by: syzbot+cd494c1dd681d4d93ebb@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2e3f7b75a8ec..42247110d842 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -895,6 +895,9 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; + if (addr1->sa.sa_family == AF_INET && addr2->sa.sa_family == AF_INET) + return addr1->v4.sin_addr.s_addr == addr2->v4.sin_addr.s_addr; + return __sctp_v6_cmp_addr(addr1, addr2); } -- cgit v1.2.3 From 9306b38e42cb266f98bff6f6f4c1c652aa79ba45 Mon Sep 17 00:00:00 2001 From: "SZ Lin (林上智)" Date: Thu, 26 Apr 2018 14:30:13 +0800 Subject: NET: usb: qmi_wwan: add support for ublox R410M PID 0x90b2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for PID 0x90b2 of ublox R410M. qmicli -d /dev/cdc-wdm0 --dms-get-manufacturer [/dev/cdc-wdm0] Device manufacturer retrieved: Manufacturer: 'u-blox' qmicli -d /dev/cdc-wdm0 --dms-get-model [/dev/cdc-wdm0] Device model retrieved: Model: 'SARA-R410M-02B' Signed-off-by: SZ Lin (林上智) Cc: stable Acked-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index c853e7410f5a..51c68fc416fa 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1098,6 +1098,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x05c6, 0x9080, 8)}, {QMI_FIXED_INTF(0x05c6, 0x9083, 3)}, {QMI_FIXED_INTF(0x05c6, 0x9084, 4)}, + {QMI_FIXED_INTF(0x05c6, 0x90b2, 3)}, /* ublox R410M */ {QMI_FIXED_INTF(0x05c6, 0x920d, 0)}, {QMI_FIXED_INTF(0x05c6, 0x920d, 5)}, {QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */ -- cgit v1.2.3 From 6a9a27d5397fc6c52f90c09ddab91e65053584aa Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 26 Apr 2018 15:21:44 +0800 Subject: sctp: clear the new asoc's stream outcnt in sctp_stream_update When processing a duplicate cookie-echo chunk, sctp moves the new temp asoc's stream out/in into the old asoc, and later frees this new temp asoc. But now after this move, the new temp asoc's stream->outcnt is not cleared while stream->out is set to NULL, which would cause a same crash as the one fixed in Commit 79d0895140e9 ("sctp: fix error path in sctp_stream_init") when freeing this asoc later. This fix is to clear this outcnt in sctp_stream_update. Fixes: f952be79cebd ("sctp: introduce struct sctp_stream_out_ext") Reported-by: Jianwen Ji Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/stream.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sctp/stream.c b/net/sctp/stream.c index f799043abec9..f1f1d1b232ba 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -240,6 +240,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) new->out = NULL; new->in = NULL; + new->outcnt = 0; + new->incnt = 0; } static int sctp_send_reconf(struct sctp_association *asoc, -- cgit v1.2.3 From c7f46cca8c73a44311e4164b9196b4d791f59ac7 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 26 Apr 2018 11:46:29 +0300 Subject: mlxsw: spectrum_switchdev: Do not remove mrouter port from MDB's ports list When IGMP snooping is enabled on a bridge, traffic forwarded by an MDB entry should be sent to both ports member in the MDB's ports list and mrouter ports. In case a port needs to be removed from an MDB's ports list, but this port is also configured as an mrouter port, then do not update the device so that it will continue to forward traffic through that port. Fix a copy-paste error that checked that IGMP snooping is enabled twice instead of checking the port's mrouter state. Fixes: ded711c87a04 ("mlxsw: spectrum_switchdev: Consider mrouter status for mdb changes") Signed-off-by: Ido Schimmel Reported-by: Colin King Reviewed-by: Nogah Frankel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index c11c9a635866..4ed01182a82c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -1718,13 +1718,11 @@ __mlxsw_sp_port_mdb_del(struct mlxsw_sp_port *mlxsw_sp_port, struct net_device *dev = mlxsw_sp_port->dev; int err; - if (bridge_port->bridge_device->multicast_enabled) { - if (bridge_port->bridge_device->multicast_enabled) { - err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, - false); - if (err) - netdev_err(dev, "Unable to remove port from SMID\n"); - } + if (bridge_port->bridge_device->multicast_enabled && + !bridge_port->mrouter) { + err = mlxsw_sp_port_smid_set(mlxsw_sp_port, mid->mid, false); + if (err) + netdev_err(dev, "Unable to remove port from SMID\n"); } err = mlxsw_sp_port_remove_from_mid(mlxsw_sp_port, mid); -- cgit v1.2.3 From 0b21bca04551906485e5d4140ccb2d875c45daa0 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Thu, 26 Apr 2018 19:47:35 -0400 Subject: MAINTAINERS: add davem in NETWORKING DRIVERS "./scripts/get_maintainer.pl -f" does not actually show us David as the maintainer of drivers/net directories such as team, bonding, phy or dsa. Adding him in an M: entry of NETWORKING DRIVERS fixes this. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 92be777d060a..2310341b9995 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9733,6 +9733,7 @@ W: https://fedorahosted.org/dropwatch/ F: net/core/drop_monitor.c NETWORKING DRIVERS +M: "David S. Miller" L: netdev@vger.kernel.org W: http://www.linuxfoundation.org/en/Net Q: http://patchwork.ozlabs.org/project/netdev/list/ -- cgit v1.2.3 From 988bf7243e03ef69238381594e0334a79cef74a6 Mon Sep 17 00:00:00 2001 From: Lance Richardson Date: Wed, 25 Apr 2018 10:21:54 -0400 Subject: net: support compat 64-bit time in {s,g}etsockopt For the x32 ABI, struct timeval has two 64-bit fields. However the kernel currently interprets the user-space values used for the SO_RCVTIMEO and SO_SNDTIMEO socket options as having a pair of 32-bit fields. When the seconds portion of the requested timeout is less than 2**32, the seconds portion of the effective timeout is correct but the microseconds portion is zero. When the seconds portion of the requested timeout is zero and the microseconds portion is non-zero, the kernel interprets the timeout as zero (never timeout). Fix by using 64-bit time for SO_RCVTIMEO/SO_SNDTIMEO as required for the ABI. The code included below demonstrates the problem. Results before patch: $ gcc -m64 -Wall -O2 -o socktmo socktmo.c && ./socktmo recv time: 2.008181 seconds send time: 2.015985 seconds $ gcc -m32 -Wall -O2 -o socktmo socktmo.c && ./socktmo recv time: 2.016763 seconds send time: 2.016062 seconds $ gcc -mx32 -Wall -O2 -o socktmo socktmo.c && ./socktmo recv time: 1.007239 seconds send time: 1.023890 seconds Results after patch: $ gcc -m64 -O2 -Wall -o socktmo socktmo.c && ./socktmo recv time: 2.010062 seconds send time: 2.015836 seconds $ gcc -m32 -O2 -Wall -o socktmo socktmo.c && ./socktmo recv time: 2.013974 seconds send time: 2.015981 seconds $ gcc -mx32 -O2 -Wall -o socktmo socktmo.c && ./socktmo recv time: 2.030257 seconds send time: 2.013383 seconds #include #include #include #include #include void checkrc(char *str, int rc) { if (rc >= 0) return; perror(str); exit(1); } static char buf[1024]; int main(int argc, char **argv) { int rc; int socks[2]; struct timeval tv; struct timeval start, end, delta; rc = socketpair(AF_UNIX, SOCK_STREAM, 0, socks); checkrc("socketpair", rc); /* set timeout to 1.999999 seconds */ tv.tv_sec = 1; tv.tv_usec = 999999; rc = setsockopt(socks[0], SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof tv); rc = setsockopt(socks[0], SOL_SOCKET, SO_SNDTIMEO, &tv, sizeof tv); checkrc("setsockopt", rc); /* measure actual receive timeout */ gettimeofday(&start, NULL); rc = recv(socks[0], buf, sizeof buf, 0); gettimeofday(&end, NULL); timersub(&end, &start, &delta); printf("recv time: %ld.%06ld seconds\n", (long)delta.tv_sec, (long)delta.tv_usec); /* fill send buffer */ do { rc = send(socks[0], buf, sizeof buf, 0); } while (rc > 0); /* measure actual send timeout */ gettimeofday(&start, NULL); rc = send(socks[0], buf, sizeof buf, 0); gettimeofday(&end, NULL); timersub(&end, &start, &delta); printf("send time: %ld.%06ld seconds\n", (long)delta.tv_sec, (long)delta.tv_usec); exit(0); } Fixes: 515c7af85ed9 ("x32: Use compat shims for {g,s}etsockopt") Reported-by: Gopal RajagopalSai Signed-off-by: Lance Richardson Signed-off-by: David S. Miller --- net/compat.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/compat.c b/net/compat.c index 5ae7437d3853..7242cce5631b 100644 --- a/net/compat.c +++ b/net/compat.c @@ -377,7 +377,8 @@ static int compat_sock_setsockopt(struct socket *sock, int level, int optname, optname == SO_ATTACH_REUSEPORT_CBPF) return do_set_attach_filter(sock, level, optname, optval, optlen); - if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) + if (!COMPAT_USE_64BIT_TIME && + (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) return do_set_sock_timeout(sock, level, optname, optval, optlen); return sock_setsockopt(sock, level, optname, optval, optlen); @@ -448,7 +449,8 @@ static int do_get_sock_timeout(struct socket *sock, int level, int optname, static int compat_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { - if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) + if (!COMPAT_USE_64BIT_TIME && + (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) return do_get_sock_timeout(sock, level, optname, optval, optlen); return sock_getsockopt(sock, level, optname, optval, optlen); } -- cgit v1.2.3 From ded8b9c761c50d147b20d7de18766fcdb8f5b621 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 27 Apr 2018 15:08:41 +0100 Subject: sfc: Use filter index rather than ID for rps_flow_id table efx->type->filter_insert() returns an ID rather than the index that efx->type->filter_async_insert() used to, which causes it to exceed efx->type->max_rx_ip_filters on some EF10 configurations, leading to out- of-bounds array writes. So, in efx_filter_rfs_work(), convert this back into an index (which is what the remove call in the expiry path expects, anyway). Fixes: 3af0f34290f6 ("sfc: replace asynchronous filter operations") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/rx.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 64a94f242027..d2e254f2f72b 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -839,6 +839,8 @@ static void efx_filter_rfs_work(struct work_struct *data) int rc; rc = efx->type->filter_insert(efx, &req->spec, true); + if (rc >= 0) + rc %= efx->type->max_rx_ip_filters; if (efx->rps_hash_table) { spin_lock_bh(&efx->rps_hash_lock); rule = efx_rps_hash_find(efx, &req->spec); -- cgit v1.2.3 From 987c658a61f432804c4662b736dbd5fc5939af1f Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Fri, 27 Apr 2018 15:08:57 +0100 Subject: sfc: fix ARFS expiry check on EF10 Owing to a missing conditional, the result of rps_may_expire_flow() was being ignored and filters were being removed even if we'd decided not to expire them. Fixes: f8d6203780b7 ("sfc: ARFS filter IDs") Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 63036d9bf3e6..d90a7b1f4088 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -4784,8 +4784,9 @@ expire: * will set rule->filter_id to EFX_ARFS_FILTER_ID_PENDING, meaning that * the rule is not removed by efx_rps_hash_del() below. */ - ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, - filter_idx, true) == 0; + if (ret) + ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, + filter_idx, true) == 0; /* While we can't safely dereference rule (we dropped the lock), we can * still test it for NULL. */ -- cgit v1.2.3 From e8238fc2bd7b4c3c7554fa2df067e796610212fc Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 27 Apr 2018 20:59:24 +0800 Subject: bridge: check iface upper dev when setting master via ioctl When we set a bond slave's master to bridge via ioctl, we only check the IFF_BRIDGE_PORT flag. Although we will find the slave's real master at netdev_master_upper_dev_link() later, it already does some settings and allocates some resources. It would be better to return as early as possible. v1 -> v2: use netdev_master_upper_dev_get() instead of netdev_has_any_upper_dev() to check if we have a master, because not all upper devs are masters, e.g. vlan device. Reported-by: syzbot+de73361ee4971b6e6f75@syzkaller.appspotmail.com Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_if.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 82c1a6f430b3..5bb6681fa91e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -518,8 +518,8 @@ int br_add_if(struct net_bridge *br, struct net_device *dev, return -ELOOP; } - /* Device is already being bridged */ - if (br_port_exists(dev)) + /* Device has master upper dev */ + if (netdev_master_upper_dev_get(dev)) return -EBUSY; /* No bridging devices that dislike that (e.g. wireless) */ -- cgit v1.2.3 From 14b7dc18ee1d9ae79eb615ea6a918d15bfddd220 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 27 Apr 2018 20:09:25 +0100 Subject: net: systemport: fix spelling mistake: "asymetric" -> "asymmetric" Trivial fix to spelling mistake in netdev_warn warning message Signed-off-by: Colin Ian King Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bcmsysport.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index 0c2b0fab81cf..f33b25fbca63 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -2186,7 +2186,7 @@ static int bcm_sysport_map_queues(struct notifier_block *nb, if (priv->per_port_num_tx_queues && priv->per_port_num_tx_queues != num_tx_queues) - netdev_warn(slave_dev, "asymetric number of per-port queues\n"); + netdev_warn(slave_dev, "asymmetric number of per-port queues\n"); priv->per_port_num_tx_queues = num_tx_queues; -- cgit v1.2.3 From 2cb5fb1454ef4990f44f3070226ee29201bd5c87 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 27 Apr 2018 16:46:11 -0300 Subject: MAINTAINERS: add myself as SCTP co-maintainer Signed-off-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 2310341b9995..6d21f2f74578 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12508,6 +12508,7 @@ F: drivers/scsi/st_*.h SCTP PROTOCOL M: Vlad Yasevich M: Neil Horman +M: Marcelo Ricardo Leitner L: linux-sctp@vger.kernel.org W: http://lksctp.sourceforge.net S: Maintained -- cgit v1.2.3 From ff81de73e4284649e78df1df0f63e2670ad87e7f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 28 Apr 2018 10:43:20 +0100 Subject: qed: fix spelling mistake: "checksumed" -> "checksummed" Trivial fix to spelling mistake in DP_INFO message text Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 74fc626b1ec1..38502815d681 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -2370,7 +2370,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) u8 flags = 0; if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { - DP_INFO(cdev, "Cannot transmit a checksumed packet\n"); + DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); return -EINVAL; } -- cgit v1.2.3 From f944ad1b2b66bbec8ffc1d6d0a45565b12846308 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sat, 28 Apr 2018 10:57:07 +0100 Subject: net: ethernet: ucc: fix spelling mistake: "tx-late-collsion" -> "tx-late-collision" Trivial fix to spelling mistake in tx_fw_stat_gstrings text Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/ucc_geth_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c index 4df282ed22c7..0beee2cc2ddd 100644 --- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c +++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c @@ -61,7 +61,7 @@ static const char hw_stat_gstrings[][ETH_GSTRING_LEN] = { static const char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { "tx-single-collision", "tx-multiple-collision", - "tx-late-collsion", + "tx-late-collision", "tx-aborted-frames", "tx-lost-frames", "tx-carrier-sense-errors", -- cgit v1.2.3 From cea67a2dd6b2419dcc13a39309b9a79a1f773193 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Apr 2018 09:54:59 -0700 Subject: ipv6: fix uninit-value in ip6_multipath_l3_keys() syzbot/KMSAN reported an uninit-value in ip6_multipath_l3_keys(), root caused to a bad assumption of ICMP header being already pulled in skb->head ip_multipath_l3_keys() does the correct thing, so it is an IPv6 only bug. BUG: KMSAN: uninit-value in ip6_multipath_l3_keys net/ipv6/route.c:1830 [inline] BUG: KMSAN: uninit-value in rt6_multipath_hash+0x5c4/0x640 net/ipv6/route.c:1858 CPU: 0 PID: 4507 Comm: syz-executor661 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 __msan_warning_32+0x6c/0xb0 mm/kmsan/kmsan_instr.c:683 ip6_multipath_l3_keys net/ipv6/route.c:1830 [inline] rt6_multipath_hash+0x5c4/0x640 net/ipv6/route.c:1858 ip6_route_input+0x65a/0x920 net/ipv6/route.c:1884 ip6_rcv_finish+0x413/0x6e0 net/ipv6/ip6_input.c:69 NF_HOOK include/linux/netfilter.h:288 [inline] ipv6_rcv+0x1e16/0x2340 net/ipv6/ip6_input.c:208 __netif_receive_skb_core+0x47df/0x4a90 net/core/dev.c:4562 __netif_receive_skb net/core/dev.c:4627 [inline] netif_receive_skb_internal+0x49d/0x630 net/core/dev.c:4701 netif_receive_skb+0x230/0x240 net/core/dev.c:4725 tun_rx_batched drivers/net/tun.c:1555 [inline] tun_get_user+0x740f/0x7c60 drivers/net/tun.c:1962 tun_chr_write_iter+0x1d4/0x330 drivers/net/tun.c:1990 call_write_iter include/linux/fs.h:1782 [inline] new_sync_write fs/read_write.c:469 [inline] __vfs_write+0x7fb/0x9f0 fs/read_write.c:482 vfs_write+0x463/0x8d0 fs/read_write.c:544 SYSC_write+0x172/0x360 fs/read_write.c:589 SyS_write+0x55/0x80 fs/read_write.c:581 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 23aebdacb05d ("ipv6: Compute multipath hash for ICMP errors from offending packet") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jakub Sitnicki Acked-by: Jakub Sitnicki Signed-off-by: David S. Miller --- net/ipv6/route.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cde7d8251377..f4d61736c41a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1835,11 +1835,16 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb, const struct ipv6hdr *inner_iph; const struct icmp6hdr *icmph; struct ipv6hdr _inner_iph; + struct icmp6hdr _icmph; if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) goto out; - icmph = icmp6_hdr(skb); + icmph = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_icmph), &_icmph); + if (!icmph) + goto out; + if (icmph->icmp6_type != ICMPV6_DEST_UNREACH && icmph->icmp6_type != ICMPV6_PKT_TOOBIG && icmph->icmp6_type != ICMPV6_TIME_EXCEED && -- cgit v1.2.3 From bf2acc943a45d2b2e8a9f1a5ddff6b6e43cc69d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Apr 2018 18:55:20 -0700 Subject: tcp: fix TCP_REPAIR_QUEUE bound checking syzbot is able to produce a nasty WARN_ON() in tcp_verify_left_out() with following C-repro : socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 3 setsockopt(3, SOL_TCP, TCP_REPAIR, [1], 4) = 0 setsockopt(3, SOL_TCP, TCP_REPAIR_QUEUE, [-1], 4) = 0 bind(3, {sa_family=AF_INET, sin_port=htons(20002), sin_addr=inet_addr("0.0.0.0")}, 16) = 0 sendto(3, "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1242, MSG_FASTOPEN, {sa_family=AF_INET, sin_port=htons(20002), sin_addr=inet_addr("127.0.0.1")}, 16) = 1242 setsockopt(3, SOL_TCP, TCP_REPAIR_WINDOW, "\4\0\0@+\205\0\0\377\377\0\0\377\377\377\177\0\0\0\0", 20) = 0 writev(3, [{"\270", 1}], 1) = 1 setsockopt(3, SOL_TCP, TCP_REPAIR_OPTIONS, "\10\0\0\0\0\0\0\0\0\0\0\0|\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 386) = 0 writev(3, [{"\210v\r[\226\320t\231qwQ\204\264l\254\t\1\20\245\214p\350H\223\254;\\\37\345\307p$"..., 3144}], 1) = 3144 The 3rd system call looks odd : setsockopt(3, SOL_TCP, TCP_REPAIR_QUEUE, [-1], 4) = 0 This patch makes sure bound checking is using an unsigned compare. Fixes: ee9952831cfd ("tcp: Initial repair mode") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4b18ad41d4df..44be7f43455e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2674,7 +2674,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_REPAIR_QUEUE: if (!tp->repair) err = -EPERM; - else if (val < TCP_QUEUES_NR) + else if ((unsigned int)val < TCP_QUEUES_NR) tp->repair_queue = val; else err = -EINVAL; -- cgit v1.2.3 From de08481a253ac658433a8304a303ce9f018d71e5 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 27 Apr 2018 19:02:05 +0300 Subject: vhost: make msg padding explicit There's a 32 bit hole just after type. It's best to give it a name, this way compiler is forced to initialize it with rest of the structure. Reported-by: Kevin Easton Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/vhost.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index c51f8e5cc608..5a8ad064445b 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -68,6 +68,7 @@ struct vhost_iotlb_msg { struct vhost_msg { int type; + int padding0; union { struct vhost_iotlb_msg iotlb; __u8 padding[64]; -- cgit v1.2.3 From 26ff75857e5953720409ea531d42f902defb130a Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 30 Apr 2018 17:29:45 +0100 Subject: net/mlx4: fix spelling mistake: "failedi" -> "failed" trivial fix to spelling mistake in mlx4_warn message. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index bfef69235d71..211578ffc70d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1317,7 +1317,7 @@ static int mlx4_mf_unbond(struct mlx4_dev *dev) ret = mlx4_unbond_fs_rules(dev); if (ret) - mlx4_warn(dev, "multifunction unbond for flow rules failedi (%d)\n", ret); + mlx4_warn(dev, "multifunction unbond for flow rules failed (%d)\n", ret); ret1 = mlx4_unbond_mac_table(dev); if (ret1) { mlx4_warn(dev, "multifunction unbond for MAC table failed (%d)\n", ret1); -- cgit v1.2.3 From d656fe49e33df48ee6bc19e871f5862f49895c9e Mon Sep 17 00:00:00 2001 From: Wenwen Wang Date: Mon, 30 Apr 2018 12:31:13 -0500 Subject: ethtool: fix a potential missing-check bug In ethtool_get_rxnfc(), the object "info" is firstly copied from user-space. If the FLOW_RSS flag is set in the member field flow_type of "info" (and cmd is ETHTOOL_GRXFH), info needs to be copied again from user-space because FLOW_RSS is newer and has new definition, as mentioned in the comment. However, given that the user data resides in user-space, a malicious user can race to change the data after the first copy. By doing so, the user can inject inconsistent data. For example, in the second copy, the FLOW_RSS flag could be cleared in the field flow_type of "info". In the following execution, "info" will be used in the function ops->get_rxnfc(). Such inconsistent data can potentially lead to unexpected information leakage since ops->get_rxnfc() will prepare various types of data according to flow_type, and the prepared data will be eventually copied to user-space. This inconsistent data may also cause undefined behaviors based on how ops->get_rxnfc() is implemented. This patch simply re-verifies the flow_type field of "info" after the second copy. If the value is not as expected, an error code will be returned. Signed-off-by: Wenwen Wang Signed-off-by: David S. Miller --- net/core/ethtool.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 03416e6dd5d7..ba02f0dfe85c 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1032,6 +1032,11 @@ static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, info_size = sizeof(info); if (copy_from_user(&info, useraddr, info_size)) return -EFAULT; + /* Since malicious users may modify the original data, + * we need to check whether FLOW_RSS is still requested. + */ + if (!(info.flow_type & FLOW_RSS)) + return -EINVAL; } if (info.cmd == ETHTOOL_GRXCLSRLALL) { -- cgit v1.2.3 From edd7ceb78296fb1574958991b6655c3c2cedf124 Mon Sep 17 00:00:00 2001 From: Thomas Winter Date: Tue, 1 May 2018 09:15:29 +1200 Subject: ipv6: Allow non-gateway ECMP for IPv6 It is valid to have static routes where the nexthop is an interface not an address such as tunnels. For IPv4 it was possible to use ECMP on these routes but not for IPv6. Signed-off-by: Thomas Winter Cc: David Ahern Cc: "David S. Miller" Cc: Alexey Kuznetsov Cc: Hideaki YOSHIFUJI Acked-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +-- net/ipv6/ip6_fib.c | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 08b132381984..abceb5864d99 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,8 +68,7 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) { - return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; + return (rt->rt6i_flags & (RTF_ADDRCONF | RTF_DYNAMIC)) == 0; } void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index deab2db6692e..3c97c29d4401 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -934,9 +934,6 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * list. * Only static routes (which don't have flag * RTF_EXPIRES) are used for ECMPv6. - * - * To avoid long list, we only had siblings if the - * route have a gateway. */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) -- cgit v1.2.3 From a4e21ff8d9a311b16cfa9e41aed0c627a47149a4 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 1 May 2018 10:20:24 -0700 Subject: bpf: minor fix to selftest test_stacktrace_build_id() 1. remove useless parameter list to ./urandom_read 2. add missing "\n" to the end of an error message Fixes: 81f77fd0deeb ("bpf: add selftest for stackmap with BPF_F_STACK_BUILD_ID") Cc: Alexei Starovoitov Cc: Daniel Borkmann Signed-off-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_progs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index faadbe233966..4123d0ab90ba 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -1108,7 +1108,7 @@ static void test_stacktrace_build_id(void) assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); - assert(system("./urandom_read if=/dev/urandom of=/dev/zero count=4 2> /dev/null") == 0); + assert(system("./urandom_read") == 0); /* disable stack trace collection */ key = 0; val = 1; @@ -1158,7 +1158,7 @@ static void test_stacktrace_build_id(void) } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0); CHECK(build_id_matches < 1, "build id match", - "Didn't find expected build ID from the map"); + "Didn't find expected build ID from the map\n"); disable_pmu: ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE); -- cgit v1.2.3 From c212d2c7fc4736d49be102fb7a1a545cdc2f1fea Mon Sep 17 00:00:00 2001 From: Dave Watson Date: Tue, 1 May 2018 13:05:39 -0700 Subject: net/tls: Don't recursively call push_record during tls_write_space callbacks It is reported that in some cases, write_space may be called in do_tcp_sendpages, such that we recursively invoke do_tcp_sendpages again: [ 660.468802] ? do_tcp_sendpages+0x8d/0x580 [ 660.468826] ? tls_push_sg+0x74/0x130 [tls] [ 660.468852] ? tls_push_record+0x24a/0x390 [tls] [ 660.468880] ? tls_write_space+0x6a/0x80 [tls] ... tls_push_sg already does a loop over all sending sg's, so ignore any tls_write_space notifications until we are done sending. We then have to call the previous write_space to wake up poll() waiters after we are done with the send loop. Reported-by: Andre Tomt Signed-off-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 1 + net/tls/tls_main.c | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/include/net/tls.h b/include/net/tls.h index 3da8e13a6d96..b400d0bb7448 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -148,6 +148,7 @@ struct tls_context { struct scatterlist *partially_sent_record; u16 partially_sent_offset; unsigned long flags; + bool in_tcp_sendpages; u16 pending_open_record_frags; int (*push_pending_record)(struct sock *sk, int flags); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 0d379970960e..cc03e00785c7 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -114,6 +114,7 @@ int tls_push_sg(struct sock *sk, size = sg->length - offset; offset += sg->offset; + ctx->in_tcp_sendpages = true; while (1) { if (sg_is_last(sg)) sendpage_flags = flags; @@ -148,6 +149,8 @@ retry: } clear_bit(TLS_PENDING_CLOSED_RECORD, &ctx->flags); + ctx->in_tcp_sendpages = false; + ctx->sk_write_space(sk); return 0; } @@ -217,6 +220,10 @@ static void tls_write_space(struct sock *sk) { struct tls_context *ctx = tls_get_ctx(sk); + /* We are already sending pages, ignore notification */ + if (ctx->in_tcp_sendpages) + return; + if (!sk->sk_write_pending && tls_is_pending_closed_record(ctx)) { gfp_t sk_allocation = sk->sk_allocation; int rc; -- cgit v1.2.3 From 50a5852a657f793a8482fe3af4a141b460d3499e Mon Sep 17 00:00:00 2001 From: John Hurley Date: Tue, 1 May 2018 15:49:49 -0700 Subject: nfp: flower: set tunnel ttl value to net default Firmware requires that the ttl value for an encapsulating ipv4 tunnel header be included as an action field. Prior to the support of Geneve tunnel encap (when ttl set was removed completely), ttl value was extracted from the tunnel key. However, tests have shown that this can still produce a ttl of 0. Fix the issue by setting the namespace default value for each new tunnel. Follow up patch for net-next will do a full route lookup. Fixes: 3ca3059dc3a9 ("nfp: flower: compile Geneve encap actions") Fixes: b27d6a95a70d ("nfp: compile flower vxlan tunnel set actions") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/flower/action.c | 10 ++++++++-- drivers/net/ethernet/netronome/nfp/flower/cmsg.h | 5 ++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c index b3567a596fc1..80df9a5d4217 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/action.c +++ b/drivers/net/ethernet/netronome/nfp/flower/action.c @@ -183,17 +183,21 @@ static int nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, const struct tc_action *action, struct nfp_fl_pre_tunnel *pre_tun, - enum nfp_flower_tun_type tun_type) + enum nfp_flower_tun_type tun_type, + struct net_device *netdev) { size_t act_size = sizeof(struct nfp_fl_set_ipv4_udp_tun); struct ip_tunnel_info *ip_tun = tcf_tunnel_info(action); u32 tmp_set_ip_tun_type_index = 0; /* Currently support one pre-tunnel so index is always 0. */ int pretun_idx = 0; + struct net *net; if (ip_tun->options_len) return -EOPNOTSUPP; + net = dev_net(netdev); + set_tun->head.jump_id = NFP_FL_ACTION_OPCODE_SET_IPV4_TUNNEL; set_tun->head.len_lw = act_size >> NFP_FL_LW_SIZ; @@ -204,6 +208,7 @@ nfp_fl_set_ipv4_udp_tun(struct nfp_fl_set_ipv4_udp_tun *set_tun, set_tun->tun_type_index = cpu_to_be32(tmp_set_ip_tun_type_index); set_tun->tun_id = ip_tun->key.tun_id; + set_tun->ttl = net->ipv4.sysctl_ip_default_ttl; /* Complete pre_tunnel action. */ pre_tun->ipv4_dst = ip_tun->key.u.ipv4.dst; @@ -511,7 +516,8 @@ nfp_flower_loop_action(const struct tc_action *a, *a_len += sizeof(struct nfp_fl_pre_tunnel); set_tun = (void *)&nfp_fl->action_data[*a_len]; - err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type); + err = nfp_fl_set_ipv4_udp_tun(set_tun, a, pre_tun, *tun_type, + netdev); if (err) return err; *a_len += sizeof(struct nfp_fl_set_ipv4_udp_tun); diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h index b6c0fd053a50..bee4367a2c38 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.h +++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.h @@ -190,7 +190,10 @@ struct nfp_fl_set_ipv4_udp_tun { __be16 reserved; __be64 tun_id __packed; __be32 tun_type_index; - __be32 extra[3]; + __be16 reserved2; + u8 ttl; + u8 reserved3; + __be32 extra[2]; }; /* Metadata with L2 (1W/4B) -- cgit v1.2.3 From c818aa88d2d0cfc4938bfa9e226c0792af2dc45f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 2 May 2018 17:19:05 +0300 Subject: Revert "vhost: make msg padding explicit" This reverts commit 93c0d549c4c5a7382ad70de6b86610b7aae57406. Unfortunately the padding will break 32 bit userspace. Ouch. Need to add some compat code, revert for now. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- include/uapi/linux/vhost.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index 5a8ad064445b..c51f8e5cc608 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -68,7 +68,6 @@ struct vhost_iotlb_msg { struct vhost_msg { int type; - int padding0; union { struct vhost_iotlb_msg iotlb; __u8 padding[64]; -- cgit v1.2.3 From 5e5add172ea81152d518b161ec5706503ad3d799 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Tue, 1 May 2018 12:41:22 -0500 Subject: net: ethernet: ti: cpsw: fix packet leaking in dual_mac mode In dual_mac mode packets arrived on one port should not be forwarded by switch hw to another port. Only Linux Host can forward packets between ports. The below test case (reported in [1]) shows that packet arrived on one port can be leaked to anoter (reproducible with dual port evms): - connect port 1 (eth0) to linux Host 0 and run tcpdump or Wireshark - connect port 2 (eth1) to linux Host 1 with vlan 1 configured - ping from Host 1 through vlan 1 interface. ARP packets will be seen on Host 0. Issue happens because dual_mac mode is implemnted using two vlans: 1 (Port 1+Port 0) and 2 (Port 2+Port 0), so there are vlan records created for for each vlan. By default, the ALE will find valid vlan record in its table when vlan 1 tagged packet arrived on Port 2 and so forwards packet to all ports which are vlan 1 members (like Port. To avoid such behaviorr the ALE VLAN ID Ingress Check need to be enabled for each external CPSW port (ALE_PORTCTLn.VID_INGRESS_CHECK) so ALE will drop ingress packets if Rx port is not VLAN member. Signed-off-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 74f828412055..28d893b93d30 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1340,6 +1340,8 @@ static inline void cpsw_add_dual_emac_def_ale_entries( cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan); + cpsw_ale_control_set(cpsw->ale, slave_port, + ALE_PORT_DROP_UNKNOWN_VLAN, 1); } static void soft_reset_slave(struct cpsw_slave *slave) -- cgit v1.2.3 From e6e6a278b1eaffa19d42186bfacd1ffc15a50b3f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Tue, 1 May 2018 21:45:41 -0400 Subject: tcp_bbr: fix to zero idle_restart only upon S/ACKed data Previously the bbr->idle_restart tracking was zeroing out the bbr->idle_restart bit upon ACKs that did not SACK or ACK anything, e.g. receiving incoming data or receiver window updates. In such situations BBR would forget that this was a restart-from-idle situation, and if the min_rtt had expired it would unnecessarily enter PROBE_RTT (even though we were actually restarting from idle but had merely forgotten that fact). The fix is simple: we need to remember we are restarting from idle until we receive a S/ACK for some data (a S/ACK for the first flight of data we send as we are restarting). This commit is a stable candidate for kernels back as far as 4.9. Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: Priyaranjan Jha Signed-off-by: Yousuk Seung Signed-off-by: David S. Miller --- net/ipv4/tcp_bbr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 158d105e76da..58e2f479ffb4 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -806,7 +806,9 @@ static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) } } } - bbr->idle_restart = 0; + /* Restart after idle ends only once we process a new S/ACK for data */ + if (rs->delivered > 0) + bbr->idle_restart = 0; } static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) -- cgit v1.2.3 From 4842a08fb80bc09b7b089af42c58353dfaa8f88f Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 2 May 2018 13:37:44 +0800 Subject: sctp: init active key for the new asoc in dupcook_a and dupcook_b When processing a duplicate cookie-echo chunk, for case 'A' and 'B', after sctp_process_init for the new asoc, if auth is enabled for the cookie-ack chunk, the active key should also be initialized. Otherwise, the cookie-ack chunk made later can not be set with auth shkey properly, and a crash can even be caused by this, as after Commit 1b1e0bc99474 ("sctp: add refcnt support for sh_key"), sctp needs to hold the shkey when making control chunks. Fixes: 1b1e0bc99474 ("sctp: add refcnt support for sh_key") Reported-by: Jianwen Ji Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index dd0594a10961..98acfed45e3b 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1794,6 +1794,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( GFP_ATOMIC)) goto nomem; + if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC)) + goto nomem; + /* Make sure no new addresses are being added during the * restart. Though this is a pretty complicated attack * since you'd have to get inside the cookie. @@ -1906,6 +1909,9 @@ static enum sctp_disposition sctp_sf_do_dupcook_b( GFP_ATOMIC)) goto nomem; + if (sctp_auth_asoc_init_active_key(new_asoc, GFP_ATOMIC)) + goto nomem; + /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, -- cgit v1.2.3 From 46e16d4b956867013e0bbd7f2bad206f4aa55752 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 2 May 2018 13:39:46 +0800 Subject: sctp: use the old asoc when making the cookie-ack chunk in dupcook_d When processing a duplicate cookie-echo chunk, for case 'D', sctp will not process the param from this chunk. It means old asoc has nothing to be updated, and the new temp asoc doesn't have the complete info. So there's no reason to use the new asoc when creating the cookie-ack chunk. Otherwise, like when auth is enabled for cookie-ack, the chunk can not be set with auth, and it will definitely be dropped by peer. This issue is there since very beginning, and we fix it by using the old asoc instead. Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/sm_statefuns.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 98acfed45e3b..28c070e187c2 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2056,7 +2056,7 @@ static enum sctp_disposition sctp_sf_do_dupcook_d( } } - repl = sctp_make_cookie_ack(new_asoc, chunk); + repl = sctp_make_cookie_ack(asoc, chunk); if (!repl) goto nomem; -- cgit v1.2.3 From ce402f044e4e432c296f90eaabb8dbe8f3624391 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 2 May 2018 13:45:12 +0800 Subject: sctp: fix the issue that the cookie-ack with auth can't get processed When auth is enabled for cookie-ack chunk, in sctp_inq_pop, sctp processes auth chunk first, then continues to the next chunk in this packet if chunk_end + chunk_hdr size < skb_tail_pointer(). Otherwise, it will go to the next packet or discard this chunk. However, it missed the fact that cookie-ack chunk's size is equal to chunk_hdr size, which couldn't match that check, and thus this chunk would not get processed. This patch fixes it by changing the check to chunk_end + chunk_hdr size <= skb_tail_pointer(). Fixes: 26b87c788100 ("net: sctp: fix remote memory pressure from excessive queueing") Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/inqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 23ebc5318edc..eb93ffe2408b 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -217,7 +217,7 @@ new_skb: skb_pull(chunk->skb, sizeof(*ch)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ - if (chunk->chunk_end + sizeof(*ch) < skb_tail_pointer(chunk->skb)) { + if (chunk->chunk_end + sizeof(*ch) <= skb_tail_pointer(chunk->skb)) { /* This is not a singleton */ chunk->singleton = 0; } else if (chunk->chunk_end > skb_tail_pointer(chunk->skb)) { -- cgit v1.2.3 From af3e0fcf78879f718c5f73df0814951bd7057d34 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 2 May 2018 13:30:57 +0200 Subject: 8139too: Use disable_irq_nosync() in rtl8139_poll_controller() Use disable_irq_nosync() instead of disable_irq() as this might be called in atomic context with netpoll. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/8139too.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index d24b47b8e0b2..d118da5a10a2 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -2224,7 +2224,7 @@ static void rtl8139_poll_controller(struct net_device *dev) struct rtl8139_private *tp = netdev_priv(dev); const int irq = tp->pci_dev->irq; - disable_irq(irq); + disable_irq_nosync(irq); rtl8139_interrupt(irq, dev); enable_irq(irq); } -- cgit v1.2.3 From 784813aed6ba24a1f24e7e11d9d0f208cee37a7d Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 2 May 2018 16:53:56 +0200 Subject: net/smc: restrict non-blocking connect finish The smc_poll code tries to finish connect() if the socket is in state SMC_INIT and polling of the internal CLC-socket returns with EPOLLOUT. This makes sense for a select/poll call following a connect call, but not without preceding connect(). With this patch smc_poll starts connect logic only, if the CLC-socket is no longer in its initial state TCP_CLOSE. In addition, a poll error on the internal CLC-socket is always propagated to the SMC socket. With this patch the code path mentioned by syzbot https://syzkaller.appspot.com/bug?extid=03faa2dc16b8b64be396 is no longer possible. Signed-off-by: Ursula Braun Reported-by: syzbot+03faa2dc16b8b64be396@syzkaller.appspotmail.com Signed-off-by: David S. Miller --- net/smc/af_smc.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 4470501374bf..8b4c059bd13b 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1166,13 +1166,15 @@ static __poll_t smc_poll(struct file *file, struct socket *sock, /* delegate to CLC child sock */ release_sock(sk); mask = smc->clcsock->ops->poll(file, smc->clcsock, wait); - /* if non-blocking connect finished ... */ lock_sock(sk); - if ((sk->sk_state == SMC_INIT) && (mask & EPOLLOUT)) { - sk->sk_err = smc->clcsock->sk->sk_err; - if (sk->sk_err) { - mask |= EPOLLERR; - } else { + sk->sk_err = smc->clcsock->sk->sk_err; + if (sk->sk_err) { + mask |= EPOLLERR; + } else { + /* if non-blocking connect finished ... */ + if (sk->sk_state == SMC_INIT && + mask & EPOLLOUT && + smc->clcsock->sk->sk_state != TCP_CLOSE) { rc = smc_connect_rdma(smc); if (rc < 0) mask |= EPOLLERR; -- cgit v1.2.3 From 3aab8884c9eb99189a3569ac4e6b205371c9ac0b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 May 2018 20:12:22 +0200 Subject: bpf, x64: fix memleak when not converging after image While reviewing x64 JIT code, I noticed that we leak the prior allocated JIT image in the case where proglen != oldproglen during the JIT passes. Prior to the commit e0ee9c12157d ("x86: bpf_jit: fix two bugs in eBPF JIT compiler") we would just break out of the loop, and using the image as the JITed prog since it could only shrink in size anyway. After e0ee9c12157d, we would bail out to out_addrs label where we free addrs and jit_data but not the image coming from bpf_jit_binary_alloc(). Fixes: e0ee9c12157d ("x86: bpf_jit: fix two bugs in eBPF JIT compiler") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: David S. Miller Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index abce27ceb411..9ae7b9372348 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1236,6 +1236,7 @@ skip_init_addrs: for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { +out_image: image = NULL; if (header) bpf_jit_binary_free(header); @@ -1246,8 +1247,7 @@ skip_init_addrs: if (proglen != oldproglen) { pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", proglen, oldproglen); - prog = orig_prog; - goto out_addrs; + goto out_image; } break; } -- cgit v1.2.3 From 39f56ca945af86112753646316c4c92dcd4acd82 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 May 2018 20:12:23 +0200 Subject: bpf, x64: fix memleak when not converging on calls The JIT logic in jit_subprogs() is as follows: for all subprogs we allocate a bpf_prog_alloc(), populate it (prog->is_func = 1 here), and pass it to bpf_int_jit_compile(). If a failure occurred during JIT and prog->jited is not set, then we bail out from attempting to JIT the whole program, and punt to the interpreter instead. In case JITing went successful, we fixup BPF call offsets and do another pass to bpf_int_jit_compile() (extra_pass is true at that point) to complete JITing calls. Given that requires to pass JIT context around addrs and jit_data from x86 JIT are freed in the extra_pass in bpf_int_jit_compile() when calls are involved (if not, they can be freed immediately). However, if in the original pass, the JIT image didn't converge then we leak addrs and jit_data since image itself is NULL, the prog->is_func is set and extra_pass is false in that case, meaning both will become unreachable and are never cleaned up, therefore we need to free as well on !image. Only x64 JIT is affected. Fixes: 1c2a088a6626 ("bpf: x64: add JIT support for multi-function programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: David S. Miller Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9ae7b9372348..263c8453815e 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1283,7 +1283,7 @@ out_image: prog = orig_prog; } - if (!prog->is_func || extra_pass) { + if (!image || !prog->is_func || extra_pass) { out_addrs: kfree(addrs); kfree(jit_data); -- cgit v1.2.3 From 30ca22e4a5d0063dd9a9cdf35cd139c5807cbeb3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 2 May 2018 22:41:56 +0300 Subject: ipv6: Revert "ipv6: Allow non-gateway ECMP for IPv6" This reverts commit edd7ceb78296 ("ipv6: Allow non-gateway ECMP for IPv6"). Eric reported a division by zero in rt6_multipath_rebalance() which is caused by above commit that considers identical local routes to be siblings. The division by zero happens because a nexthop weight is not set for local routes. Revert the commit as it does not fix a bug and has side effects. To reproduce: # ip -6 address add 2001:db8::1/64 dev dummy0 # ip -6 address add 2001:db8::1/64 dev dummy1 Fixes: edd7ceb78296 ("ipv6: Allow non-gateway ECMP for IPv6") Signed-off-by: Ido Schimmel Reported-by: Eric Dumazet Tested-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 ++- net/ipv6/ip6_fib.c | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index abceb5864d99..08b132381984 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,7 +68,8 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct rt6_info *rt) { - return (rt->rt6i_flags & (RTF_ADDRCONF | RTF_DYNAMIC)) == 0; + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; } void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 3c97c29d4401..deab2db6692e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -934,6 +934,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * list. * Only static routes (which don't have flag * RTF_EXPIRES) are used for ECMPv6. + * + * To avoid long list, we only had siblings if the + * route have a gateway. */ if (rt_can_ecmp && rt6_qualify_for_ecmp(iter)) -- cgit v1.2.3 From 7df40c2673a1307c3260aab6f9d4b9bf97ca8fd7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 May 2018 10:03:30 -0700 Subject: net_sched: fq: take care of throttled flows before reuse Normally, a socket can not be freed/reused unless all its TX packets left qdisc and were TX-completed. However connect(AF_UNSPEC) allows this to happen. With commit fc59d5bdf1e3 ("pkt_sched: fq: clear time_next_packet for reused flows") we cleared f->time_next_packet but took no special action if the flow was still in the throttled rb-tree. Since f->time_next_packet is the key used in the rb-tree searches, blindly clearing it might break rb-tree integrity. We need to make sure the flow is no longer in the rb-tree to avoid this problem. Fixes: fc59d5bdf1e3 ("pkt_sched: fq: clear time_next_packet for reused flows") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_fq.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index a366e4c9413a..4808713c73b9 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -128,6 +128,28 @@ static bool fq_flow_is_detached(const struct fq_flow *f) return f->next == &detached; } +static bool fq_flow_is_throttled(const struct fq_flow *f) +{ + return f->next == &throttled; +} + +static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) +{ + if (head->first) + head->last->next = flow; + else + head->first = flow; + head->last = flow; + flow->next = NULL; +} + +static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f) +{ + rb_erase(&f->rate_node, &q->delayed); + q->throttled_flows--; + fq_flow_add_tail(&q->old_flows, f); +} + static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) { struct rb_node **p = &q->delayed.rb_node, *parent = NULL; @@ -155,15 +177,6 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f) static struct kmem_cache *fq_flow_cachep __read_mostly; -static void fq_flow_add_tail(struct fq_flow_head *head, struct fq_flow *flow) -{ - if (head->first) - head->last->next = flow; - else - head->first = flow; - head->last = flow; - flow->next = NULL; -} /* limit number of collected flows per round */ #define FQ_GC_MAX 8 @@ -267,6 +280,8 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) f->socket_hash != sk->sk_hash)) { f->credit = q->initial_quantum; f->socket_hash = sk->sk_hash; + if (fq_flow_is_throttled(f)) + fq_flow_unset_throttled(q, f); f->time_next_packet = 0ULL; } return f; @@ -438,9 +453,7 @@ static void fq_check_throttled(struct fq_sched_data *q, u64 now) q->time_next_delayed_flow = f->time_next_packet; break; } - rb_erase(p, &q->delayed); - q->throttled_flows--; - fq_flow_add_tail(&q->old_flows, f); + fq_flow_unset_throttled(q, f); } } -- cgit v1.2.3 From 3cc9a472d625f31f981063882b07e96229b9e71a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 2 May 2018 13:50:19 -0700 Subject: bpf: sockmap, fix scatterlist update on error path in send with apply When the call to do_tcp_sendpage() fails to send the complete block requested we either retry if only a partial send was completed or abort if we receive a error less than or equal to zero. Before returning though we must update the scatterlist length/offset to account for any partial send completed. Before this patch we did this at the end of the retry loop, but this was buggy when used while applying a verdict to fewer bytes than in the scatterlist. When the scatterlist length was being set we forgot to account for the apply logic reducing the size variable. So the result was we chopped off some bytes in the scatterlist without doing proper cleanup on them. This results in a WARNING when the sock is tore down because the bytes have previously been charged to the socket but are never uncharged. The simple fix is to simply do the accounting inside the retry loop subtracting from the absolute scatterlist values rather than trying to accumulate the totals and subtract at the end. Reported-by: Alexei Starovoitov Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 634415c7fbcd..943929a05c92 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -326,6 +326,9 @@ retry: if (ret > 0) { if (apply) apply_bytes -= ret; + + sg->offset += ret; + sg->length -= ret; size -= ret; offset += ret; if (uncharge) @@ -333,8 +336,6 @@ retry: goto retry; } - sg->length = size; - sg->offset = offset; return ret; } -- cgit v1.2.3 From fec51d40ea65dd8f51a3e27fc69b4e7dc4f17776 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 2 May 2018 13:50:24 -0700 Subject: bpf: sockmap, zero sg_size on error when buffer is released When an error occurs during a redirect we have two cases that need to be handled (i) we have a cork'ed buffer (ii) we have a normal sendmsg buffer. In the cork'ed buffer case we don't currently support recovering from errors in a redirect action. So the buffer is released and the error should _not_ be pushed back to the caller of sendmsg/sendpage. The rationale here is the user will get an error that relates to old data that may have been sent by some arbitrary thread on that sock. Instead we simple consume the data and tell the user that the data has been consumed. We may add proper error recovery in the future. However, this patch fixes a bug where the bytes outstanding counter sg_size was not zeroed. This could result in a case where if the user has both a cork'ed action and apply action in progress we may incorrectly call into the BPF program when the user expected an old verdict to be applied via the apply action. I don't have a use case where using apply and cork at the same time is valid but we never explicitly reject it because it should work fine. This patch ensures the sg_size is zeroed so we don't have this case. In the normal sendmsg buffer case (no cork data) we also do not zero sg_size. Again this can confuse the apply logic when the logic calls into the BPF program when the BPF programmer expected the old verdict to remain. So ensure we set sg_size to zero here as well. And additionally to keep the psock state in-sync with the sk_msg_buff release all the memory as well. Previously we did this before returning to the user but this left a gap where psock and sk_msg_buff states were out of sync which seems fragile. No additional overhead is taken here except for a call to check the length and realize its already been freed. This is in the error path as well so in my opinion lets have robust code over optimized error paths. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 943929a05c92..052c313b12db 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -701,15 +701,22 @@ more_data: err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags); lock_sock(sk); + if (unlikely(err < 0)) { + free_start_sg(sk, m); + psock->sg_size = 0; + if (!cork) + *copied -= send; + } else { + psock->sg_size -= send; + } + if (cork) { free_start_sg(sk, m); + psock->sg_size = 0; kfree(m); m = NULL; + err = 0; } - if (unlikely(err)) - *copied -= err; - else - psock->sg_size -= send; break; case __SK_DROP: default: -- cgit v1.2.3 From abaeb096ca38cad02c8a68c49ddd7efc043c319a Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 2 May 2018 13:50:29 -0700 Subject: bpf: sockmap, fix error handling in redirect failures When a redirect failure happens we release the buffers in-flight without calling a sk_mem_uncharge(), the uncharge is called before dropping the sock lock for the redirecte, however we missed updating the ring start index. When no apply actions are in progress this is OK because we uncharge the entire buffer before the redirect. But, when we have apply logic running its possible that only a portion of the buffer is being redirected. In this case we only do memory accounting for the buffer slice being redirected and expect to be able to loop over the BPF program again and/or if a sock is closed uncharge the memory at sock destruct time. With an invalid start index however the program logic looks at the start pointer index, checks the length, and when seeing the length is zero (from the initial release and failure to update the pointer) aborts without uncharging/releasing the remaining memory. The fix for this is simply to update the start index. To avoid fixing this error in two locations we do a small refactor and remove one case where it is open-coded. Then fix it in the single function. Signed-off-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 052c313b12db..098eca568c2b 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -393,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) } while (i != md->sg_end); } -static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) +static void free_bytes_sg(struct sock *sk, int bytes, + struct sk_msg_buff *md, bool charge) { struct scatterlist *sg = md->sg_data; int i = md->sg_start, free; @@ -403,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) if (bytes < free) { sg[i].length -= bytes; sg[i].offset += bytes; - sk_mem_uncharge(sk, bytes); + if (charge) + sk_mem_uncharge(sk, bytes); break; } - sk_mem_uncharge(sk, sg[i].length); + if (charge) + sk_mem_uncharge(sk, sg[i].length); put_page(sg_page(&sg[i])); bytes -= sg[i].length; sg[i].length = 0; @@ -418,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) if (i == MAX_SKB_FRAGS) i = 0; } + md->sg_start = i; } static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) @@ -576,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, struct sk_msg_buff *md, int flags) { + bool ingress = !!(md->flags & BPF_F_INGRESS); struct smap_psock *psock; struct scatterlist *sg; - int i, err, free = 0; - bool ingress = !!(md->flags & BPF_F_INGRESS); + int err = 0; sg = md->sg_data; @@ -607,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send, out_rcu: rcu_read_unlock(); out: - i = md->sg_start; - while (sg[i].length) { - free += sg[i].length; - put_page(sg_page(&sg[i])); - sg[i].length = 0; - i++; - if (i == MAX_SKB_FRAGS) - i = 0; - } - return free; + free_bytes_sg(NULL, send, md, false); + return err; } static inline void bpf_md_init(struct smap_psock *psock) @@ -720,7 +716,7 @@ more_data: break; case __SK_DROP: default: - free_bytes_sg(sk, send, m); + free_bytes_sg(sk, send, m, true); apply_bytes_dec(psock, send); *copied -= send; psock->sg_size -= send; -- cgit v1.2.3 From 94720e3aee6884d8c8beb678001629da60ec6366 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 2 May 2018 09:41:19 +0300 Subject: ipv4: fix fnhe usage by non-cached routes Allow some non-cached routes to use non-expired fnhe: 1. ip_del_fnhe: moved above and now called by find_exception. The 4.5+ commit deed49df7390 expires fnhe only when caching routes. Change that to: 1.1. use fnhe for non-cached local output routes, with the help from (2) 1.2. allow __mkroute_input to detect expired fnhe (outdated fnhe_gw, for example) when do_cache is false, eg. when itag!=0 for unicast destinations. 2. __mkroute_output: keep fi to allow local routes with orig_oif != 0 to use fnhe info even when the new route will not be cached into fnhe. After commit 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic") it means all local routes will be affected because they are not cached. This change is used to solve a PMTU problem with IPVS (and probably Netfilter DNAT) setups that redirect local clients from target local IP (local route to Virtual IP) to new remote IP target, eg. IPVS TUN real server. Loopback has 64K MTU and we need to create fnhe on the local route that will keep the reduced PMTU for the Virtual IP. Without this change fnhe_pmtu is updated from ICMP but never exposed to non-cached local routes. This includes routes with flowi4_oif!=0 for 4.6+ and with flowi4_oif=any for 4.14+). 3. update_or_create_fnhe: make sure fnhe_expires is not 0 for new entries Fixes: 839da4d98960 ("net: ipv4: set orig_oif based on fib result for local traffic") Fixes: d6d5e999e5df ("route: do not cache fib route info on local routes with oif") Fixes: deed49df7390 ("route: check and remove route cache when we get route") Cc: David Ahern Cc: Xin Long Signed-off-by: Julian Anastasov Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 118 +++++++++++++++++++++++++------------------------------ 1 file changed, 53 insertions(+), 65 deletions(-) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ccb25d80f679..1412a7baf0b9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -709,7 +709,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_mtu_locked = lock; - fnhe->fnhe_expires = expires; + fnhe->fnhe_expires = max(1UL, expires); /* Exception created; mark the cached routes for the nexthop * stale, so anyone caching it rechecks if this exception @@ -1297,6 +1297,36 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } +static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) +{ + struct fnhe_hash_bucket *hash; + struct fib_nh_exception *fnhe, __rcu **fnhe_p; + u32 hval = fnhe_hashfun(daddr); + + spin_lock_bh(&fnhe_lock); + + hash = rcu_dereference_protected(nh->nh_exceptions, + lockdep_is_held(&fnhe_lock)); + hash += hval; + + fnhe_p = &hash->chain; + fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); + while (fnhe) { + if (fnhe->fnhe_daddr == daddr) { + rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( + fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); + fnhe_flush_routes(fnhe); + kfree_rcu(fnhe, rcu); + break; + } + fnhe_p = &fnhe->fnhe_next; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, + lockdep_is_held(&fnhe_lock)); + } + + spin_unlock_bh(&fnhe_lock); +} + static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) { struct fnhe_hash_bucket *hash = rcu_dereference(nh->nh_exceptions); @@ -1310,8 +1340,14 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { - if (fnhe->fnhe_daddr == daddr) + if (fnhe->fnhe_daddr == daddr) { + if (fnhe->fnhe_expires && + time_after(jiffies, fnhe->fnhe_expires)) { + ip_del_fnhe(nh, daddr); + break; + } return fnhe; + } } return NULL; } @@ -1636,36 +1672,6 @@ static void ip_handle_martian_source(struct net_device *dev, #endif } -static void ip_del_fnhe(struct fib_nh *nh, __be32 daddr) -{ - struct fnhe_hash_bucket *hash; - struct fib_nh_exception *fnhe, __rcu **fnhe_p; - u32 hval = fnhe_hashfun(daddr); - - spin_lock_bh(&fnhe_lock); - - hash = rcu_dereference_protected(nh->nh_exceptions, - lockdep_is_held(&fnhe_lock)); - hash += hval; - - fnhe_p = &hash->chain; - fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); - while (fnhe) { - if (fnhe->fnhe_daddr == daddr) { - rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( - fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); - fnhe_flush_routes(fnhe); - kfree_rcu(fnhe, rcu); - break; - } - fnhe_p = &fnhe->fnhe_next; - fnhe = rcu_dereference_protected(fnhe->fnhe_next, - lockdep_is_held(&fnhe_lock)); - } - - spin_unlock_bh(&fnhe_lock); -} - /* called in rcu_read_lock() section */ static int __mkroute_input(struct sk_buff *skb, const struct fib_result *res, @@ -1719,20 +1725,10 @@ static int __mkroute_input(struct sk_buff *skb, fnhe = find_exception(&FIB_RES_NH(*res), daddr); if (do_cache) { - if (fnhe) { + if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); - if (rth && rth->dst.expires && - time_after(jiffies, rth->dst.expires)) { - ip_del_fnhe(&FIB_RES_NH(*res), daddr); - fnhe = NULL; - } else { - goto rt_cache; - } - } - - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); - -rt_cache: + else + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2216,39 +2212,31 @@ static struct rtable *__mkroute_output(const struct fib_result *res, * the loopback interface and the IP_PKTINFO ipi_ifindex will * be set to the loopback interface as well. */ - fi = NULL; + do_cache = false; } fnhe = NULL; do_cache &= fi != NULL; - if (do_cache) { + if (fi) { struct rtable __rcu **prth; struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); + if (!do_cache) + goto add; if (fnhe) { prth = &fnhe->fnhe_rth_output; - rth = rcu_dereference(*prth); - if (rth && rth->dst.expires && - time_after(jiffies, rth->dst.expires)) { - ip_del_fnhe(nh, fl4->daddr); - fnhe = NULL; - } else { - goto rt_cache; + } else { + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; } + prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); } - - if (unlikely(fl4->flowi4_flags & - FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { - do_cache = false; - goto add; - } - prth = raw_cpu_ptr(nh->nh_pcpu_rth_output); rth = rcu_dereference(*prth); - -rt_cache: if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) return rth; } -- cgit v1.2.3 From 5697db4a696c41601a1d15c1922150b4dbf5726c Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 2 May 2018 22:22:54 +0200 Subject: qmi_wwan: do not steal interfaces from class drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The USB_DEVICE_INTERFACE_NUMBER matching macro assumes that the { vendorid, productid, interfacenumber } set uniquely identifies one specific function. This has proven to fail for some configurable devices. One example is the Quectel EM06/EP06 where the same interface number can be either QMI or MBIM, without the device ID changing either. Fix by requiring the vendor-specific class for interface number based matching. Functions of other classes can and should use class based matching instead. Fixes: 03304bcb5ec4 ("net: qmi_wwan: use fixed interface number matching") Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- drivers/net/usb/qmi_wwan.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 51c68fc416fa..42565dd33aa6 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -1344,6 +1344,18 @@ static int qmi_wwan_probe(struct usb_interface *intf, id->driver_info = (unsigned long)&qmi_wwan_info; } + /* There are devices where the same interface number can be + * configured as different functions. We should only bind to + * vendor specific functions when matching on interface number + */ + if (id->match_flags & USB_DEVICE_ID_MATCH_INT_NUMBER && + desc->bInterfaceClass != USB_CLASS_VENDOR_SPEC) { + dev_dbg(&intf->dev, + "Rejecting interface number match for class %02x\n", + desc->bInterfaceClass); + return -ENODEV; + } + /* Quectel EC20 quirk where we've QMI on interface 4 instead of 0 */ if (quectel_ec20_detected(intf) && desc->bInterfaceNumber == 0) { dev_dbg(&intf->dev, "Quectel EC20 quirk, skipping interface 0\n"); -- cgit v1.2.3 From eb80ca476ec11f67a62691a93604b405ffc7d80c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 May 2018 14:53:39 -0700 Subject: rds: do not leak kernel memory to user land syzbot/KMSAN reported an uninit-value in put_cmsg(), originating from rds_cmsg_recv(). Simply clear the structure, since we have holes there, or since rx_traces might be smaller than RDS_MSG_RX_DGRAM_TRACE_MAX. BUG: KMSAN: uninit-value in copy_to_user include/linux/uaccess.h:184 [inline] BUG: KMSAN: uninit-value in put_cmsg+0x600/0x870 net/core/scm.c:242 CPU: 0 PID: 4459 Comm: syz-executor582 Not tainted 4.16.0+ #87 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x185/0x1d0 lib/dump_stack.c:53 kmsan_report+0x142/0x240 mm/kmsan/kmsan.c:1067 kmsan_internal_check_memory+0x135/0x1e0 mm/kmsan/kmsan.c:1157 kmsan_copy_to_user+0x69/0x160 mm/kmsan/kmsan.c:1199 copy_to_user include/linux/uaccess.h:184 [inline] put_cmsg+0x600/0x870 net/core/scm.c:242 rds_cmsg_recv net/rds/recv.c:570 [inline] rds_recvmsg+0x2db5/0x3170 net/rds/recv.c:657 sock_recvmsg_nosec net/socket.c:803 [inline] sock_recvmsg+0x1d0/0x230 net/socket.c:810 ___sys_recvmsg+0x3fb/0x810 net/socket.c:2205 __sys_recvmsg net/socket.c:2250 [inline] SYSC_recvmsg+0x298/0x3c0 net/socket.c:2262 SyS_recvmsg+0x54/0x80 net/socket.c:2257 do_syscall_64+0x309/0x430 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 Fixes: 3289025aedc0 ("RDS: add receive message trace used by application") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Santosh Shilimkar Cc: linux-rdma Signed-off-by: David S. Miller --- net/rds/recv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/rds/recv.c b/net/rds/recv.c index de50e2126e40..dc67458b52f0 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -558,6 +558,7 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, struct rds_cmsg_rx_trace t; int i, j; + memset(&t, 0, sizeof(t)); inc->i_rx_lat_trace[RDS_MSG_RX_CMSG] = local_clock(); t.rx_traces = rs->rs_rx_traces; for (i = 0; i < rs->rs_rx_traces; i++) { -- cgit v1.2.3 From 114f39feab360e6c7b0c4238697f223444d662a1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 2 May 2018 20:25:13 -0700 Subject: tcp: restore autocorking When adding rb-tree for TCP retransmit queue, we inadvertently broke TCP autocorking. tcp_should_autocork() should really check if the rtx queue is not empty. Tested: Before the fix : $ nstat -n;./netperf -H 10.246.7.152 -Cc -- -m 500;nstat | grep AutoCork MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 540000 262144 500 10.00 2682.85 2.47 1.59 3.618 2.329 TcpExtTCPAutoCorking 33 0.0 // Same test, but forcing TCP_NODELAY $ nstat -n;./netperf -H 10.246.7.152 -Cc -- -D -m 500;nstat | grep AutoCork MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET : nodelay Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 540000 262144 500 10.00 1408.75 2.44 2.96 6.802 8.259 TcpExtTCPAutoCorking 1 0.0 After the fix : $ nstat -n;./netperf -H 10.246.7.152 -Cc -- -m 500;nstat | grep AutoCork MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 540000 262144 500 10.00 5472.46 2.45 1.43 1.761 1.027 TcpExtTCPAutoCorking 361293 0.0 // With TCP_NODELAY option $ nstat -n;./netperf -H 10.246.7.152 -Cc -- -D -m 500;nstat | grep AutoCork MIGRATED TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.246.7.152 () port 0 AF_INET : nodelay Recv Send Send Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size Size Size Time Throughput local remote local remote bytes bytes bytes secs. 10^6bits/s % S % S us/KB us/KB 540000 262144 500 10.00 5454.96 2.46 1.63 1.775 1.174 TcpExtTCPAutoCorking 315448 0.0 Fixes: 75c119afe14f ("tcp: implement rb-tree based retransmit queue") Signed-off-by: Eric Dumazet Reported-by: Michael Wenig Tested-by: Michael Wenig Signed-off-by: Eric Dumazet Reported-by: Michael Wenig Tested-by: Michael Wenig Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 44be7f43455e..c9d00ef54dec 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -697,7 +697,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, { return skb->len < size_goal && sock_net(sk)->ipv4.sysctl_tcp_autocorking && - skb != tcp_write_queue_head(sk) && + !tcp_rtx_queue_empty(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } -- cgit v1.2.3 From 4e11581c27a28503282e777ce75502f560c6f97b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 3 May 2018 10:12:53 +0100 Subject: net/mlx5e: fix spelling mistake: "loobpack" -> "loopback" Trivial fix to spelling mistake in netdev_err error message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 707976482c09..027f54ac1ca2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -290,7 +290,7 @@ static int mlx5e_test_loopback(struct mlx5e_priv *priv) if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { netdev_err(priv->netdev, - "\tCan't perform loobpack test while device is down\n"); + "\tCan't perform loopback test while device is down\n"); return -ENODEV; } -- cgit v1.2.3 From df80b8fb3c0ef510649d2d6e350cf11be240d15c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 3 May 2018 16:19:32 +0100 Subject: qed: fix spelling mistake: "offloded" -> "offloaded" Trivial fix to spelling mistake in DP_NOTICE message Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_roce.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c index fb7c2d1562ae..6acfd43c1a4f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_roce.c +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -848,7 +848,7 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn, if (!(qp->resp_offloaded)) { DP_NOTICE(p_hwfn, - "The responder's qp should be offloded before requester's\n"); + "The responder's qp should be offloaded before requester's\n"); return -EINVAL; } -- cgit v1.2.3 From e63a5f8c19d7807823d68830ebe8cfbd4419ab13 Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 3 May 2018 17:57:37 +0200 Subject: net/smc: call consolidation Consolidate the call to smc_wr_reg_send() in a new function. No functional changes. Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 8b4c059bd13b..fdb2976117bd 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -292,6 +292,15 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC); } +/* register a new rmb */ +static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) +{ + /* register memory region for new rmb */ + if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) + return -EFAULT; + return 0; +} + static int smc_clnt_conf_first_link(struct smc_sock *smc) { struct smc_link_group *lgr = smc->conn.lgr; @@ -321,9 +330,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc) smc_wr_remember_qp_attr(link); - rc = smc_wr_reg_send(link, - smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) + if (smc_reg_rmb(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_INTERR; /* send CONFIRM LINK response over RoCE fabric */ @@ -473,13 +480,8 @@ static int smc_connect_rdma(struct smc_sock *smc) goto decline_rdma_unlock; } } else { - struct smc_buf_desc *buf_desc = smc->conn.rmb_desc; - - if (!buf_desc->reused) { - /* register memory region for new rmb */ - rc = smc_wr_reg_send(link, - buf_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) { + if (!smc->conn.rmb_desc->reused) { + if (smc_reg_rmb(link, smc->conn.rmb_desc)) { reason_code = SMC_CLC_DECL_INTERR; goto decline_rdma_unlock; } @@ -719,9 +721,7 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) link = &lgr->lnk[SMC_SINGLE_LINK]; - rc = smc_wr_reg_send(link, - smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) + if (smc_reg_rmb(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_INTERR; /* send CONFIRM LINK request to client over the RoCE fabric */ @@ -854,13 +854,8 @@ static void smc_listen_work(struct work_struct *work) smc_rx_init(new_smc); if (local_contact != SMC_FIRST_CONTACT) { - struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc; - - if (!buf_desc->reused) { - /* register memory region for new rmb */ - rc = smc_wr_reg_send(link, - buf_desc->mr_rx[SMC_SINGLE_LINK]); - if (rc) { + if (!new_smc->conn.rmb_desc->reused) { + if (smc_reg_rmb(link, new_smc->conn.rmb_desc)) { reason_code = SMC_CLC_DECL_INTERR; goto decline_rdma_unlock; } -- cgit v1.2.3 From a6920d1d130c3de039be982eba42542d329dc64c Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Thu, 3 May 2018 17:57:38 +0200 Subject: net/smc: handle unregistered buffers When smc_wr_reg_send() fails then tag (regerr) the affected buffer and free it in smc_buf_unuse(). Signed-off-by: Karsten Graul Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- net/smc/af_smc.c | 4 +++- net/smc/smc_core.c | 22 +++++++++++++++++++--- net/smc/smc_core.h | 3 ++- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index fdb2976117bd..d03b8d29ffc0 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -296,8 +296,10 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc) static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc) { /* register memory region for new rmb */ - if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) + if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) { + rmb_desc->regerr = 1; return -EFAULT; + } return 0; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f44f6803f7ff..d4bd01bb44e1 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -32,6 +32,9 @@ static u32 smc_lgr_num; /* unique link group number */ +static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, + bool is_rmb); + static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) { /* client link group creation always follows the server link group @@ -234,9 +237,22 @@ static void smc_buf_unuse(struct smc_connection *conn) conn->sndbuf_size = 0; } if (conn->rmb_desc) { - conn->rmb_desc->reused = true; - conn->rmb_desc->used = 0; - conn->rmbe_size = 0; + if (!conn->rmb_desc->regerr) { + conn->rmb_desc->reused = 1; + conn->rmb_desc->used = 0; + conn->rmbe_size = 0; + } else { + /* buf registration failed, reuse not possible */ + struct smc_link_group *lgr = conn->lgr; + struct smc_link *lnk; + + write_lock_bh(&lgr->rmbs_lock); + list_del(&conn->rmb_desc->list); + write_unlock_bh(&lgr->rmbs_lock); + + lnk = &lgr->lnk[SMC_SINGLE_LINK]; + smc_buf_free(conn->rmb_desc, lnk, true); + } } } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index 07e2a393e6d9..5dfcb15d529f 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -123,7 +123,8 @@ struct smc_buf_desc { */ u32 order; /* allocation order */ u32 used; /* currently used / unused */ - bool reused; /* new created / reused */ + u8 reused : 1; /* new created / reused */ + u8 regerr : 1; /* err during registration */ }; struct smc_rtoken { /* address/key of remote RMB */ -- cgit v1.2.3 From bda27ff5c4526f80a7620a94ecfe8dca153e3696 Mon Sep 17 00:00:00 2001 From: Stefan Raspl Date: Thu, 3 May 2018 17:57:39 +0200 Subject: smc: fix sendpage() call The sendpage() call grabs the sock lock before calling the default implementation - which tries to grab it once again. Signed-off-by: Stefan Raspl Signed-off-by: Ursula Braun < Signed-off-by: David S. Miller --- net/smc/af_smc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index d03b8d29ffc0..544bab42f925 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -1315,8 +1315,11 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page, smc = smc_sk(sk); lock_sock(sk); - if (sk->sk_state != SMC_ACTIVE) + if (sk->sk_state != SMC_ACTIVE) { + release_sock(sk); goto out; + } + release_sock(sk); if (smc->use_fallback) rc = kernel_sendpage(smc->clcsock, page, offset, size, flags); @@ -1324,7 +1327,6 @@ static ssize_t smc_sendpage(struct socket *sock, struct page *page, rc = sock_no_sendpage(sock, page, offset, size, flags); out: - release_sock(sk); return rc; } -- cgit v1.2.3 From a8d7aa17bbc970971ccdf71988ea19230ab368b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 3 May 2018 09:39:20 -0700 Subject: dccp: fix tasklet usage syzbot reported a crash in tasklet_action_common() caused by dccp. dccp needs to make sure socket wont disappear before tasklet handler has completed. This patch takes a reference on the socket when arming the tasklet, and moves the sock_put() from dccp_write_xmit_timer() to dccp_write_xmitlet() kernel BUG at kernel/softirq.c:514! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 17 Comm: ksoftirqd/1 Not tainted 4.17.0-rc3+ #30 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:tasklet_action_common.isra.19+0x6db/0x700 kernel/softirq.c:515 RSP: 0018:ffff8801d9b3faf8 EFLAGS: 00010246 dccp_close: ABORT with 65423 bytes unread RAX: 1ffff1003b367f6b RBX: ffff8801daf1f3f0 RCX: 0000000000000000 RDX: ffff8801cf895498 RSI: 0000000000000004 RDI: 0000000000000000 RBP: ffff8801d9b3fc40 R08: ffffed0039f12a95 R09: ffffed0039f12a94 dccp_close: ABORT with 65423 bytes unread R10: ffffed0039f12a94 R11: ffff8801cf8954a3 R12: 0000000000000000 R13: ffff8801d9b3fc18 R14: dffffc0000000000 R15: ffff8801cf895490 FS: 0000000000000000(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000001b2bc28000 CR3: 00000001a08a9000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tasklet_action+0x1d/0x20 kernel/softirq.c:533 __do_softirq+0x2e0/0xaf5 kernel/softirq.c:285 dccp_close: ABORT with 65423 bytes unread run_ksoftirqd+0x86/0x100 kernel/softirq.c:646 smpboot_thread_fn+0x417/0x870 kernel/smpboot.c:164 kthread+0x345/0x410 kernel/kthread.c:238 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412 Code: 48 8b 85 e8 fe ff ff 48 8b 95 f0 fe ff ff e9 94 fb ff ff 48 89 95 f0 fe ff ff e8 81 53 6e 00 48 8b 95 f0 fe ff ff e9 62 fb ff ff <0f> 0b 48 89 cf 48 89 8d e8 fe ff ff e8 64 53 6e 00 48 8b 8d e8 RIP: tasklet_action_common.isra.19+0x6db/0x700 kernel/softirq.c:515 RSP: ffff8801d9b3faf8 Fixes: dc841e30eaea ("dccp: Extend CCID packet dequeueing interface") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Gerrit Renker Cc: dccp@vger.kernel.org Signed-off-by: David S. Miller --- net/dccp/ccids/ccid2.c | 14 ++++++++++++-- net/dccp/timer.c | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 92d016e87816..385f153fe031 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -126,6 +126,16 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val) DCCPF_SEQ_WMAX)); } +static void dccp_tasklet_schedule(struct sock *sk) +{ + struct tasklet_struct *t = &dccp_sk(sk)->dccps_xmitlet; + + if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + sock_hold(sk); + __tasklet_schedule(t); + } +} + static void ccid2_hc_tx_rto_expire(struct timer_list *t) { struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer); @@ -166,7 +176,7 @@ static void ccid2_hc_tx_rto_expire(struct timer_list *t) /* if we were blocked before, we may now send cwnd=1 packet */ if (sender_was_blocked) - tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); + dccp_tasklet_schedule(sk); /* restart backed-off timer */ sk_reset_timer(sk, &hc->tx_rtotimer, jiffies + hc->tx_rto); out: @@ -706,7 +716,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) done: /* check if incoming Acks allow pending packets to be sent */ if (sender_was_blocked && !ccid2_cwnd_network_limited(hc)) - tasklet_schedule(&dccp_sk(sk)->dccps_xmitlet); + dccp_tasklet_schedule(sk); dccp_ackvec_parsed_cleanup(&hc->tx_av_chunks); } diff --git a/net/dccp/timer.c b/net/dccp/timer.c index b50a8732ff43..1501a20a94ca 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -232,6 +232,7 @@ static void dccp_write_xmitlet(unsigned long data) else dccp_write_xmit(sk); bh_unlock_sock(sk); + sock_put(sk); } static void dccp_write_xmit_timer(struct timer_list *t) @@ -240,7 +241,6 @@ static void dccp_write_xmit_timer(struct timer_list *t) struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; dccp_write_xmitlet((unsigned long)sk); - sock_put(sk); } void dccp_init_xmit_timers(struct sock *sk) -- cgit v1.2.3