Merge tag 'v5.0-rc6' into for-5.1/block

Pull in 5.0-rc6 to avoid a dumb merge conflict with fs/iomap.c. This is needed since io_uring is now based on the block branch, to avoid a conflict between the multi-page bvecs and the bits of io_uring that touch the core block parts. * tag 'v5.0-rc6': (525 commits) Linux 5.0-rc6 x86/mm: Make set_pmd_at() paravirt aware MAINTAINERS: Update the ocores i2c bus driver maintainer, etc blk-mq: remove duplicated definition of blk_mq_freeze_queue Blk-iolatency: warn on negative inflight IO counter blk-iolatency: fix IO hang due to negative inflight counter MAINTAINERS: unify reference to xen-devel list x86/mm/cpa: Fix set_mce_nospec() futex: Handle early deadlock return correctly futex: Fix barrier comment net: dsa: b53: Fix for failure when irq is not defined in dt blktrace: Show requests without sector mips: cm: reprime error cause mips: loongson64: remove unreachable(), fix loongson_poweroff(). sit: check if IPv6 enabled before calling ip6_err_gen_icmpv6_unreach() geneve: should not call rt6_lookup() when ipv6 was disabled KVM: nVMX: unconditionally cancel preemption timer in free_nested (CVE-2019-7221) KVM: x86: work around leak of uninitialized stack contents (CVE-2019-7222) kvm: fix kvm_ioctl_create_device() reference counting (CVE-2019-6974) signal: Better detection of synchronous signals ...
author: Jens Axboe <axboe@kernel.dk> 2019-02-15 08:43:59 -0700
committer: Jens Axboe <axboe@kernel.dk> 2019-02-15 08:43:59 -0700
commit: 6fb845f0e78de19eaaf6a2d351702474e44b6a9e (patch)
tree: 94c6474b38d8f59d8f0de0151053283812e456c6 /tools
parent: 56d18f62f556b80105e38e7975975cf7465aae3e (diff)
parent: d13937116f1e82bf508a6325111b322c30c85eb9 (diff)
39 files changed, 1551 insertions, 123 deletions
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 897483457bf0..f7261fad45c1 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -297,10 +297,8 @@ char *get_fdinfo(int fd, const char *key)
 	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
 
 	fdi = fopen(path, "r");
-	if (!fdi) {
-		p_err("can't open fdinfo: %s", strerror(errno));
+	if (!fdi)
 		return NULL;
-	}
 
 	while ((n = getline(&line, &line_n, fdi)) > 0) {
 		char *value;
@@ -313,7 +311,6 @@ char *get_fdinfo(int fd, const char *key)
 
 		value = strchr(line, '\t');
 		if (!value || !value[1]) {
-			p_err("malformed fdinfo!?");
 			free(line);
 			return NULL;
 		}
@@ -326,7 +323,6 @@ char *get_fdinfo(int fd, const char *key)
 		return line;
 	}
 
-	p_err("key '%s' not found in fdinfo", key);
 	free(line);
 	fclose(fdi);
 	return NULL;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 2037e3dc864b..1ef1ee2280a2 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -347,6 +347,20 @@ static char **parse_bytes(char **argv, const char *name, unsigned char *val,
 	return argv + i;
 }
 
+/* on per cpu maps we must copy the provided value on all value instances */
+static void fill_per_cpu_value(struct bpf_map_info *info, void *value)
+{
+	unsigned int i, n, step;
+
+	if (!map_is_per_cpu(info->type))
+		return;
+
+	n = get_possible_cpus();
+	step = round_up(info->value_size, 8);
+	for (i = 1; i < n; i++)
+		memcpy(value + i * step, value, info->value_size);
+}
+
 static int parse_elem(char **argv, struct bpf_map_info *info,
 		      void *key, void *value, __u32 key_size, __u32 value_size,
 		      __u32 *flags, __u32 **value_fd)
@@ -426,6 +440,8 @@ static int parse_elem(char **argv, struct bpf_map_info *info,
 			argv = parse_bytes(argv, "value", value, value_size);
 			if (!argv)
 				return -1;
+
+			fill_per_cpu_value(info, value);
 		}
 
 		return parse_elem(argv, info, key, NULL, key_size, value_size,
@@ -497,10 +513,9 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
 				jsonw_uint_field(json_wtr, "owner_prog_type",
 						 prog_type);
 		}
-		if (atoi(owner_jited))
-			jsonw_bool_field(json_wtr, "owner_jited", true);
-		else
-			jsonw_bool_field(json_wtr, "owner_jited", false);
+		if (owner_jited)
+			jsonw_bool_field(json_wtr, "owner_jited",
+					 !!atoi(owner_jited));
 
 		free(owner_prog_type);
 		free(owner_jited);
@@ -553,7 +568,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 		char *owner_prog_type = get_fdinfo(fd, "owner_prog_type");
 		char *owner_jited = get_fdinfo(fd, "owner_jited");
 
-		printf("\n\t");
+		if (owner_prog_type || owner_jited)
+			printf("\n\t");
 		if (owner_prog_type) {
 			unsigned int prog_type = atoi(owner_prog_type);
 
@@ -563,10 +579,9 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
 			else
 				printf("owner_prog_type %d  ", prog_type);
 		}
-		if (atoi(owner_jited))
-			printf("owner jited");
-		else
-			printf("owner not jited");
+		if (owner_jited)
+			printf("owner%s jited",
+			       atoi(owner_jited) ? "" : " not");
 
 		free(owner_prog_type);
 		free(owner_jited);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 2d1bb7d6ff51..b54ed82b9589 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -78,13 +78,14 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
 
 static int prog_fd_by_tag(unsigned char *tag)
 {
-	struct bpf_prog_info info = {};
-	__u32 len = sizeof(info);
 	unsigned int id = 0;
 	int err;
 	int fd;
 
 	while (true) {
+		struct bpf_prog_info info = {};
+		__u32 len = sizeof(info);
+
 		err = bpf_prog_get_next_id(id, &id);
 		if (err) {
 			p_err("%s", strerror(errno));
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index 3040830d7797..84545666a09c 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -330,7 +330,7 @@ static const struct option longopts[] = {
 
 int main(int argc, char **argv)
 {
-	unsigned long long num_loops = 2;
+	long long num_loops = 2;
 	unsigned long timedelay = 1000000;
 	unsigned long buf_len = 128;
 
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index f6052e70bf40..a55cb8b10165 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -268,7 +268,7 @@ struct sockaddr_in {
 #define	IN_MULTICAST(a)		IN_CLASSD(a)
 #define	IN_MULTICAST_NET	0xe0000000
 
-#define	IN_BADCLASS(a)		((((long int) (a) ) == 0xffffffff)
+#define	IN_BADCLASS(a)		(((long int) (a) ) == (long int)0xffffffff)
 #define	IN_EXPERIMENTAL(a)	IN_BADCLASS((a))
 
 #define	IN_CLASSE(a)		((((long int) (a)) & 0xf0000000) == 0xf0000000)
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index 095aebdc5bb7..e6150f21267d 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -19,8 +19,11 @@ C2C stands for Cache To Cache.
 The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows
 you to track down the cacheline contentions.
 
-The tool is based on x86's load latency and precise store facility events
-provided by Intel CPUs. These events provide:
+On x86, the tool is based on load latency and precise store facility events
+provided by Intel CPUs. On PowerPC, the tool uses random instruction sampling
+with thresholding feature.
+
+These events provide:
   - memory address of the access
   - type of the access (load and store details)
   - latency (in cycles) of the load access
@@ -46,7 +49,7 @@ RECORD OPTIONS
 
 -l::
 --ldlat::
-	Configure mem-loads latency.
+	Configure mem-loads latency. (x86 only)
 
 -k::
 --all-kernel::
@@ -119,11 +122,16 @@ Following perf record options are configured by default:
   -W,-d,--phys-data,--sample-cpu
 
 Unless specified otherwise with '-e' option, following events are monitored by
-default:
+default on x86:
 
   cpu/mem-loads,ldlat=30/P
   cpu/mem-stores/P
 
+and following on PowerPC:
+
+  cpu/mem-loads/
+  cpu/mem-stores/
+
 User can pass any 'perf record' option behind '--' mark, like (to enable
 callchains and system wide monitoring):
 
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index f8d2167cf3e7..199ea0f0a6c0 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -82,7 +82,7 @@ RECORD OPTIONS
 	Be more verbose (show counter open errors, etc)
 
 --ldlat <n>::
-	Specify desired latency for loads event.
+	Specify desired latency for loads event. (x86 only)
 
 In addition, for report all perf report options are valid, and for record
 all perf record options.
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 2e6595310420..ba98bd006488 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -2,6 +2,7 @@ libperf-y += header.o
 libperf-y += sym-handling.o
 libperf-y += kvm-stat.o
 libperf-y += perf_regs.o
+libperf-y += mem-events.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c
new file mode 100644
index 000000000000..d08311f04e95
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/mem-events.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "mem-events.h"
+
+/* PowerPC does not support 'ldlat' parameter. */
+char *perf_mem_events__name(int i)
+{
+	if (i == PERF_MEM_EVENTS__LOAD)
+		return (char *) "cpu/mem-loads/";
+
+	return (char *) "cpu/mem-stores/";
+}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d079f36d342d..ac221f137ed2 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -1681,13 +1681,8 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 		.force_header = false,
 	};
 	struct perf_evsel *ev2;
-	static bool init;
 	u64 val;
 
-	if (!init) {
-		perf_stat__init_shadow_stats();
-		init = true;
-	}
 	if (!evsel->stats)
 		perf_evlist__alloc_stats(script->session->evlist, false);
 	if (evsel_script(evsel->leader)->gnum++ == 0)
@@ -1794,7 +1789,7 @@ static void process_event(struct perf_script *script,
 		return;
 	}
 
-	if (PRINT_FIELD(TRACE)) {
+	if (PRINT_FIELD(TRACE) && sample->raw_data) {
 		event_format__fprintf(evsel->tp_format, sample->cpu,
 				      sample->raw_data, sample->raw_size, fp);
 	}
@@ -2359,6 +2354,8 @@ static int __cmd_script(struct perf_script *script)
 
 	signal(SIGINT, sig_handler);
 
+	perf_stat__init_shadow_stats();
+
 	/* override event processing functions */
 	if (script->show_task_events) {
 		script->tool.comm = process_comm_event;
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index ed4583128b9c..b36061cd1ab8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2514,19 +2514,30 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp);
 
 static bool perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
 {
-	struct perf_evsel *evsel = perf_evsel__newtp("probe", "vfs_getname");
+	bool found = false;
+	struct perf_evsel *evsel, *tmp;
+	struct parse_events_error err = { .idx = 0, };
+	int ret = parse_events(evlist, "probe:vfs_getname*", &err);
 
-	if (IS_ERR(evsel))
+	if (ret)
 		return false;
 
-	if (perf_evsel__field(evsel, "pathname") == NULL) {
+	evlist__for_each_entry_safe(evlist, evsel, tmp) {
+		if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+			continue;
+
+		if (perf_evsel__field(evsel, "pathname")) {
+			evsel->handler = trace__vfs_getname;
+			found = true;
+			continue;
+		}
+
+		list_del_init(&evsel->node);
+		evsel->evlist = NULL;
 		perf_evsel__delete(evsel);
-		return false;
 	}
 
-	evsel->handler = trace__vfs_getname;
-	perf_evlist__add(evlist, evsel);
-	return true;
+	return found;
 }
 
 static struct perf_evsel *perf_evsel__new_pgfault(u64 config)
diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index 44090a9a19f3..e952127e4fb0 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -1,6 +1,8 @@
 #! /usr/bin/python
 # SPDX-License-Identifier: GPL-2.0
 
+from __future__ import print_function
+
 import os
 import sys
 import glob
@@ -8,7 +10,11 @@ import optparse
 import tempfile
 import logging
 import shutil
-import ConfigParser
+
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
 
 def data_equal(a, b):
     # Allow multiple values in assignment separated by '|'
@@ -100,20 +106,20 @@ class Event(dict):
     def equal(self, other):
         for t in Event.terms:
             log.debug("      [%s] %s %s" % (t, self[t], other[t]));
-            if not self.has_key(t) or not other.has_key(t):
+            if t not in self or t not in other:
                 return False
             if not data_equal(self[t], other[t]):
                 return False
         return True
 
     def optional(self):
-        if self.has_key('optional') and self['optional'] == '1':
+        if 'optional' in self and self['optional'] == '1':
             return True
         return False
 
     def diff(self, other):
         for t in Event.terms:
-            if not self.has_key(t) or not other.has_key(t):
+            if t not in self or t not in other:
                 continue
             if not data_equal(self[t], other[t]):
                 log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
@@ -134,7 +140,7 @@ class Event(dict):
 #   - expected values assignments
 class Test(object):
     def __init__(self, path, options):
-        parser = ConfigParser.SafeConfigParser()
+        parser = configparser.SafeConfigParser()
         parser.read(path)
 
         log.warning("running '%s'" % path)
@@ -193,7 +199,7 @@ class Test(object):
         return True
 
     def load_events(self, path, events):
-        parser_event = ConfigParser.SafeConfigParser()
+        parser_event = configparser.SafeConfigParser()
         parser_event.read(path)
 
         # The event record section header contains 'event' word,
@@ -207,7 +213,7 @@ class Test(object):
             # Read parent event if there's any
             if (':' in section):
                 base = section[section.index(':') + 1:]
-                parser_base = ConfigParser.SafeConfigParser()
+                parser_base = configparser.SafeConfigParser()
                 parser_base.read(self.test_dir + '/' + base)
                 base_items = parser_base.items('event')
 
@@ -322,9 +328,9 @@ def run_tests(options):
     for f in glob.glob(options.test_dir + '/' + options.test):
         try:
             Test(f, options).run()
-        except Unsup, obj:
+        except Unsup as obj:
             log.warning("unsupp  %s" % obj.getMsg())
-        except Notest, obj:
+        except Notest as obj:
             log.warning("skipped %s" % obj.getMsg())
 
 def setup_log(verbose):
@@ -363,7 +369,7 @@ def main():
     parser.add_option("-p", "--perf",
                       action="store", type="string", dest="perf")
     parser.add_option("-v", "--verbose",
-                      action="count", dest="verbose")
+                      default=0, action="count", dest="verbose")
 
     options, args = parser.parse_args()
     if args:
@@ -373,7 +379,7 @@ def main():
     setup_log(options.verbose)
 
     if not options.test_dir:
-        print 'FAILED no -d option specified'
+        print('FAILED no -d option specified')
         sys.exit(-1)
 
     if not options.test:
@@ -382,8 +388,8 @@ def main():
     try:
         run_tests(options)
 
-    except Fail, obj:
-        print "FAILED %s" % obj.getMsg();
+    except Fail as obj:
+        print("FAILED %s" % obj.getMsg())
         sys.exit(-1)
 
     sys.exit(0)
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 5f8501c68da4..5cbba70bcdd0 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -17,7 +17,7 @@ static int perf_evsel__test_field(struct perf_evsel *evsel, const char *name,
 		return -1;
 	}
 
-	is_signed = !!(field->flags | TEP_FIELD_IS_SIGNED);
+	is_signed = !!(field->flags & TEP_FIELD_IS_SIGNED);
 	if (should_be_signed && !is_signed) {
 		pr_debug("%s: \"%s\" signedness(%d) is wrong, should be %d\n",
 			 evsel->name, name, is_signed, should_be_signed);
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 1d00e5ec7906..82e16bf84466 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -224,20 +224,24 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser)
 	return ret;
 }
 
-static int disasm__cmp(struct annotation_line *a, struct annotation_line *b)
+static double disasm__cmp(struct annotation_line *a, struct annotation_line *b,
+						  int percent_type)
 {
 	int i;
 
 	for (i = 0; i < a->data_nr; i++) {
-		if (a->data[i].percent == b->data[i].percent)
+		if (a->data[i].percent[percent_type] == b->data[i].percent[percent_type])
 			continue;
-		return a->data[i].percent < b->data[i].percent;
+		return a->data[i].percent[percent_type] -
+			   b->data[i].percent[percent_type];
 	}
 	return 0;
 }
 
-static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al)
+static void disasm_rb_tree__insert(struct annotate_browser *browser,
+				struct annotation_line *al)
 {
+	struct rb_root *root = &browser->entries;
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct annotation_line *l;
@@ -246,7 +250,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line
 		parent = *p;
 		l = rb_entry(parent, struct annotation_line, rb_node);
 
-		if (disasm__cmp(al, l))
+		if (disasm__cmp(al, l, browser->opts->percent_type) < 0)
 			p = &(*p)->rb_left;
 		else
 			p = &(*p)->rb_right;
@@ -329,7 +333,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 			RB_CLEAR_NODE(&pos->al.rb_node);
 			continue;
 		}
-		disasm_rb_tree__insert(&browser->entries, &pos->al);
+		disasm_rb_tree__insert(browser, &pos->al);
 	}
 	pthread_mutex_unlock(&notes->lock);
 
diff --git a/tools/perf/util/c++/clang.cpp b/tools/perf/util/c++/clang.cpp
index 89512504551b..39c0004f2886 100644
--- a/tools/perf/util/c++/clang.cpp
+++ b/tools/perf/util/c++/clang.cpp
@@ -160,7 +160,7 @@ getBPFObjectFromModule(llvm::Module *Module)
 	}
 	PM.run(*Module);
 
-	return std::move(Buffer);
+	return Buffer;
 }
 
 }
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 1ccbd3342069..383674f448fc 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -134,7 +134,12 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
 	if (!cpu_list)
 		return cpu_map__read_all_cpu_map();
 
-	if (!isdigit(*cpu_list))
+	/*
+	 * must handle the case of empty cpumap to cover
+	 * TOPOLOGY header for NUMA nodes with no CPU
+	 * ( e.g., because of CPU hotplug)
+	 */
+	if (!isdigit(*cpu_list) && *cpu_list != '\0')
 		goto out;
 
 	while (isdigit(*cpu_list)) {
@@ -181,8 +186,10 @@ struct cpu_map *cpu_map__new(const char *cpu_list)
 
 	if (nr_cpus > 0)
 		cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-	else
+	else if (*cpu_list != '\0')
 		cpus = cpu_map__default_new();
+	else
+		cpus = cpu_map__dummy_new();
 invalid:
 	free(tmp_cpus);
 out:
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index 93f74d8d3cdd..42c3e5a229d2 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -28,7 +28,7 @@ struct perf_mem_event perf_mem_events[PERF_MEM_EVENTS__MAX] = {
 static char mem_loads_name[100];
 static bool mem_loads_name__init;
 
-char *perf_mem_events__name(int i)
+char * __weak perf_mem_events__name(int i)
 {
 	if (i == PERF_MEM_EVENTS__LOAD) {
 		if (!mem_loads_name__init) {
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 897589507d97..ea523d3b248f 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -391,8 +391,10 @@ void ordered_events__free(struct ordered_events *oe)
 	 * Current buffer might not have all the events allocated
 	 * yet, we need to free only allocated ones ...
 	 */
-	list_del(&oe->buffer->list);
-	ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+	if (oe->buffer) {
+		list_del(&oe->buffer->list);
+		ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
+	}
 
 	/* ... and continue with the rest */
 	list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 63f758c655d5..64d1f36dee99 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -17,6 +17,8 @@ if cc == "clang":
             vars[var] = sub("-mcet", "", vars[var])
         if not clang_has_option("-fcf-protection"):
             vars[var] = sub("-fcf-protection", "", vars[var])
+        if not clang_has_option("-fstack-clash-protection"):
+            vars[var] = sub("-fstack-clash-protection", "", vars[var])
 
 from distutils.core import setup, Extension
 
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 66a84d5846c8..dca7dfae69ad 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -19,6 +19,20 @@
 #define EM_AARCH64	183  /* ARM 64 bit */
 #endif
 
+#ifndef ELF32_ST_VISIBILITY
+#define ELF32_ST_VISIBILITY(o)	((o) & 0x03)
+#endif
+
+/* For ELF64 the definitions are the same.  */
+#ifndef ELF64_ST_VISIBILITY
+#define ELF64_ST_VISIBILITY(o)	ELF32_ST_VISIBILITY (o)
+#endif
+
+/* How to extract information held in the st_other field.  */
+#ifndef GELF_ST_VISIBILITY
+#define GELF_ST_VISIBILITY(val)	ELF64_ST_VISIBILITY (val)
+#endif
+
 typedef Elf64_Nhdr GElf_Nhdr;
 
 #ifdef HAVE_CPLUS_DEMANGLE_SUPPORT
@@ -87,6 +101,11 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym)
 	return GELF_ST_TYPE(sym->st_info);
 }
 
+static inline uint8_t elf_sym__visibility(const GElf_Sym *sym)
+{
+	return GELF_ST_VISIBILITY(sym->st_other);
+}
+
 #ifndef STT_GNU_IFUNC
 #define STT_GNU_IFUNC 10
 #endif
@@ -111,7 +130,9 @@ static inline int elf_sym__is_label(const GElf_Sym *sym)
 	return elf_sym__type(sym) == STT_NOTYPE &&
 		sym->st_name != 0 &&
 		sym->st_shndx != SHN_UNDEF &&
-		sym->st_shndx != SHN_ABS;
+		sym->st_shndx != SHN_ABS &&
+		elf_sym__visibility(sym) != STV_HIDDEN &&
+		elf_sym__visibility(sym) != STV_INTERNAL;
 }
 
 static bool elf_sym__filter(GElf_Sym *sym)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 1a2bd15c5b6e..400ee81a3043 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -10,6 +10,7 @@ TARGETS += drivers/dma-buf
 TARGETS += efivarfs
 TARGETS += exec
 TARGETS += filesystems
+TARGETS += filesystems/binderfs
 TARGETS += firmware
 TARGETS += ftrace
 TARGETS += futex
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 315a44fa32af..84fd6f1bf33e 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -13,7 +13,7 @@ static inline unsigned int bpf_num_possible_cpus(void)
 	unsigned int start, end, possible_cpus = 0;
 	char buff[128];
 	FILE *fp;
-	int n;
+	int len, n, i, j = 0;
 
 	fp = fopen(fcpu, "r");
 	if (!fp) {
@@ -21,17 +21,27 @@ static inline unsigned int bpf_num_possible_cpus(void)
 		exit(1);
 	}
 
-	while (fgets(buff, sizeof(buff), fp)) {
-		n = sscanf(buff, "%u-%u", &start, &end);
-		if (n == 0) {
-			printf("Failed to retrieve # possible CPUs!\n");
-			exit(1);
-		} else if (n == 1) {
-			end = start;
+	if (!fgets(buff, sizeof(buff), fp)) {
+		printf("Failed to read %s!\n", fcpu);
+		exit(1);
+	}
+
+	len = strlen(buff);
+	for (i = 0; i <= len; i++) {
+		if (buff[i] == ',' || buff[i] == '\0') {
+			buff[i] = '\0';
+			n = sscanf(&buff[j], "%u-%u", &start, &end);
+			if (n <= 0) {
+				printf("Failed to retrieve # possible CPUs!\n");
+				exit(1);
+			} else if (n == 1) {
+				end = start;
+			}
+			possible_cpus += end - start + 1;
+			j = i + 1;
 		}
-		possible_cpus = start == 0 ? end + 1 : 0;
-		break;
 	}
+
 	fclose(fp);
 
 	return possible_cpus;
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index a0bd04befe87..91420fa83b08 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -1881,13 +1881,12 @@ static struct btf_raw_test raw_tests[] = {
 },
 
 {
-	.descr = "func proto (CONST=>TYPEDEF=>FUNC_PROTO)",
+	.descr = "func proto (TYPEDEF=>FUNC_PROTO)",
 	.raw_types = {
 		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
 		BTF_TYPE_INT_ENC(0, 0, 0, 32, 4),		/* [2] */
-		BTF_CONST_ENC(4),				/* [3] */
-		BTF_TYPEDEF_ENC(NAME_TBD, 5),			/* [4] */
-		BTF_FUNC_PROTO_ENC(0, 2),			/* [5] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 4),			/* [3] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [4] */
 			BTF_FUNC_PROTO_ARG_ENC(0, 1),
 			BTF_FUNC_PROTO_ARG_ENC(0, 2),
 		BTF_END_RAW,
@@ -1901,8 +1900,6 @@ static struct btf_raw_test raw_tests[] = {
 	.key_type_id = 1,
 	.value_type_id = 1,
 	.max_entries = 4,
-	.btf_load_err = true,
-	.err_str = "Invalid type_id",
 },
 
 {
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index bab13dd025a6..0d26b5e3f966 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -37,6 +37,10 @@ prerequisite()
 		exit $ksft_skip
 	fi
 
+	present_cpus=`cat $SYSFS/devices/system/cpu/present`
+	present_max=${present_cpus##*-}
+	echo "present_cpus = $present_cpus present_max = $present_max"
+
 	echo -e "\t Cpus in online state: $online_cpus"
 
 	offline_cpus=`cat $SYSFS/devices/system/cpu/offline`
@@ -151,6 +155,8 @@ online_cpus=0
 online_max=0
 offline_cpus=0
 offline_max=0
+present_cpus=0
+present_max=0
 
 while getopts e:ahp: opt; do
 	case $opt in
@@ -190,9 +196,10 @@ if [ $allcpus -eq 0 ]; then
 	online_cpu_expect_success $online_max
 
 	if [[ $offline_cpus -gt 0 ]]; then
-		echo -e "\t offline to online to offline: cpu $offline_max"
-		online_cpu_expect_success $offline_max
-		offline_cpu_expect_success $offline_max
+		echo -e "\t offline to online to offline: cpu $present_max"
+		online_cpu_expect_success $present_max
+		offline_cpu_expect_success $present_max
+		online_cpu $present_max
 	fi
 	exit 0
 else
diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore
new file mode 100644
index 000000000000..8a5d9bf63dd4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/binderfs/.gitignore
@@ -0,0 +1 @@
+binderfs_test
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
new file mode 100644
index 000000000000..58cb659b56b4
--- /dev/null
+++ b/tools/testing/selftests/filesystems/binderfs/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := binderfs_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
new file mode 100644
index 000000000000..8c2ed962e1c7
--- /dev/null
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <linux/android/binder.h>
+#include <linux/android/binderfs.h>
+#include "../../kselftest.h"
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+	ssize_t ret;
+again:
+	ret = write(fd, buf, count);
+	if (ret < 0 && errno == EINTR)
+		goto again;
+
+	return ret;
+}
+
+static void write_to_file(const char *filename, const void *buf, size_t count,
+			  int allowed_errno)
+{
+	int fd, saved_errno;
+	ssize_t ret;
+
+	fd = open(filename, O_WRONLY | O_CLOEXEC);
+	if (fd < 0)
+		ksft_exit_fail_msg("%s - Failed to open file %s\n",
+				   strerror(errno), filename);
+
+	ret = write_nointr(fd, buf, count);
+	if (ret < 0) {
+		if (allowed_errno && (errno == allowed_errno)) {
+			close(fd);
+			return;
+		}
+
+		goto on_error;
+	}
+
+	if ((size_t)ret != count)
+		goto on_error;
+
+	close(fd);
+	return;
+
+on_error:
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to write to file %s\n",
+				   strerror(errno), filename);
+
+	ksft_exit_fail_msg("Failed to write to file %s\n", filename);
+}
+
+static void change_to_userns(void)
+{
+	int ret;
+	uid_t uid;
+	gid_t gid;
+	/* {g,u}id_map files only allow a max of 4096 bytes written to them */
+	char idmap[4096];
+
+	uid = getuid();
+	gid = getgid();
+
+	ret = unshare(CLONE_NEWUSER);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
+				   strerror(errno));
+
+	write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT);
+
+	ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid);
+	if (ret < 0 || (size_t)ret >= sizeof(idmap))
+		ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
+				   strerror(errno));
+
+	write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0);
+
+	ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid);
+	if (ret < 0 || (size_t)ret >= sizeof(idmap))
+		ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
+				   strerror(errno));
+
+	write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0);
+
+	ret = setgid(0);
+	if (ret)
+		ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
+				   strerror(errno));
+
+	ret = setuid(0);
+	if (ret)
+		ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
+				   strerror(errno));
+}
+
+static void change_to_mountns(void)
+{
+	int ret;
+
+	ret = unshare(CLONE_NEWNS);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n",
+				   strerror(errno));
+
+	ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to mount / as private\n",
+				   strerror(errno));
+}
+
+static void rmdir_protect_errno(const char *dir)
+{
+	int saved_errno = errno;
+	(void)rmdir(dir);
+	errno = saved_errno;
+}
+
+static void __do_binderfs_test(void)
+{
+	int fd, ret, saved_errno;
+	size_t len;
+	ssize_t wret;
+	bool keep = false;
+	struct binderfs_device device = { 0 };
+	struct binder_version version = { 0 };
+
+	change_to_mountns();
+
+	ret = mkdir("/dev/binderfs", 0755);
+	if (ret < 0) {
+		if (errno != EEXIST)
+			ksft_exit_fail_msg(
+				"%s - Failed to create binderfs mountpoint\n",
+				strerror(errno));
+
+		keep = true;
+	}
+
+	ret = mount(NULL, "/dev/binderfs", "binder", 0, 0);
+	if (ret < 0) {
+		if (errno != ENODEV)
+			ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
+					   strerror(errno));
+
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_skip(
+			"The Android binderfs filesystem is not available\n");
+	}
+
+	/* binderfs mount test passed */
+	ksft_inc_pass_cnt();
+
+	memcpy(device.name, "my-binder", strlen("my-binder"));
+
+	fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		ksft_exit_fail_msg(
+			"%s - Failed to open binder-control device\n",
+			strerror(errno));
+
+	ret = ioctl(fd, BINDER_CTL_ADD, &device);
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+	if (ret < 0) {
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_fail_msg(
+			"%s - Failed to allocate new binder device\n",
+			strerror(errno));
+	}
+
+	ksft_print_msg(
+		"Allocated new binder device with major %d, minor %d, and name %s\n",
+		device.major, device.minor, device.name);
+
+	/* binder device allocation test passed */
+	ksft_inc_pass_cnt();
+
+	fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY);
+	if (fd < 0) {
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
+				   strerror(errno));
+	}
+
+	ret = ioctl(fd, BINDER_VERSION, &version);
+	saved_errno = errno;
+	close(fd);
+	errno = saved_errno;
+	if (ret < 0) {
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_fail_msg(
+			"%s - Failed to open perform BINDER_VERSION request\n",
+			strerror(errno));
+	}
+
+	ksft_print_msg("Detected binder version: %d\n",
+		       version.protocol_version);
+
+	/* binder transaction with binderfs binder device passed */
+	ksft_inc_pass_cnt();
+
+	ret = unlink("/dev/binderfs/my-binder");
+	if (ret < 0) {
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_fail_msg("%s - Failed to delete binder device\n",
+				   strerror(errno));
+	}
+
+	/* binder device removal passed */
+	ksft_inc_pass_cnt();
+
+	ret = unlink("/dev/binderfs/binder-control");
+	if (!ret) {
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_fail_msg("Managed to delete binder-control device\n");
+	} else if (errno != EPERM) {
+		keep ? : rmdir_protect_errno("/dev/binderfs");
+		ksft_exit_fail_msg(
+			"%s - Failed to delete binder-control device but exited with unexpected error code\n",
+			strerror(errno));
+	}
+
+	/* binder-control device removal failed as expected */
+	ksft_inc_xfail_cnt();
+
+on_error:
+	ret = umount2("/dev/binderfs", MNT_DETACH);
+	keep ?: rmdir_protect_errno("/dev/binderfs");
+	if (ret < 0)
+		ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
+				   strerror(errno));
+
+	/* binderfs unmount test passed */
+	ksft_inc_pass_cnt();
+}
+
+static void binderfs_test_privileged()
+{
+	if (geteuid() != 0)
+		ksft_print_msg(
+			"Tests are not run as root. Skipping privileged tests\n");
+	else
+		__do_binderfs_test();
+}
+
+static void binderfs_test_unprivileged()
+{
+	change_to_userns();
+	__do_binderfs_test();
+}
+
+int main(int argc, char *argv[])
+{
+	binderfs_test_privileged();
+	binderfs_test_unprivileged();
+	ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config
new file mode 100644
index 000000000000..02dd6cc9cf99
--- /dev/null
+++ b/tools/testing/selftests/filesystems/binderfs/config
@@ -0,0 +1,3 @@
+CONFIG_ANDROID=y
+CONFIG_ANDROID_BINDERFS=y
+CONFIG_ANDROID_BINDER_IPC=y
diff --git a/tools/testing/selftests/ir/Makefile b/tools/testing/selftests/ir/Makefile
index f4ba8eb84b95..ad06489c22a5 100644
--- a/tools/testing/selftests/ir/Makefile
+++ b/tools/testing/selftests/ir/Makefile
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 TEST_PROGS := ir_loopback.sh
 TEST_GEN_PROGS_EXTENDED := ir_loopback
+APIDIR := ../../../include/uapi
+CFLAGS += -Wall -O2 -I$(APIDIR)
 
 include ../lib.mk
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f8f3e90700c0..1e6d14d2825c 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -21,6 +21,6 @@ TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-$(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
 $(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 8db35b99457c..71d7fdc513c1 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -28,6 +28,19 @@ KEY_AES=0x0123456789abcdef0123456789012345
 SPI1=0x1
 SPI2=0x2
 
+do_esp_policy() {
+    local ns=$1
+    local me=$2
+    local remote=$3
+    local lnet=$4
+    local rnet=$5
+
+    # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
+    ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
+    # to fwd decrypted packets after esp processing:
+    ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
+}
+
 do_esp() {
     local ns=$1
     local me=$2
@@ -40,10 +53,59 @@ do_esp() {
     ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in  enc aes $KEY_AES  auth sha1 $KEY_SHA  mode tunnel sel src $rnet dst $lnet
     ip -net $ns xfrm state add src $me  dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
 
-    # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
-    ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow
-    # to fwd decrypted packets after esp processing:
-    ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow
+    do_esp_policy $ns $me $remote $lnet $rnet
+}
+
+# add policies with different netmasks, to make sure kernel carries
+# the policies contained within new netmask over when search tree is
+# re-built.
+# peer netns that are supposed to be encapsulated via esp have addresses
+# in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively.
+#
+# Adding a policy for '10.0.1.0/23' will make it necessary to
+# alter the prefix of 10.0.1.0 subnet.
+# In case new prefix overlaps with existing node, the node and all
+# policies it carries need to be merged with the existing one(s).
+#
+# Do that here.
+do_overlap()
+{
+    local ns=$1
+
+    # adds new nodes to tree (neither network exists yet in policy database).
+    ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block
+
+    # adds a new node in the 10.0.0.0/24 tree (dst node exists).
+    ip -net $ns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block
+
+    # adds a 10.2.0.0/23 node, but for different dst.
+    ip -net $ns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block
+
+    # dst now overlaps with the 10.0.1.0/24 ESP policy in fwd.
+    # kernel must 'promote' existing one (10.0.0.0/24) to 10.0.0.0/23.
+    # But 10.0.0.0/23 also includes existing 10.0.1.0/24, so that node
+    # also has to be merged too, including source-sorted subtrees.
+    # old:
+    # 10.0.0.0/24 (node 1 in dst tree of the bin)
+    #    10.1.0.0/24 (node in src tree of dst node 1)
+    #    10.2.0.0/24 (node in src tree of dst node 1)
+    # 10.0.1.0/24 (node 2 in dst tree of the bin)
+    #    10.0.2.0/24 (node in src tree of dst node 2)
+    #    10.2.0.0/24 (node in src tree of dst node 2)
+    #
+    # The next 'policy add' adds dst '10.0.0.0/23', which means
+    # that dst node 1 and dst node 2 have to be merged including
+    # the sub-tree.  As no duplicates are allowed, policies in
+    # the two '10.0.2.0/24' are also merged.
+    #
+    # after the 'add', internal search tree should look like this:
+    # 10.0.0.0/23 (node in dst tree of bin)
+    #     10.0.2.0/24 (node in src tree of dst node)
+    #     10.1.0.0/24 (node in src tree of dst node)
+    #     10.2.0.0/24 (node in src tree of dst node)
+    #
+    # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23.
+    ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block
 }
 
 do_esp_policy_get_check() {
@@ -160,6 +222,41 @@ check_xfrm() {
 	return $lret
 }
 
+check_exceptions()
+{
+	logpostfix="$1"
+	local lret=0
+
+	# ping to .254 should be excluded from the tunnel (exception is in place).
+	check_xfrm 0 254
+	if [ $? -ne 0 ]; then
+		echo "FAIL: expected ping to .254 to fail ($logpostfix)"
+		lret=1
+	else
+		echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)"
+	fi
+
+	# ping to .253 should use use ipsec due to direct policy exception.
+	check_xfrm 1 253
+	if [ $? -ne 0 ]; then
+		echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)"
+		lret=1
+	else
+		echo "PASS: direct policy matches ($logpostfix)"
+	fi
+
+	# ping to .2 should use ipsec.
+	check_xfrm 1 2
+	if [ $? -ne 0 ]; then
+		echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)"
+		lret=1
+	else
+		echo "PASS: policy matches ($logpostfix)"
+	fi
+
+	return $lret
+}
+
 #check for needed privileges
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
@@ -270,33 +367,45 @@ do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
 do_exception ns3 dead:3::1 dead:3::10 dead:2::fd  dead:2:f0::/96
 do_exception ns4 dead:3::10 dead:3::1 dead:1::fd  dead:1:f0::/96
 
-# ping to .254 should now be excluded from the tunnel
-check_xfrm 0 254
+check_exceptions "exceptions"
 if [ $? -ne 0 ]; then
-	echo "FAIL: expected ping to .254 to fail"
 	ret=1
-else
-	echo "PASS: ping to .254 bypassed ipsec tunnel"
 fi
 
-# ping to .253 should use use ipsec due to direct policy exception.
-check_xfrm 1 253
-if [ $? -ne 0 ]; then
-	echo "FAIL: expected ping to .253 to use ipsec tunnel"
-	ret=1
-else
-	echo "PASS: direct policy matches"
-fi
+# insert block policies with adjacent/overlapping netmasks
+do_overlap ns3
 
-# ping to .2 should use ipsec.
-check_xfrm 1 2
+check_exceptions "exceptions and block policies"
 if [ $? -ne 0 ]; then
-	echo "FAIL: expected ping to .2 to use ipsec tunnel"
 	ret=1
-else
-	echo "PASS: policy matches"
 fi
 
+for n in ns3 ns4;do
+	ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125
+	sleep $((RANDOM%5))
+done
+
+check_exceptions "exceptions and block policies after hresh changes"
+
+# full flush of policy db, check everything gets freed incl. internal meta data
+ip -net ns3 xfrm policy flush
+
+do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
+do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+
+# move inexact policies to hash table
+ip -net ns3 xfrm policy set hthresh4 16 16
+
+sleep $((RANDOM%5))
+check_exceptions "exceptions and block policies after hthresh change in ns3"
+
+# restore original hthresh settings -- move policies back to tables
+for n in ns3 ns4;do
+	ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
+	sleep $((RANDOM%5))
+done
+check_exceptions "exceptions and block policies after hresh change to normal"
+
 for i in 1 2 3 4;do ip netns del ns$i;done
 
 exit $ret
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index 47ed6cef93fb..c9ff2b47bd1c 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for netfilter selftests
 
-TEST_PROGS := nft_trans_stress.sh
+TEST_PROGS := nft_trans_stress.sh nft_nat.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 1017313e41a8..59caa8f71cd8 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -1,2 +1,2 @@
 CONFIG_NET_NS=y
-NF_TABLES_INET=y
+CONFIG_NF_TABLES_INET=y
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
new file mode 100755
index 000000000000..8ec76681605c
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -0,0 +1,762 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+ip netns add ns0
+ip netns add ns1
+ip netns add ns2
+
+ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
+ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
+
+ip -net ns0 link set lo up
+ip -net ns0 link set veth0 up
+ip -net ns0 addr add 10.0.1.1/24 dev veth0
+ip -net ns0 addr add dead:1::1/64 dev veth0
+
+ip -net ns0 link set veth1 up
+ip -net ns0 addr add 10.0.2.1/24 dev veth1
+ip -net ns0 addr add dead:2::1/64 dev veth1
+
+for i in 1 2; do
+  ip -net ns$i link set lo up
+  ip -net ns$i link set eth0 up
+  ip -net ns$i addr add 10.0.$i.99/24 dev eth0
+  ip -net ns$i route add default via 10.0.$i.1
+  ip -net ns$i addr add dead:$i::99/64 dev eth0
+  ip -net ns$i route add default via dead:$i::1
+done
+
+bad_counter()
+{
+	local ns=$1
+	local counter=$2
+	local expect=$3
+
+	echo "ERROR: $counter counter in $ns has unexpected value (expected $expect)" 1>&2
+	ip netns exec $ns nft list counter inet filter $counter 1>&2
+}
+
+check_counters()
+{
+	ns=$1
+	local lret=0
+
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0in "packets 1 bytes 84"
+		lret=1
+	fi
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0out "packets 1 bytes 84"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0in6 "$expect"
+		lret=1
+	fi
+	cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
+	if [ $? -ne 0 ]; then
+		bad_counter $ns ns0out6 "$expect"
+		lret=1
+	fi
+
+	return $lret
+}
+
+check_ns0_counters()
+{
+	local ns=$1
+	local lret=0
+
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0in "packets 0 bytes 0"
+		lret=1
+	fi
+
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0in6 "packets 0 bytes 0"
+		lret=1
+	fi
+
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0out "packets 0 bytes 0"
+		lret=1
+	fi
+	cnt=$(ip netns exec ns0 nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
+	if [ $? -ne 0 ]; then
+		bad_counter ns0 ns0out6 "packets 0 bytes 0"
+		lret=1
+	fi
+
+	for dir in "in" "out" ; do
+		expect="packets 1 bytes 84"
+		cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 $ns$dir "$expect"
+			lret=1
+		fi
+
+		expect="packets 1 bytes 104"
+		cnt=$(ip netns exec ns0 nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 $ns$dir6 "$expect"
+			lret=1
+		fi
+	done
+
+	return $lret
+}
+
+reset_counters()
+{
+	for i in 0 1 2;do
+		ip netns exec ns$i nft reset counters inet > /dev/null
+	done
+}
+
+test_local_dnat6()
+{
+	local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		ip6 daddr dead:1::99 dnat to dead:2::99
+	}
+}
+EOF
+	if [ $? -ne 0 ]; then
+		echo "SKIP: Could not add add ip6 dnat hook"
+		return $ksft_skip
+	fi
+
+	# ping netns1, expect rewrite to netns2
+	ip netns exec ns0 ping -q -c 1 dead:1::99 > /dev/null
+	if [ $? -ne 0 ]; then
+		lret=1
+		echo "ERROR: ping6 failed"
+		return $lret
+	fi
+
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 0 count in ns1
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 1 packet in ns2
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ipv6 ping to ns1 was NATted to ns2"
+	ip netns exec ns0 nft flush chain ip6 nat output
+
+	return $lret
+}
+
+test_local_dnat()
+{
+	local lret=0
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+	chain output {
+		type nat hook output priority 0; policy accept;
+		ip daddr 10.0.1.99 dnat to 10.0.2.99
+	}
+}
+EOF
+	# ping netns1, expect rewrite to netns2
+	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+	if [ $? -ne 0 ]; then
+		lret=1
+		echo "ERROR: ping failed"
+		return $lret
+	fi
+
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 0 count in ns1
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 1 packet in ns2
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ping to ns1 was NATted to ns2"
+
+	ip netns exec ns0 nft flush chain ip nat output
+
+	reset_counters
+	ip netns exec ns0 ping -q -c 1 10.0.1.99 > /dev/null
+	if [ $? -ne 0 ]; then
+		lret=1
+		echo "ERROR: ping failed"
+		return $lret
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 1 count in ns1
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns0 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# expect 0 packet in ns2
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns2$dir "$expect"
+			lret=1
+		fi
+	done
+
+	test $lret -eq 0 && echo "PASS: ping to ns1 OK after nat output chain flush"
+
+	return $lret
+}
+
+
+test_masquerade6()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 via ipv6"
+		return 1
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ipv6 masquerading"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from ns0, due to masquerade
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft flush chain ip6 nat postrouting
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not flush ip6 nat postrouting" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IPv6 masquerade for ns2"
+
+	return $lret
+}
+
+test_masquerade()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: canot ping ns1 from ns2"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add masquerading rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+	chain postrouting {
+		type nat hook postrouting priority 0; policy accept;
+		meta oif veth0 masquerade
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ip masquerading"
+		lret=1
+	fi
+
+	# ns1 should have seen packets from ns0, due to masquerade
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns0${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns1 should not have seen packets from ns2, due to masquerade
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft flush chain ip nat postrouting
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not flush nat postrouting" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP masquerade for ns2"
+
+	return $lret
+}
+
+test_redirect6()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannnot ping ns1 from ns2 via ipv6"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip6 nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ip6 redirect"
+		lret=1
+	fi
+
+	# ns1 should have seen no packets from ns2, due to redirection
+	expect="packets 0 bytes 0"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns0 should have seen packets from ns2, due to masquerade
+	expect="packets 1 bytes 104"
+	for dir in "in6" "out6" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft delete table ip6 nat
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not delete ip6 nat table" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IPv6 redirection for ns2"
+
+	return $lret
+}
+
+test_redirect()
+{
+	local lret=0
+
+	ip netns exec ns0 sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+	ip netns exec ns0 sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2"
+		lret=1
+	fi
+
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns2$dir "$expect"
+			lret=1
+		fi
+
+		cnt=$(ip netns exec ns2 nft list counter inet filter ns1${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns2 ns1$dir "$expect"
+			lret=1
+		fi
+	done
+
+	reset_counters
+
+# add redirect rule
+ip netns exec ns0 nft -f - <<EOF
+table ip nat {
+	chain prerouting {
+		type nat hook prerouting priority 0; policy accept;
+		meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+	}
+}
+EOF
+	ip netns exec ns2 ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+	if [ $? -ne 0 ] ; then
+		echo "ERROR: cannot ping ns1 from ns2 with active ip redirect"
+		lret=1
+	fi
+
+	# ns1 should have seen no packets from ns2, due to redirection
+	expect="packets 0 bytes 0"
+	for dir in "in" "out" ; do
+
+		cnt=$(ip netns exec ns1 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	# ns0 should have seen packets from ns2, due to masquerade
+	expect="packets 1 bytes 84"
+	for dir in "in" "out" ; do
+		cnt=$(ip netns exec ns0 nft list counter inet filter ns2${dir} | grep -q "$expect")
+		if [ $? -ne 0 ]; then
+			bad_counter ns1 ns0$dir "$expect"
+			lret=1
+		fi
+	done
+
+	ip netns exec ns0 nft delete table ip nat
+	if [ $? -ne 0 ]; then
+		echo "ERROR: Could not delete nat table" 1>&2
+		lret=1
+	fi
+
+	test $lret -eq 0 && echo "PASS: IP redirection for ns2"
+
+	return $lret
+}
+
+
+# ip netns exec ns0 ping -c 1 -q 10.0.$i.99
+for i in 0 1 2; do
+ip netns exec ns$i nft -f - <<EOF
+table inet filter {
+	counter ns0in {}
+	counter ns1in {}
+	counter ns2in {}
+
+	counter ns0out {}
+	counter ns1out {}
+	counter ns2out {}
+
+	counter ns0in6 {}
+	counter ns1in6 {}
+	counter ns2in6 {}
+
+	counter ns0out6 {}
+	counter ns1out6 {}
+	counter ns2out6 {}
+
+	map nsincounter {
+		type ipv4_addr : counter
+		elements = { 10.0.1.1 : "ns0in",
+			     10.0.2.1 : "ns0in",
+			     10.0.1.99 : "ns1in",
+			     10.0.2.99 : "ns2in" }
+	}
+
+	map nsincounter6 {
+		type ipv6_addr : counter
+		elements = { dead:1::1 : "ns0in6",
+			     dead:2::1 : "ns0in6",
+			     dead:1::99 : "ns1in6",
+			     dead:2::99 : "ns2in6" }
+	}
+
+	map nsoutcounter {
+		type ipv4_addr : counter
+		elements = { 10.0.1.1 : "ns0out",
+			     10.0.2.1 : "ns0out",
+			     10.0.1.99: "ns1out",
+			     10.0.2.99: "ns2out" }
+	}
+
+	map nsoutcounter6 {
+		type ipv6_addr : counter
+		elements = { dead:1::1 : "ns0out6",
+			     dead:2::1 : "ns0out6",
+			     dead:1::99 : "ns1out6",
+			     dead:2::99 : "ns2out6" }
+	}
+
+	chain input {
+		type filter hook input priority 0; policy accept;
+		counter name ip saddr map @nsincounter
+		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+	}
+	chain output {
+		type filter hook output priority 0; policy accept;
+		counter name ip daddr map @nsoutcounter
+		icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+	}
+}
+EOF
+done
+
+sleep 3
+# test basic connectivity
+for i in 1 2; do
+  ip netns exec ns0 ping -c 1 -q 10.0.$i.99 > /dev/null
+  if [ $? -ne 0 ];then
+  	echo "ERROR: Could not reach other namespace(s)" 1>&2
+	ret=1
+  fi
+
+  ip netns exec ns0 ping -c 1 -q dead:$i::99 > /dev/null
+  if [ $? -ne 0 ];then
+	echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+	ret=1
+  fi
+  check_counters ns$i
+  if [ $? -ne 0 ]; then
+	ret=1
+  fi
+
+  check_ns0_counters ns$i
+  if [ $? -ne 0 ]; then
+	ret=1
+  fi
+  reset_counters
+done
+
+if [ $ret -eq 0 ];then
+	echo "PASS: netns routing/connectivity: ns0 can reach ns1 and ns2"
+fi
+
+reset_counters
+test_local_dnat
+test_local_dnat6
+
+reset_counters
+test_masquerade
+test_masquerade6
+
+reset_counters
+test_redirect
+test_redirect6
+
+for i in 0 1 2; do ip netns del ns$i;done
+
+exit $ret
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 82121a81681f..29bac5ef9a93 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -10,4 +10,5 @@
 /proc-uptime-002
 /read
 /self
+/setns-dcache
 /thread-self
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index 1c12c34cf85d..434d033ee067 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -14,6 +14,7 @@ TEST_GEN_PROGS += proc-uptime-001
 TEST_GEN_PROGS += proc-uptime-002
 TEST_GEN_PROGS += read
 TEST_GEN_PROGS += self
+TEST_GEN_PROGS += setns-dcache
 TEST_GEN_PROGS += thread-self
 
 include ../lib.mk
diff --git a/tools/testing/selftests/proc/setns-dcache.c b/tools/testing/selftests/proc/setns-dcache.c
new file mode 100644
index 000000000000..60ab197a73fc
--- /dev/null
+++ b/tools/testing/selftests/proc/setns-dcache.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2019 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that setns(CLONE_NEWNET) points to new /proc/net content even
+ * if old one is in dcache.
+ *
+ * FIXME /proc/net/unix is under CONFIG_UNIX which can be disabled.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+
+static pid_t pid = -1;
+
+static void f(void)
+{
+	if (pid > 0) {
+		kill(pid, SIGTERM);
+	}
+}
+
+int main(void)
+{
+	int fd[2];
+	char _ = 0;
+	int nsfd;
+
+	atexit(f);
+
+	/* Check for priviledges and syscall availability straight away. */
+	if (unshare(CLONE_NEWNET) == -1) {
+		if (errno == ENOSYS || errno == EPERM) {
+			return 4;
+		}
+		return 1;
+	}
+	/* Distinguisher between two otherwise empty net namespaces. */
+	if (socket(AF_UNIX, SOCK_STREAM, 0) == -1) {
+		return 1;
+	}
+
+	if (pipe(fd) == -1) {
+		return 1;
+	}
+
+	pid = fork();
+	if (pid == -1) {
+		return 1;
+	}
+
+	if (pid == 0) {
+		if (unshare(CLONE_NEWNET) == -1) {
+			return 1;
+		}
+
+		if (write(fd[1], &_, 1) != 1) {
+			return 1;
+		}
+
+		pause();
+
+		return 0;
+	}
+
+	if (read(fd[0], &_, 1) != 1) {
+		return 1;
+	}
+
+	{
+		char buf[64];
+		snprintf(buf, sizeof(buf), "/proc/%u/ns/net", pid);
+		nsfd = open(buf, O_RDONLY);
+		if (nsfd == -1) {
+			return 1;
+		}
+	}
+
+	/* Reliably pin dentry into dcache. */
+	(void)open("/proc/net/unix", O_RDONLY);
+
+	if (setns(nsfd, CLONE_NEWNET) == -1) {
+		return 1;
+	}
+
+	kill(pid, SIGTERM);
+	pid = 0;
+
+	{
+		char buf[4096];
+		ssize_t rv;
+		int fd;
+
+		fd = open("/proc/net/unix", O_RDONLY);
+		if (fd == -1) {
+			return 1;
+		}
+
+#define S "Num       RefCount Protocol Flags    Type St Inode Path\n"
+		rv = read(fd, buf, sizeof(buf));
+
+		assert(rv == strlen(S));
+		assert(memcmp(buf, S, strlen(S)) == 0);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 496a9a8c773a..7e632b465ab4 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1608,7 +1608,16 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 # define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
 #else
-# define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(val, action)
+# define EXPECT_SYSCALL_RETURN(val, action)		\
+	do {						\
+		errno = 0;				\
+		if (val < 0) {				\
+			EXPECT_EQ(-1, action);		\
+			EXPECT_EQ(-(val), errno);	\
+		} else {				\
+			EXPECT_EQ(val, action);		\
+		}					\
+	} while (0)
 #endif
 
 /* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
@@ -1647,7 +1656,7 @@ int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
 
 /* Architecture-specific syscall changing routine. */
 void change_syscall(struct __test_metadata *_metadata,
-		    pid_t tracee, int syscall)
+		    pid_t tracee, int syscall, int result)
 {
 	int ret;
 	ARCH_REGS regs;
@@ -1706,7 +1715,7 @@ void change_syscall(struct __test_metadata *_metadata,
 #ifdef SYSCALL_NUM_RET_SHARE_REG
 		TH_LOG("Can't modify syscall return on this architecture");
 #else
-		regs.SYSCALL_RET = EPERM;
+		regs.SYSCALL_RET = result;
 #endif
 
 #ifdef HAVE_GETREGS
@@ -1734,14 +1743,19 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
 	case 0x1002:
 		/* change getpid to getppid. */
 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, __NR_getppid);
+		change_syscall(_metadata, tracee, __NR_getppid, 0);
 		break;
 	case 0x1003:
-		/* skip gettid. */
+		/* skip gettid with valid return code. */
 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, -1);
+		change_syscall(_metadata, tracee, -1, 45000);
 		break;
 	case 0x1004:
+		/* skip openat with error. */
+		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
+		change_syscall(_metadata, tracee, -1, -ESRCH);
+		break;
+	case 0x1005:
 		/* do nothing (allow getppid) */
 		EXPECT_EQ(__NR_getppid, get_syscall(_metadata, tracee));
 		break;
@@ -1774,9 +1788,11 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 	nr = get_syscall(_metadata, tracee);
 
 	if (nr == __NR_getpid)
-		change_syscall(_metadata, tracee, __NR_getppid);
+		change_syscall(_metadata, tracee, __NR_getppid, 0);
+	if (nr == __NR_gettid)
+		change_syscall(_metadata, tracee, -1, 45000);
 	if (nr == __NR_openat)
-		change_syscall(_metadata, tracee, -1);
+		change_syscall(_metadata, tracee, -1, -ESRCH);
 }
 
 FIXTURE_DATA(TRACE_syscall) {
@@ -1793,8 +1809,10 @@ FIXTURE_SETUP(TRACE_syscall)
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
-		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_openat, 0, 1),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 
@@ -1842,15 +1860,26 @@ TEST_F(TRACE_syscall, ptrace_syscall_redirected)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-TEST_F(TRACE_syscall, ptrace_syscall_dropped)
+TEST_F(TRACE_syscall, ptrace_syscall_errno)
+{
+	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
+	teardown_trace_fixture(_metadata, self->tracer);
+	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
+					   true);
+
+	/* Tracer should skip the open syscall, resulting in ESRCH. */
+	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, ptrace_syscall_faked)
 {
 	/* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
 	teardown_trace_fixture(_metadata, self->tracer);
 	self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
 					   true);
 
-	/* Tracer should skip the open syscall, resulting in EPERM. */
-	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_openat));
+	/* Tracer should skip the gettid syscall, resulting fake pid. */
+	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
 }
 
 TEST_F(TRACE_syscall, syscall_allowed)
@@ -1883,7 +1912,21 @@ TEST_F(TRACE_syscall, syscall_redirected)
 	EXPECT_NE(self->mypid, syscall(__NR_getpid));
 }
 
-TEST_F(TRACE_syscall, syscall_dropped)
+TEST_F(TRACE_syscall, syscall_errno)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* openat has been skipped and an errno return. */
+	EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
+}
+
+TEST_F(TRACE_syscall, syscall_faked)
 {
 	long ret;
 
@@ -1894,8 +1937,7 @@ TEST_F(TRACE_syscall, syscall_dropped)
 	ASSERT_EQ(0, ret);
 
 	/* gettid has been skipped and an altered return value stored. */
-	EXPECT_SYSCALL_RETURN(EPERM, syscall(__NR_gettid));
-	EXPECT_NE(self->mytid, syscall(__NR_gettid));
+	EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
 }
 
 TEST_F(TRACE_syscall, skip_after_RET_TRACE)
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index c02683cfb6c9..7656c7ce79d9 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 CFLAGS += -O3 -Wl,-no-as-needed -Wall
-LDFLAGS += -lrt -lpthread -lm
+LDLIBS += -lrt -lpthread -lm
 
 # these are all "safe" tests that don't modify
 # system time or require escalated privileges
author	Jens Axboe <axboe@kernel.dk>	2019-02-15 08:43:59 -0700
committer	Jens Axboe <axboe@kernel.dk>	2019-02-15 08:43:59 -0700
commit	6fb845f0e78de19eaaf6a2d351702474e44b6a9e (patch)
tree	94c6474b38d8f59d8f0de0151053283812e456c6 /tools
parent	56d18f62f556b80105e38e7975975cf7465aae3e (diff)
parent	d13937116f1e82bf508a6325111b322c30c85eb9 (diff)