From 746ce767128598711a00d8df5713d4c3b3d9e9a7 Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler@microsoft.com>
Date: Mon, 20 Feb 2023 22:37:42 +0000
Subject: bpf, docs: Add explanation of endianness

Document the discussion from the email thread on the IETF bpf list,
where it was explained that the raw format varies by endianness
of the processor.

Signed-off-by: Dave Thaler <dthaler@microsoft.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230220223742.1347-1-dthaler1968@googlemail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/instruction-set.rst | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index af515de5fc38..01802ed9b29b 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -38,8 +38,9 @@ eBPF has two instruction encodings:
 * the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
   constant) value after the basic instruction for a total of 128 bits.
 
-The basic instruction encoding is as follows, where MSB and LSB mean the most significant
-bits and least significant bits, respectively:
+The basic instruction encoding looks as follows for a little-endian processor,
+where MSB and LSB mean the most significant bits and least significant bits,
+respectively:
 
 =============  =======  =======  =======  ============
 32 bits (MSB)  16 bits  4 bits   4 bits   8 bits (LSB)
@@ -63,6 +64,17 @@ imm            offset   src_reg  dst_reg  opcode
 **opcode**
   operation to perform
 
+and as follows for a big-endian processor:
+
+=============  =======  =======  =======  ============
+32 bits (MSB)  16 bits  4 bits   4 bits   8 bits (LSB)
+=============  =======  =======  =======  ============
+imm            offset   dst_reg  src_reg  opcode
+=============  =======  =======  =======  ============
+
+Multi-byte fields ('imm' and 'offset') are similarly stored in
+the byte order of the processor.
+
 Note that most instructions do not use all of the fields.
 Unused fields shall be cleared to zero.
 
-- 
cgit v1.2.3


From 332ea1f697be148bd5e66475d82b5ecc5084da65 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Wed, 22 Feb 2023 15:29:12 -1000
Subject: bpf: Add bpf_cgroup_from_id() kfunc

A cgroup ID is a userspace-visible 64-bit value uniquely identifying a given
cgroup. As the IDs are used widely, it's useful to be able to look up the
matching cgroups. Add bpf_cgroup_from_id().

v2: Separate out selftest into its own patch as suggested by Alexei.

Signed-off-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/Y/bBaG96t0/gQl9/@slm.duckdns.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst | 10 +++++++---
 kernel/bpf/helpers.c         | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index ca96ef3f6896..226313747be5 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -583,13 +583,17 @@ Here's an example of how it can be used:
 
 ----
 
-Another kfunc available for interacting with ``struct cgroup *`` objects is
-bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
-and return it as a cgroup kptr.
+Other kfuncs available for interacting with ``struct cgroup *`` objects are
+bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
+the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
+return a cgroup kptr.
 
 .. kernel-doc:: kernel/bpf/helpers.c
    :identifiers: bpf_cgroup_ancestor
 
+.. kernel-doc:: kernel/bpf/helpers.c
+   :identifiers: bpf_cgroup_from_id
+
 Eventually, BPF should be updated to allow this to happen with a normal memory
 load in the program itself. This is currently not possible without more work in
 the verifier. bpf_cgroup_ancestor() can be used as follows:
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5b278a38ae58..a784be6f8bac 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2101,6 +2101,23 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
 	cgroup_get(ancestor);
 	return ancestor;
 }
+
+/**
+ * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this
+ * kfunc which is not subsequently stored in a map, must be released by calling
+ * bpf_cgroup_release().
+ * @cgid: The ID of the cgroup to look up.
+ * Return: The cgroup found for @cgid, or NULL if the lookup fails.
+ */
+__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
+{
+	struct cgroup *cgrp;
+
+	cgrp = cgroup_get_from_id(cgid);
+	if (IS_ERR(cgrp))
+		return NULL;
+	return cgrp;
+}
 #endif /* CONFIG_CGROUPS */
 
 /**
@@ -2167,6 +2184,7 @@ BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
 BTF_SET8_END(generic_btf_ids)
-- 
cgit v1.2.3


From ae256f95478e07d49dae5036bb83c09dfbd686d4 Mon Sep 17 00:00:00 2001
From: "Jose E. Marchesi" <jose.marchesi@oracle.com>
Date: Tue, 28 Feb 2023 10:51:29 +0100
Subject: bpf, docs: Document BPF insn encoding in terms of stored bytes

[Changes from V4:
- s/regs:16/regs:8 in figure.]

[Changes from V3:
- Back to src_reg and dst_reg, since they denote register numbers
  as opposed to the values stored in these registers.]

[Changes from V2:
- Use src and dst consistently in the document.
- Use a more graphical depiction of the 128-bit instruction.
- Remove `Where:' fragment.
- Clarify that unused bits are reserved and shall be zeroed.]

[Changes from V1:
- Use rst literal blocks for figures.
- Avoid using | in the basic instruction/pseudo instruction figure.
- Rebased to today's bpf-next master branch.]

This patch modifies instruction-set.rst so it documents the encoding
of BPF instructions in terms of how the bytes are stored (be it in an
ELF file or as bytes in a memory buffer to be loaded into the kernel
or some other BPF consumer) as opposed to what the instruction looks
like once loaded.

This is hopefully easier to understand for implementors looking to
generate and/or consume the bytes that make up BPF instructions.

The patch also clarifies that the unused bytes in a pseudo-instruction
shall be cleared with zeros.

Signed-off-by: Jose E. Marchesi <jose.marchesi@oracle.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/87h6v6i0da.fsf_-_@oracle.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/instruction-set.rst | 46 ++++++++++++++++++-----------------
 1 file changed, 24 insertions(+), 22 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 01802ed9b29b..db8789e6969e 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -38,15 +38,11 @@ eBPF has two instruction encodings:
 * the wide instruction encoding, which appends a second 64-bit immediate (i.e.,
   constant) value after the basic instruction for a total of 128 bits.
 
-The basic instruction encoding looks as follows for a little-endian processor,
-where MSB and LSB mean the most significant bits and least significant bits,
-respectively:
+The fields that make up an encoded basic instruction are stored in the
+following order::
 
-=============  =======  =======  =======  ============
-32 bits (MSB)  16 bits  4 bits   4 bits   8 bits (LSB)
-=============  =======  =======  =======  ============
-imm            offset   src_reg  dst_reg  opcode
-=============  =======  =======  =======  ============
+  opcode:8 src_reg:4 dst_reg:4 offset:16 imm:32 // In little-endian BPF.
+  opcode:8 dst_reg:4 src_reg:4 offset:16 imm:32 // In big-endian BPF.
 
 **imm**
   signed integer immediate value
@@ -64,16 +60,17 @@ imm            offset   src_reg  dst_reg  opcode
 **opcode**
   operation to perform
 
-and as follows for a big-endian processor:
+Note that the contents of multi-byte fields ('imm' and 'offset') are
+stored using big-endian byte ordering in big-endian BPF and
+little-endian byte ordering in little-endian BPF.
 
-=============  =======  =======  =======  ============
-32 bits (MSB)  16 bits  4 bits   4 bits   8 bits (LSB)
-=============  =======  =======  =======  ============
-imm            offset   dst_reg  src_reg  opcode
-=============  =======  =======  =======  ============
+For example::
 
-Multi-byte fields ('imm' and 'offset') are similarly stored in
-the byte order of the processor.
+  opcode                  offset imm          assembly
+         src_reg dst_reg
+  07     0       1        00 00  44 33 22 11  r1 += 0x11223344 // little
+         dst_reg src_reg
+  07     1       0        00 00  11 22 33 44  r1 += 0x11223344 // big
 
 Note that most instructions do not use all of the fields.
 Unused fields shall be cleared to zero.
@@ -84,18 +81,23 @@ The 64 bits following the basic instruction contain a pseudo instruction
 using the same format but with opcode, dst_reg, src_reg, and offset all set to zero,
 and imm containing the high 32 bits of the immediate value.
 
-=================  ==================
-64 bits (MSB)      64 bits (LSB)
-=================  ==================
-basic instruction  pseudo instruction
-=================  ==================
+This is depicted in the following figure::
+
+        basic_instruction
+  .-----------------------------.
+  |                             |
+  code:8 regs:8 offset:16 imm:32 unused:32 imm:32
+                                 |              |
+                                 '--------------'
+                                pseudo instruction
 
 Thus the 64-bit immediate value is constructed as follows:
 
   imm64 = (next_imm << 32) | imm
 
 where 'next_imm' refers to the imm value of the pseudo instruction
-following the basic instruction.
+following the basic instruction.  The unused bytes in the pseudo
+instruction are reserved and shall be cleared to zero.
 
 Instruction classes
 -------------------
-- 
cgit v1.2.3


From d96d937d7c5c12237dce1f14bf0fc9900cabba09 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Wed, 1 Mar 2023 07:49:49 -0800
Subject: bpf: Add __uninit kfunc annotation

This patch adds __uninit as a kfunc annotation.

This will be useful in scenarios such as dynptrs, where it indicates
whether the dynptr should be checked by the verifier as an initialized
or an uninitialized dynptr.

Without this annotation, the alternative would be needing to hard-code
in the verifier the specific kfunc to indicate that arg should be
treated as an uninitialized arg.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Link: https://lore.kernel.org/r/20230301154953.641654-7-joannelkoong@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst | 17 +++++++++++++++++
 kernel/bpf/verifier.c        | 18 ++++++++++++++++--
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 226313747be5..9a78533d25ac 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -100,6 +100,23 @@ Hence, whenever a constant scalar argument is accepted by a kfunc which is not a
 size parameter, and the value of the constant matters for program safety, __k
 suffix should be used.
 
+2.2.2 __uninit Annotation
+--------------------
+
+This annotation is used to indicate that the argument will be treated as
+uninitialized.
+
+An example is given below::
+
+        __bpf_kfunc int bpf_dynptr_from_skb(..., struct bpf_dynptr_kern *ptr__uninit)
+        {
+        ...
+        }
+
+Here, the dynptr will be treated as an uninitialized dynptr. Without this
+annotation, the verifier will reject the program if the dynptr passed in is
+not initialized.
+
 .. _BPF_kfunc_nodef:
 
 2.3 Using an existing kernel function
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8fd2f26a8977..d052aa5800de 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8727,6 +8727,11 @@ static bool is_kfunc_arg_alloc_obj(const struct btf *btf, const struct btf_param
 	return __kfunc_param_match_suffix(btf, arg, "__alloc");
 }
 
+static bool is_kfunc_arg_uninit(const struct btf *btf, const struct btf_param *arg)
+{
+	return __kfunc_param_match_suffix(btf, arg, "__uninit");
+}
+
 static bool is_kfunc_arg_scalar_with_name(const struct btf *btf,
 					  const struct btf_param *arg,
 					  const char *name)
@@ -9662,17 +9667,26 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 				return ret;
 			break;
 		case KF_ARG_PTR_TO_DYNPTR:
+		{
+			enum bpf_arg_type dynptr_arg_type = ARG_PTR_TO_DYNPTR;
+
 			if (reg->type != PTR_TO_STACK &&
 			    reg->type != CONST_PTR_TO_DYNPTR) {
 				verbose(env, "arg#%d expected pointer to stack or dynptr_ptr\n", i);
 				return -EINVAL;
 			}
 
-			ret = process_dynptr_func(env, regno, insn_idx,
-						  ARG_PTR_TO_DYNPTR | MEM_RDONLY);
+			if (reg->type == CONST_PTR_TO_DYNPTR)
+				dynptr_arg_type |= MEM_RDONLY;
+
+			if (is_kfunc_arg_uninit(btf, &args[i]))
+				dynptr_arg_type |= MEM_UNINIT;
+
+			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type);
 			if (ret < 0)
 				return ret;
 			break;
+		}
 		case KF_ARG_PTR_TO_LIST_HEAD:
 			if (reg->type != PTR_TO_MAP_VALUE &&
 			    reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
-- 
cgit v1.2.3


From db52b587c67f40e4bd6e8167f2334d4500617bdc Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Wed, 1 Mar 2023 13:49:10 -0600
Subject: bpf, docs: Fix __uninit kfunc doc section

In commit d96d937d7c5c ("bpf: Add __uninit kfunc annotation"), the
__uninit kfunc annotation was documented in kfuncs.rst. You have to
fully underline a section in rst, or the build will issue a warning that
the title underline is too short:

./Documentation/bpf/kfuncs.rst:104: WARNING: Title underline too short.

2.2.2 __uninit Annotation
--------------------

This patch fixes that title underline.

Fixes: d96d937d7c5c ("bpf: Add __uninit kfunc annotation")
Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230301194910.602738-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 9a78533d25ac..9d85bbc3b771 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -101,7 +101,7 @@ size parameter, and the value of the constant matters for program safety, __k
 suffix should be used.
 
 2.2.2 __uninit Annotation
---------------------
+-------------------------
 
 This annotation is used to indicate that the argument will be treated as
 uninitialized.
-- 
cgit v1.2.3


From d56b0c461d19dae917fa0bba76cbe8ad7a44712e Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 2 Mar 2023 12:39:17 -0600
Subject: bpf, docs: Fix link to netdev-FAQ target

The BPF devel Q&A documentation page makes frequent reference to the
netdev-QA page via the netdev-FAQ rst link. This link is currently
broken, as is evidenced by the build output when making BPF docs:

./Documentation/bpf/bpf_devel_QA.rst:150: WARNING: undefined label: 'netdev-faq'
./Documentation/bpf/bpf_devel_QA.rst:206: WARNING: undefined label: 'netdev-faq'
./Documentation/bpf/bpf_devel_QA.rst:231: WARNING: undefined label: 'netdev-faq'
./Documentation/bpf/bpf_devel_QA.rst:396: WARNING: undefined label: 'netdev-faq'
./Documentation/bpf/bpf_devel_QA.rst:412: WARNING: undefined label: 'netdev-faq'

Fix the links to point to the actual netdev-faq page.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230302183918.54190-1-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/bpf_devel_QA.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 03d4993eda6f..5f5f9ccc3862 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -128,7 +128,7 @@ into the bpf-next tree will make their way into net-next tree. net and
 net-next are both run by David S. Miller. From there, they will go
 into the kernel mainline tree run by Linus Torvalds. To read up on the
 process of net and net-next being merged into the mainline tree, see
-the :ref:`netdev-FAQ`
+the `netdev-FAQ`_.
 
 
@@ -147,7 +147,7 @@ request)::
 Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
 ---------------------------------------------------------------------------------
 
-A: The process is the very same as described in the :ref:`netdev-FAQ`,
+A: The process is the very same as described in the `netdev-FAQ`_,
 so please read up on it. The subject line must indicate whether the
 patch is a fix or rather "next-like" content in order to let the
 maintainers know whether it is targeted at bpf or bpf-next.
@@ -206,7 +206,7 @@ ii) run extensive BPF test suite and
 Once the BPF pull request was accepted by David S. Miller, then
 the patches end up in net or net-next tree, respectively, and
 make their way from there further into mainline. Again, see the
-:ref:`netdev-FAQ` for additional information e.g. on how often they are
+`netdev-FAQ`_ for additional information e.g. on how often they are
 merged to mainline.
 
 Q: How long do I need to wait for feedback on my BPF patches?
@@ -230,7 +230,7 @@ Q: Are patches applied to bpf-next when the merge window is open?
 -----------------------------------------------------------------
 A: For the time when the merge window is open, bpf-next will not be
 processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the :ref:`netdev-FAQ` about further details.
+so feel free to read up on the `netdev-FAQ`_ about further details.
 
 During those two weeks of merge window, we might ask you to resend
 your patch series once bpf-next is open again. Once Linus released
@@ -394,7 +394,7 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
   netdev@vger.kernel.org
 
 The process in general is the same as on netdev itself, see also the
-:ref:`netdev-FAQ`.
+`netdev-FAQ`_.
 
 Q: Do you also backport to kernels not currently maintained as stable?
 ----------------------------------------------------------------------
@@ -410,7 +410,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
 What should I do?
 
 A: The same rules apply as with netdev patch submissions in general, see
-the :ref:`netdev-FAQ`.
+the `netdev-FAQ`_.
 
 Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
 ask the BPF maintainers to queue the patches instead. This can be done
@@ -685,7 +685,7 @@ when:
 
 .. Links
 .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
+.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html
 .. _selftests:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
 .. _Documentation/dev-tools/kselftest.rst:
-- 
cgit v1.2.3


From cacad346f67ce9604dcc9db10f1f1769dabb3891 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 2 Mar 2023 12:39:18 -0600
Subject: bpf, docs: Fix final bpf docs build failure

maps.rst in the BPF documentation links to the
/userspace-api/ebpf/syscall document
(Documentation/userspace-api/ebpf/syscall.rst). For some reason, if you
try to reference the document with :doc:, the docs build emits the
following warning:

./Documentation/bpf/maps.rst:13: WARNING: \
    unknown document: '/userspace-api/ebpf/syscall'

It appears that other places in the docs tree also don't support using
:doc:. Elsewhere in the BPF documentation, we just reference the kernel
docs page directly. Let's do that here to clean up the last remaining
noise in the docs build.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230302183918.54190-2-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/maps.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/maps.rst b/Documentation/bpf/maps.rst
index 4906ff0f8382..6f069f3d6f4b 100644
--- a/Documentation/bpf/maps.rst
+++ b/Documentation/bpf/maps.rst
@@ -11,9 +11,9 @@ maps are accessed from BPF programs via BPF helpers which are documented in the
 `man-pages`_ for `bpf-helpers(7)`_.
 
 BPF maps are accessed from user space via the ``bpf`` syscall, which provides
-commands to create maps, lookup elements, update elements and delete
-elements. More details of the BPF syscall are available in
-:doc:`/userspace-api/ebpf/syscall` and in the `man-pages`_ for `bpf(2)`_.
+commands to create maps, lookup elements, update elements and delete elements.
+More details of the BPF syscall are available in `ebpf-syscall`_ and in the
+`man-pages`_ for `bpf(2)`_.
 
 Map Types
 =========
@@ -79,3 +79,4 @@ Find and delete element by key in a given map using ``attr->map_fd``,
 .. _man-pages: https://www.kernel.org/doc/man-pages/
 .. _bpf(2): https://man7.org/linux/man-pages/man2/bpf.2.html
 .. _bpf-helpers(7): https://man7.org/linux/man-pages/man7/bpf-helpers.7.html
+.. _ebpf-syscall: https://docs.kernel.org/userspace-api/ebpf/syscall.html
-- 
cgit v1.2.3


From 03b77e17aeb22a5935ea20d585ca6a1f2947e62b Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 2 Mar 2023 20:14:41 -0800
Subject: bpf: Rename __kptr_ref -> __kptr and __kptr -> __kptr_untrusted.

__kptr was meant to store PTR_UNTRUSTED kernel pointers inside bpf maps.
The concept felt useful, but didn't get much traction,
since bpf_rdonly_cast() was added soon after and bpf programs received
a simpler way to access PTR_UNTRUSTED kernel pointers
without going through restrictive __kptr usage.

Rename __kptr_ref -> __kptr and __kptr -> __kptr_untrusted to indicate
its intended usage.
The main goal of __kptr_untrusted was to read/write such pointers
directly while bpf_kptr_xchg was a mechanism to access refcnted
kernel pointers. The next patch will allow RCU protected __kptr access
with direct read. At that point __kptr_untrusted will be deprecated.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20230303041446.3630-2-alexei.starovoitov@gmail.com
---
 Documentation/bpf/bpf_design_QA.rst                |  4 ++--
 Documentation/bpf/cpumasks.rst                     |  4 ++--
 Documentation/bpf/kfuncs.rst                       |  2 +-
 kernel/bpf/btf.c                                   |  4 ++--
 tools/lib/bpf/bpf_helpers.h                        |  2 +-
 tools/testing/selftests/bpf/progs/cb_refs.c        |  2 +-
 .../selftests/bpf/progs/cgrp_kfunc_common.h        |  2 +-
 tools/testing/selftests/bpf/progs/cpumask_common.h |  2 +-
 tools/testing/selftests/bpf/progs/jit_probe_mem.c  |  2 +-
 tools/testing/selftests/bpf/progs/lru_bug.c        |  2 +-
 tools/testing/selftests/bpf/progs/map_kptr.c       |  4 ++--
 tools/testing/selftests/bpf/progs/map_kptr_fail.c  |  6 +++---
 .../selftests/bpf/progs/task_kfunc_common.h        |  2 +-
 tools/testing/selftests/bpf/test_verifier.c        | 22 +++++++++++-----------
 14 files changed, 30 insertions(+), 30 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/bpf_design_QA.rst b/Documentation/bpf/bpf_design_QA.rst
index bfff0e7e37c2..38372a956d65 100644
--- a/Documentation/bpf/bpf_design_QA.rst
+++ b/Documentation/bpf/bpf_design_QA.rst
@@ -314,7 +314,7 @@ Q: What is the compatibility story for special BPF types in map values?
 Q: Users are allowed to embed bpf_spin_lock, bpf_timer fields in their BPF map
 values (when using BTF support for BPF maps). This allows to use helpers for
 such objects on these fields inside map values. Users are also allowed to embed
-pointers to some kernel types (with __kptr and __kptr_ref BTF tags). Will the
+pointers to some kernel types (with __kptr_untrusted and __kptr BTF tags). Will the
 kernel preserve backwards compatibility for these features?
 
 A: It depends. For bpf_spin_lock, bpf_timer: YES, for kptr and everything else:
@@ -324,7 +324,7 @@ For struct types that have been added already, like bpf_spin_lock and bpf_timer,
 the kernel will preserve backwards compatibility, as they are part of UAPI.
 
 For kptrs, they are also part of UAPI, but only with respect to the kptr
-mechanism. The types that you can use with a __kptr and __kptr_ref tagged
+mechanism. The types that you can use with a __kptr_untrusted and __kptr tagged
 pointer in your struct are NOT part of the UAPI contract. The supported types can
 and will change across kernel releases. However, operations like accessing kptr
 fields and bpf_kptr_xchg() helper will continue to be supported across kernel
diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 24bef9cbbeee..75344cd230e5 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -51,7 +51,7 @@ For example:
 .. code-block:: c
 
         struct cpumask_map_value {
-                struct bpf_cpumask __kptr_ref * cpumask;
+                struct bpf_cpumask __kptr * cpumask;
         };
 
         struct array_map {
@@ -128,7 +128,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
 
 	/* struct containing the struct bpf_cpumask kptr which is stored in the map. */
 	struct cpumasks_kfunc_map_value {
-		struct bpf_cpumask __kptr_ref * bpf_cpumask;
+		struct bpf_cpumask __kptr * bpf_cpumask;
 	};
 
 	/* The map containing struct cpumasks_kfunc_map_value entries. */
diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 9d85bbc3b771..b5d9b0d446bc 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -544,7 +544,7 @@ Here's an example of how it can be used:
 
 	/* struct containing the struct task_struct kptr which is actually stored in the map. */
 	struct __cgroups_kfunc_map_value {
-		struct cgroup __kptr_ref * cgroup;
+		struct cgroup __kptr * cgroup;
 	};
 
 	/* The map containing struct __cgroups_kfunc_map_value entries. */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index ef2d8969ed1f..c5e1d6955491 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3288,9 +3288,9 @@ static int btf_find_kptr(const struct btf *btf, const struct btf_type *t,
 	/* Reject extra tags */
 	if (btf_type_is_type_tag(btf_type_by_id(btf, t->type)))
 		return -EINVAL;
-	if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
+	if (!strcmp("kptr_untrusted", __btf_name_by_offset(btf, t->name_off)))
 		type = BPF_KPTR_UNREF;
-	else if (!strcmp("kptr_ref", __btf_name_by_offset(btf, t->name_off)))
+	else if (!strcmp("kptr", __btf_name_by_offset(btf, t->name_off)))
 		type = BPF_KPTR_REF;
 	else
 		return -EINVAL;
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 5ec1871acb2f..7d12d3e620cc 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -174,8 +174,8 @@ enum libbpf_tristate {
 
 #define __kconfig __attribute__((section(".kconfig")))
 #define __ksym __attribute__((section(".ksyms")))
+#define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted")))
 #define __kptr __attribute__((btf_type_tag("kptr")))
-#define __kptr_ref __attribute__((btf_type_tag("kptr_ref")))
 
 #ifndef ___bpf_concat
 #define ___bpf_concat(a, b) a ## b
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 7653df1bc787..ce96b33e38d6 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_helpers.h>
 
 struct map_value {
-	struct prog_test_ref_kfunc __kptr_ref *ptr;
+	struct prog_test_ref_kfunc __kptr *ptr;
 };
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 2f8de933b957..d0b7cd0d09d7 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -10,7 +10,7 @@
 #include <bpf/bpf_tracing.h>
 
 struct __cgrps_kfunc_map_value {
-	struct cgroup __kptr_ref * cgrp;
+	struct cgroup __kptr * cgrp;
 };
 
 struct hash_map {
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index ad34f3b602be..65e5496ca1b2 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -10,7 +10,7 @@
 int err;
 
 struct __cpumask_map_value {
-	struct bpf_cpumask __kptr_ref * cpumask;
+	struct bpf_cpumask __kptr * cpumask;
 };
 
 struct array_map {
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
index 2d2e61470794..13f00ca2ed0a 100644
--- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_helpers.h>
 
-static struct prog_test_ref_kfunc __kptr_ref *v;
+static struct prog_test_ref_kfunc __kptr *v;
 long total_sum = -1;
 
 extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c
index 687081a724b3..ad73029cb1e3 100644
--- a/tools/testing/selftests/bpf/progs/lru_bug.c
+++ b/tools/testing/selftests/bpf/progs/lru_bug.c
@@ -4,7 +4,7 @@
 #include <bpf/bpf_helpers.h>
 
 struct map_value {
-	struct task_struct __kptr *ptr;
+	struct task_struct __kptr_untrusted *ptr;
 };
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index a24d17bc17eb..3fe7cde4cbfd 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -4,8 +4,8 @@
 #include <bpf/bpf_helpers.h>
 
 struct map_value {
-	struct prog_test_ref_kfunc __kptr *unref_ptr;
-	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+	struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+	struct prog_test_ref_kfunc __kptr *ref_ptr;
 };
 
 struct array_map {
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 760e41e1a632..e19e2a5f38cf 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -7,9 +7,9 @@
 
 struct map_value {
 	char buf[8];
-	struct prog_test_ref_kfunc __kptr *unref_ptr;
-	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
-	struct prog_test_member __kptr_ref *ref_memb_ptr;
+	struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+	struct prog_test_ref_kfunc __kptr *ref_ptr;
+	struct prog_test_member __kptr *ref_memb_ptr;
 };
 
 struct array_map {
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index c0ffd171743e..4c2a4b0e3a25 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -10,7 +10,7 @@
 #include <bpf/bpf_tracing.h>
 
 struct __tasks_kfunc_map_value {
-	struct task_struct __kptr_ref * task;
+	struct task_struct __kptr * task;
 };
 
 struct hash_map {
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 8b9949bb833d..49a70d9beb0b 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -699,13 +699,13 @@ static int create_cgroup_storage(bool percpu)
  *   struct bpf_timer t;
  * };
  * struct btf_ptr {
+ *   struct prog_test_ref_kfunc __kptr_untrusted *ptr;
  *   struct prog_test_ref_kfunc __kptr *ptr;
- *   struct prog_test_ref_kfunc __kptr_ref *ptr;
- *   struct prog_test_member __kptr_ref *ptr;
+ *   struct prog_test_member __kptr *ptr;
  * }
  */
 static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
-				  "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+				  "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted"
 				  "\0prog_test_member";
 static __u32 btf_raw_types[] = {
 	/* int */
@@ -724,20 +724,20 @@ static __u32 btf_raw_types[] = {
 	BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
 	/* struct prog_test_ref_kfunc */		/* [6] */
 	BTF_STRUCT_ENC(51, 0, 0),
-	BTF_STRUCT_ENC(89, 0, 0),			/* [7] */
+	BTF_STRUCT_ENC(95, 0, 0),			/* [7] */
+	/* type tag "kptr_untrusted" */
+	BTF_TYPE_TAG_ENC(80, 6),			/* [8] */
 	/* type tag "kptr" */
-	BTF_TYPE_TAG_ENC(75, 6),			/* [8] */
-	/* type tag "kptr_ref" */
-	BTF_TYPE_TAG_ENC(80, 6),			/* [9] */
-	BTF_TYPE_TAG_ENC(80, 7),			/* [10] */
+	BTF_TYPE_TAG_ENC(75, 6),			/* [9] */
+	BTF_TYPE_TAG_ENC(75, 7),			/* [10] */
 	BTF_PTR_ENC(8),					/* [11] */
 	BTF_PTR_ENC(9),					/* [12] */
 	BTF_PTR_ENC(10),				/* [13] */
 	/* struct btf_ptr */				/* [14] */
 	BTF_STRUCT_ENC(43, 3, 24),
-	BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
-	BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
-	BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
+	BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */
+	BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */
+	BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
 };
 
 static char bpf_vlog[UINT_MAX >> 8];
-- 
cgit v1.2.3


From 20c09d92faeefb8536f705d3a4629e0dc314c8a1 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Thu, 2 Mar 2023 20:14:43 -0800
Subject: bpf: Introduce kptr_rcu.

The life time of certain kernel structures like 'struct cgroup' is protected by RCU.
Hence it's safe to dereference them directly from __kptr tagged pointers in bpf maps.
The resulting pointer is MEM_RCU and can be passed to kfuncs that expect KF_RCU.
Dereference of other kptr-s returns PTR_UNTRUSTED.

For example:
struct map_value {
   struct cgroup __kptr *cgrp;
};

SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp_arg, const char *path)
{
  struct cgroup *cg, *cg2;

  cg = bpf_cgroup_acquire(cgrp_arg); // cg is PTR_TRUSTED and ref_obj_id > 0
  bpf_kptr_xchg(&v->cgrp, cg);

  cg2 = v->cgrp; // This is the new feature introduced by this patch.
  // cg2 is PTR_MAYBE_NULL | MEM_RCU.
  // When cg2 != NULL, it's a valid cgroup, but its percpu_ref could be zero

  if (cg2)
    bpf_cgroup_ancestor(cg2, level); // safe to do.
}

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20230303041446.3630-4-alexei.starovoitov@gmail.com
---
 Documentation/bpf/kfuncs.rst                       | 12 +++--
 include/linux/btf.h                                |  2 +-
 kernel/bpf/helpers.c                               |  6 ++-
 kernel/bpf/verifier.c                              | 55 ++++++++++++++++++----
 net/bpf/test_run.c                                 |  3 +-
 .../selftests/bpf/progs/cgrp_kfunc_failure.c       |  2 +-
 tools/testing/selftests/bpf/progs/map_kptr_fail.c  |  4 +-
 tools/testing/selftests/bpf/verifier/calls.c       |  2 +-
 tools/testing/selftests/bpf/verifier/map_kptr.c    |  2 +-
 9 files changed, 65 insertions(+), 23 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index b5d9b0d446bc..69eccf6f98ef 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -249,11 +249,13 @@ added later.
 2.4.8 KF_RCU flag
 -----------------
 
-The KF_RCU flag is used for kfuncs which have a rcu ptr as its argument.
-When used together with KF_ACQUIRE, it indicates the kfunc should have a
-single argument which must be a trusted argument or a MEM_RCU pointer.
-The argument may have reference count of 0 and the kfunc must take this
-into consideration.
+The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
+KF_RCU expect either PTR_TRUSTED or MEM_RCU arguments. The verifier guarantees
+that the objects are valid and there is no use-after-free. The pointers are not
+NULL, but the object's refcount could have reached zero. The kfuncs need to
+consider doing a refcnt != 0 check, especially when returning a KF_ACQUIRE
+pointer. Note as well that a KF_ACQUIRE kfunc that is KF_RCU should very likely
+also be KF_RET_NULL.
 
 .. _KF_deprecated_flag:
 
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 49e0fe6d8274..556b3e2e7471 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -70,7 +70,7 @@
 #define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
 #define KF_SLEEPABLE    (1 << 5) /* kfunc may sleep */
 #define KF_DESTRUCTIVE  (1 << 6) /* kfunc performs destructive actions */
-#define KF_RCU          (1 << 7) /* kfunc only takes rcu pointer arguments */
+#define KF_RCU          (1 << 7) /* kfunc takes either rcu or trusted pointer arguments */
 
 /*
  * Tag marking a kernel function as a kfunc. This is meant to minimize the
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 12f12e879bcf..637ac4e92e75 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2163,8 +2163,10 @@ __bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
 	if (level > cgrp->level || level < 0)
 		return NULL;
 
+	/* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
 	ancestor = cgrp->ancestors[level];
-	cgroup_get(ancestor);
+	if (!cgroup_tryget(ancestor))
+		return NULL;
 	return ancestor;
 }
 
@@ -2382,7 +2384,7 @@ BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_cgroup_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_release, KF_RELEASE)
-BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_TRUSTED_ARGS | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_cgroup_ancestor, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_cgroup_from_id, KF_ACQUIRE | KF_RET_NULL)
 #endif
 BTF_ID_FLAGS(func, bpf_task_from_pid, KF_ACQUIRE | KF_RET_NULL)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index b834f3d2d81a..a095055d7ef4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4218,7 +4218,7 @@ static int map_kptr_match_type(struct bpf_verifier_env *env,
 			       struct bpf_reg_state *reg, u32 regno)
 {
 	const char *targ_name = kernel_type_name(kptr_field->kptr.btf, kptr_field->kptr.btf_id);
-	int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED;
+	int perm_flags = PTR_MAYBE_NULL | PTR_TRUSTED | MEM_RCU;
 	const char *reg_name = "";
 
 	/* Only unreferenced case accepts untrusted pointers */
@@ -4285,6 +4285,34 @@ bad_type:
 	return -EINVAL;
 }
 
+/* The non-sleepable programs and sleepable programs with explicit bpf_rcu_read_lock()
+ * can dereference RCU protected pointers and result is PTR_TRUSTED.
+ */
+static bool in_rcu_cs(struct bpf_verifier_env *env)
+{
+	return env->cur_state->active_rcu_lock || !env->prog->aux->sleepable;
+}
+
+/* Once GCC supports btf_type_tag the following mechanism will be replaced with tag check */
+BTF_SET_START(rcu_protected_types)
+BTF_ID(struct, prog_test_ref_kfunc)
+BTF_ID(struct, cgroup)
+BTF_SET_END(rcu_protected_types)
+
+static bool rcu_protected_object(const struct btf *btf, u32 btf_id)
+{
+	if (!btf_is_kernel(btf))
+		return false;
+	return btf_id_set_contains(&rcu_protected_types, btf_id);
+}
+
+static bool rcu_safe_kptr(const struct btf_field *field)
+{
+	const struct btf_field_kptr *kptr = &field->kptr;
+
+	return field->type == BPF_KPTR_REF && rcu_protected_object(kptr->btf, kptr->btf_id);
+}
+
 static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
 				 int value_regno, int insn_idx,
 				 struct btf_field *kptr_field)
@@ -4319,7 +4347,10 @@ static int check_map_kptr_access(struct bpf_verifier_env *env, u32 regno,
 		 * value from map as PTR_TO_BTF_ID, with the correct type.
 		 */
 		mark_btf_ld_reg(env, cur_regs(env), value_regno, PTR_TO_BTF_ID, kptr_field->kptr.btf,
-				kptr_field->kptr.btf_id, PTR_MAYBE_NULL | PTR_UNTRUSTED);
+				kptr_field->kptr.btf_id,
+				rcu_safe_kptr(kptr_field) && in_rcu_cs(env) ?
+				PTR_MAYBE_NULL | MEM_RCU :
+				PTR_MAYBE_NULL | PTR_UNTRUSTED);
 		/* For mark_ptr_or_null_reg */
 		val_reg->id = ++env->id_gen;
 	} else if (class == BPF_STX) {
@@ -5163,10 +5194,17 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 	 * An RCU-protected pointer can also be deemed trusted if we are in an
 	 * RCU read region. This case is handled below.
 	 */
-	if (nested_ptr_is_trusted(env, reg, off))
+	if (nested_ptr_is_trusted(env, reg, off)) {
 		flag |= PTR_TRUSTED;
-	else
+		/*
+		 * task->cgroups is trusted. It provides a stronger guarantee
+		 * than __rcu tag on 'cgroups' field in 'struct task_struct'.
+		 * Clear MEM_RCU in such case.
+		 */
+		flag &= ~MEM_RCU;
+	} else {
 		flag &= ~PTR_TRUSTED;
+	}
 
 	if (flag & MEM_RCU) {
 		/* Mark value register as MEM_RCU only if it is protected by
@@ -5175,11 +5213,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
 		 * read lock region. Also mark rcu pointer as PTR_MAYBE_NULL since
 		 * it could be null in some cases.
 		 */
-		if (!env->cur_state->active_rcu_lock ||
-		    !(is_trusted_reg(reg) || is_rcu_reg(reg)))
-			flag &= ~MEM_RCU;
-		else
+		if (in_rcu_cs(env) && (is_trusted_reg(reg) || is_rcu_reg(reg)))
 			flag |= PTR_MAYBE_NULL;
+		else
+			flag &= ~MEM_RCU;
 	} else if (reg->type & MEM_RCU) {
 		/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
 		 * with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
@@ -9676,7 +9713,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
 			return -EINVAL;
 		}
 
-		if (is_kfunc_trusted_args(meta) &&
+		if ((is_kfunc_trusted_args(meta) || is_kfunc_rcu(meta)) &&
 		    (register_is_null(reg) || type_may_be_null(reg->type))) {
 			verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
 			return -EACCES;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 6f3d654b3339..6a8b33a103a4 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -737,6 +737,7 @@ __bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
 
 __bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
 {
+	/* p != NULL, but p->cnt could be 0 */
 }
 
 __bpf_kfunc void bpf_kfunc_call_test_destructive(void)
@@ -784,7 +785,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
-BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
 BTF_SET8_END(test_sk_check_kfunc_ids)
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
index 4ad7fe24966d..b42291ed9586 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -205,7 +205,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
+__failure __msg("expects refcounted")
 int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
 {
 	struct __cgrps_kfunc_map_value *v;
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index e19e2a5f38cf..08f9ec18c345 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -281,7 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_")
 int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
 {
 	struct map_value *v;
@@ -316,7 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
 }
 
 SEC("?tc")
-__failure __msg("R2 type=untrusted_ptr_ expected=ptr_")
+__failure __msg("R2 must be referenced")
 int reject_untrusted_xchg(struct __sk_buff *ctx)
 {
 	struct prog_test_ref_kfunc *p;
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 289ed202ec66..9a326a800e5c 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -243,7 +243,7 @@
 	},
 	.result_unpriv = REJECT,
 	.result = REJECT,
-	.errstr = "R1 must be referenced",
+	.errstr = "R1 must be",
 },
 {
 	"calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index 6914904344c0..d775ccb01989 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -336,7 +336,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_kptr = { 1 },
 	.result = REJECT,
-	.errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+	.errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_",
 },
 {
 	"map_kptr: ref: reject off != 0",
-- 
cgit v1.2.3


From 7d8c48917a9576b5fc8871aa4946149b0e4a4927 Mon Sep 17 00:00:00 2001
From: Arınç ÜNAL <arinc.unal@arinc9.com>
Date: Tue, 7 Mar 2023 12:56:19 +0300
Subject: dt-bindings: net: dsa: mediatek,mt7530: change some descriptions to
 literal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The line endings must be preserved on gpio-controller, io-supply, and
reset-gpios properties to look proper when the YAML file is parsed.

Currently it's interpreted as a single line when parsed. Change the style
of the description of these properties to literal style to preserve the
line endings.

Signed-off-by: Arınç ÜNAL <arinc.unal@arinc9.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
index 449ee0735012..5ae9cd8f99a2 100644
--- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
@@ -93,7 +93,7 @@ properties:
 
   gpio-controller:
     type: boolean
-    description:
+    description: |
       If defined, LED controller of the MT7530 switch will run on GPIO mode.
 
       There are 15 controllable pins.
@@ -112,7 +112,7 @@ properties:
     maxItems: 1
 
   io-supply:
-    description:
+    description: |
       Phandle to the regulator node necessary for the I/O power.
       See Documentation/devicetree/bindings/regulator/mt6323-regulator.txt for
       details for the regulator setup on these boards.
@@ -124,7 +124,7 @@ properties:
       switch is a part of the multi-chip module.
 
   reset-gpios:
-    description:
+    description: |
       GPIO to reset the switch. Use this if mediatek,mcm is not used.
       This property is optional because some boards share the reset line with
       other components which makes it impossible to probe the switch if the
-- 
cgit v1.2.3


From c1f9e14e3b676eb88fe1c9488c0b5f4fc9108a1c Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler@microsoft.com>
Date: Wed, 8 Mar 2023 20:53:03 +0000
Subject: bpf, docs: Explain helper functions

Add brief text about existence of helper functions, with details to go in
separate psABI text.

Note that text about runtime functions (kfuncs) is part of a separate patch,
not this one.

Signed-off-by: Dave Thaler <dthaler@microsoft.com>
Link: https://lore.kernel.org/r/20230308205303.1308-1-dthaler1968@googlemail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/clang-notes.rst     | 6 ++++++
 Documentation/bpf/instruction-set.rst | 9 ++++++++-
 Documentation/bpf/linux-notes.rst     | 8 ++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/clang-notes.rst b/Documentation/bpf/clang-notes.rst
index 528feddf2db9..2c872a1ee08e 100644
--- a/Documentation/bpf/clang-notes.rst
+++ b/Documentation/bpf/clang-notes.rst
@@ -20,6 +20,12 @@ Arithmetic instructions
 For CPU versions prior to 3, Clang v7.0 and later can enable ``BPF_ALU`` support with
 ``-Xclang -target-feature -Xclang +alu32``.  In CPU version 3, support is automatically included.
 
+Jump instructions
+=================
+
+If ``-O0`` is used, Clang will generate the ``BPF_CALL | BPF_X | BPF_JMP`` (0x8d)
+instruction, which is not supported by the Linux kernel verifier.
+
 Atomic operations
 =================
 
diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index db8789e6969e..5e43e14abe80 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -253,7 +253,7 @@ BPF_JSET  0x40   PC += off if dst & src
 BPF_JNE   0x50   PC += off if dst != src
 BPF_JSGT  0x60   PC += off if dst > src     signed
 BPF_JSGE  0x70   PC += off if dst >= src    signed
-BPF_CALL  0x80   function call
+BPF_CALL  0x80   function call              see `Helper functions`_
 BPF_EXIT  0x90   function / program return  BPF_JMP only
 BPF_JLT   0xa0   PC += off if dst < src     unsigned
 BPF_JLE   0xb0   PC += off if dst <= src    unsigned
@@ -264,6 +264,13 @@ BPF_JSLE  0xd0   PC += off if dst <= src    signed
 The eBPF program needs to store the return value into register R0 before doing a
 BPF_EXIT.
 
+Helper functions
+~~~~~~~~~~~~~~~~
+
+Helper functions are a concept whereby BPF programs can call into a
+set of functions exposed by the runtime.  Each helper
+function is identified by an integer used in a ``BPF_CALL`` instruction.
+The available helper functions may differ for each program type.
 
 Load and store instructions
 ===========================
diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst
index 956b0c86699d..f43b9c797bcb 100644
--- a/Documentation/bpf/linux-notes.rst
+++ b/Documentation/bpf/linux-notes.rst
@@ -12,6 +12,14 @@ Byte swap instructions
 
 ``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and ``BPF_TO_BE`` respectively.
 
+Jump instructions
+=================
+
+``BPF_CALL | BPF_X | BPF_JMP`` (0x8d), where the helper function
+integer would be read from a specified register, is not currently supported
+by the verifier.  Any programs with this instruction will fail to load
+until such support is added.
+
 Legacy BPF Packet access instructions
 =====================================
 
-- 
cgit v1.2.3


From aacaf7b3d19daaa91528ab0c598b89a7f82aa47d Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Thu, 9 Mar 2023 13:06:11 +0530
Subject: dt-bindings: net: ti: k3-am654-cpsw-nuss: Document Serdes PHY

Update bindings to include Serdes PHY as an optional PHY, in addition to
the existing CPSW MAC's PHY. The CPSW MAC's PHY is required while the
Serdes PHY is optional. The Serdes PHY handle has to be provided only
when the Serdes is being configured in a Single-Link protocol. Using the
name "serdes-phy" to represent the Serdes PHY handle, the am65-cpsw-nuss
driver can obtain the Serdes PHY and request the Serdes to be
configured.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml     | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 900063411a20..628d63e1eb1f 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -126,8 +126,18 @@ properties:
             description: CPSW port number
 
           phys:
-            maxItems: 1
-            description: phandle on phy-gmii-sel PHY
+            minItems: 1
+            items:
+              - description: CPSW MAC's PHY.
+              - description: Serdes PHY. Serdes PHY is required only if
+                             the Serdes has to be configured in the
+                             Single-Link configuration.
+
+          phy-names:
+            minItems: 1
+            items:
+              - const: mac
+              - const: serdes
 
           label:
             description: label associated with this port
-- 
cgit v1.2.3


From b9fe8e8d03d0df28b2431e3aaf8e115cf7bf2f65 Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler@microsoft.com>
Date: Fri, 10 Mar 2023 23:38:14 +0000
Subject: bpf, docs: Add signed comparison example

Improve clarity by adding an example of a signed comparison instruction

Signed-off-by: Dave Thaler <dthaler@microsoft.com>
Acked-by: David Vernet <void@manifault.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/r/20230310233814.4641-1-dthaler1968@googlemail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/instruction-set.rst | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index 5e43e14abe80..b44640589055 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -11,7 +11,8 @@ Documentation conventions
 =========================
 
 For brevity, this document uses the type notion "u64", "u32", etc.
-to mean an unsigned integer whose width is the specified number of bits.
+to mean an unsigned integer whose width is the specified number of bits,
+and "s32", etc. to mean a signed integer of the specified number of bits.
 
 Registers and calling convention
 ================================
@@ -264,6 +265,14 @@ BPF_JSLE  0xd0   PC += off if dst <= src    signed
 The eBPF program needs to store the return value into register R0 before doing a
 BPF_EXIT.
 
+Example:
+
+``BPF_JSGE | BPF_X | BPF_JMP32`` (0x7e) means::
+
+  if (s32)dst s>= (s32)src goto +offset
+
+where 's>=' indicates a signed '>=' comparison.
+
 Helper functions
 ~~~~~~~~~~~~~~~~
 
-- 
cgit v1.2.3


From 1bffcea42926b26e092045ac398850e80d950bb2 Mon Sep 17 00:00:00 2001
From: Gal Pressman <gal@nvidia.com>
Date: Mon, 13 Mar 2023 22:42:30 -0700
Subject: net/mlx5e: Add devlink hairpin queues parameters

We refer to a TC NIC rule that involves forwarding as "hairpin".
Hairpin queues are an mlx5 hardware-specific implementation for hardware
forwarding of such packets.

Per the discussion in [1], move the hairpin queues control (number and
size) from debugfs to devlink.

Expose two devlink params:
- hairpin_num_queues: control the number of hairpin queues
- hairpin_queue_size: control the size (in packets) of the hairpin queues

[1] https://lore.kernel.org/all/20230111194608.7f15b9a1@kernel.org/

Signed-off-by: Gal Pressman <gal@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Link: https://lore.kernel.org/r/20230314054234.267365-12-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/mellanox/mlx5/devlink.rst             | 35 ++++++++++++
 Documentation/networking/devlink/mlx5.rst          | 12 ++++
 drivers/net/ethernet/mellanox/mlx5/core/devlink.c  | 66 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/devlink.h  |  2 +
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 50 +++++++---------
 5 files changed, 134 insertions(+), 31 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
index 9b5c40ba7f0d..0995e4e5acd7 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -122,6 +122,41 @@ users try to enable them.
 
     $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev
 
+hairpin_num_queues: Number of hairpin queues
+--------------------------------------------
+We refer to a TC NIC rule that involves forwarding as "hairpin".
+
+Hairpin queues are an mlx5 hardware-specific implementation for hardware
+forwarding of such packets.
+
+- Show the number of hairpin queues::
+
+    $ devlink dev param show pci/0000:06:00.0 name hairpin_num_queues
+      pci/0000:06:00.0:
+        name hairpin_num_queues type driver-specific
+          values:
+            cmode driverinit value 2
+
+- Change the number of hairpin queues::
+
+    $ devlink dev param set pci/0000:06:00.0 name hairpin_num_queues value 4 cmode driverinit
+
+hairpin_queue_size: Size of the hairpin queues
+----------------------------------------------
+Control the size (in packets) of the hairpin queues.
+
+- Show the size of the hairpin queues::
+
+    $ devlink dev param show pci/0000:06:00.0 name hairpin_queue_size
+      pci/0000:06:00.0:
+        name hairpin_queue_size type driver-specific
+          values:
+            cmode driverinit value 1024
+
+- Change the size (in packets) of the hairpin queues::
+
+    $ devlink dev param set pci/0000:06:00.0 name hairpin_queue_size value 512 cmode driverinit
+
 Health reporters
 ================
 
diff --git a/Documentation/networking/devlink/mlx5.rst b/Documentation/networking/devlink/mlx5.rst
index 3321117cf605..202798d6501e 100644
--- a/Documentation/networking/devlink/mlx5.rst
+++ b/Documentation/networking/devlink/mlx5.rst
@@ -72,6 +72,18 @@ parameters.
 
        Default: disabled
 
+   * - ``hairpin_num_queues``
+     - u32
+     - driverinit
+     - We refer to a TC NIC rule that involves forwarding as "hairpin".
+       Hairpin queues are an mlx5 hardware-specific implementation for hardware
+       forwarding of such packets.
+
+       Control the number of hairpin queues.
+   * - ``hairpin_queue_size``
+     - u32
+     - driverinit
+     - Control the size (in packets) of the hairpin queues.
 
 The ``mlx5`` driver supports reloading via ``DEVLINK_CMD_RELOAD``
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
index b7784e02c2dd..1ee2a472e1d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c
@@ -494,6 +494,61 @@ static int mlx5_devlink_eq_depth_validate(struct devlink *devlink, u32 id,
 	return (val.vu32 >= 64 && val.vu32 <= 4096) ? 0 : -EINVAL;
 }
 
+static int
+mlx5_devlink_hairpin_num_queues_validate(struct devlink *devlink, u32 id,
+					 union devlink_param_value val,
+					 struct netlink_ext_ack *extack)
+{
+	return val.vu32 ? 0 : -EINVAL;
+}
+
+static int
+mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id,
+					 union devlink_param_value val,
+					 struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	u32 val32 = val.vu32;
+
+	if (!is_power_of_2(val32)) {
+		NL_SET_ERR_MSG_MOD(extack, "Value is not power of two");
+		return -EINVAL;
+	}
+
+	if (val32 > BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets))) {
+		NL_SET_ERR_MSG_FMT_MOD(
+			extack, "Maximum hairpin queue size is %lu",
+			BIT(MLX5_CAP_GEN(dev, log_max_hairpin_num_packets)));
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	union devlink_param_value value;
+	u64 link_speed64;
+	u32 link_speed;
+
+	/* set hairpin pair per each 50Gbs share of the link */
+	mlx5_port_max_linkspeed(dev, &link_speed);
+	link_speed = max_t(u32, link_speed, 50000);
+	link_speed64 = link_speed;
+	do_div(link_speed64, 50000);
+
+	value.vu32 = link_speed64;
+	devl_param_driverinit_value_set(
+		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, value);
+
+	value.vu32 =
+		BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(dev),
+			  MLX5_CAP_GEN(dev, log_max_hairpin_num_packets)));
+	devl_param_driverinit_value_set(
+		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, value);
+}
+
 static const struct devlink_param mlx5_devlink_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_ROCE, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 			      NULL, NULL, mlx5_devlink_enable_roce_validate),
@@ -547,6 +602,14 @@ static void mlx5_devlink_set_params_init_values(struct devlink *devlink)
 static const struct devlink_param mlx5_devlink_eth_params[] = {
 	DEVLINK_PARAM_GENERIC(ENABLE_ETH, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
 			      NULL, NULL, NULL),
+	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
+			     "hairpin_num_queues", DEVLINK_PARAM_TYPE_U32,
+			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+			     mlx5_devlink_hairpin_num_queues_validate),
+	DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
+			     "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32,
+			     BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL,
+			     mlx5_devlink_hairpin_queue_size_validate),
 };
 
 static int mlx5_devlink_eth_params_register(struct devlink *devlink)
@@ -567,6 +630,9 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink)
 	devl_param_driverinit_value_set(devlink,
 					DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
 					value);
+
+	mlx5_devlink_hairpin_params_init_values(devlink);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 212b12424146..5dcfb4d86d8a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -12,6 +12,8 @@ enum mlx5_devlink_param_id {
 	MLX5_DEVLINK_PARAM_ID_ESW_LARGE_GROUP_NUM,
 	MLX5_DEVLINK_PARAM_ID_ESW_PORT_METADATA,
 	MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT,
+	MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES,
+	MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE,
 };
 
 struct mlx5_trap_ctx {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 79dd8ad5ede7..2e6351ef4d9c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -44,6 +44,7 @@
 #include <net/bareudp.h>
 #include <net/bonding.h>
 #include <net/dst_metadata.h>
+#include "devlink.h"
 #include "en.h"
 #include "en/tc/post_act.h"
 #include "en/tc/act_stats.h"
@@ -73,12 +74,6 @@
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
 
-struct mlx5e_hairpin_params {
-	struct mlx5_core_dev *mdev;
-	u32 num_queues;
-	u32 queue_size;
-};
-
 struct mlx5e_tc_table {
 	/* Protects the dynamic assignment of the t parameter
 	 * which is the nic tc root table.
@@ -101,7 +96,6 @@ struct mlx5e_tc_table {
 
 	struct mlx5_tc_ct_priv         *ct;
 	struct mapping_ctx             *mapping;
-	struct mlx5e_hairpin_params    hairpin_params;
 	struct dentry                  *dfs_root;
 
 	/* tc action stats */
@@ -1099,33 +1093,15 @@ static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
 			    &debugfs_hairpin_table_dump_fops);
 }
 
-static void
-mlx5e_hairpin_params_init(struct mlx5e_hairpin_params *hairpin_params,
-			  struct mlx5_core_dev *mdev)
-{
-	u64 link_speed64;
-	u32 link_speed;
-
-	hairpin_params->mdev = mdev;
-	/* set hairpin pair per each 50Gbs share of the link */
-	mlx5_port_max_linkspeed(mdev, &link_speed);
-	link_speed = max_t(u32, link_speed, 50000);
-	link_speed64 = link_speed;
-	do_div(link_speed64, 50000);
-	hairpin_params->num_queues = link_speed64;
-
-	hairpin_params->queue_size =
-		BIT(min_t(u32, 16 - MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev),
-			  MLX5_CAP_GEN(mdev, log_max_hairpin_num_packets)));
-}
-
 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 				  struct mlx5e_tc_flow *flow,
 				  struct mlx5e_tc_flow_parse_attr *parse_attr,
 				  struct netlink_ext_ack *extack)
 {
 	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
+	struct devlink *devlink = priv_to_devlink(priv->mdev);
 	int peer_ifindex = parse_attr->mirred_ifindex[0];
+	union devlink_param_value val = {};
 	struct mlx5_hairpin_params params;
 	struct mlx5_core_dev *peer_mdev;
 	struct mlx5e_hairpin_entry *hpe;
@@ -1182,7 +1158,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 		 hash_hairpin_info(peer_id, match_prio));
 	mutex_unlock(&tc->hairpin_tbl_lock);
 
-	params.log_num_packets = ilog2(tc->hairpin_params.queue_size);
+	err = devl_param_driverinit_value_get(
+		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
+	if (err) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	params.log_num_packets = ilog2(val.vu32);
 	params.log_data_size =
 		clamp_t(u32,
 			params.log_num_packets +
@@ -1191,7 +1174,14 @@ static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
 			MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
 
 	params.q_counter = priv->q_counter;
-	params.num_channels = tc->hairpin_params.num_queues;
+	err = devl_param_driverinit_value_get(
+		devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
+	if (err) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	params.num_channels = val.vu32;
 
 	hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
 	hpe->hp = hp;
@@ -5289,8 +5279,6 @@ int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
 	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
 				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
 
-	mlx5e_hairpin_params_init(&tc->hairpin_params, dev);
-
 	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
 	err = register_netdevice_notifier_dev_net(priv->netdev,
 						  &tc->netdevice_nb,
-- 
cgit v1.2.3


From fec2c6d14fd5001e7d24a2ae44f0e9aea82a6149 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Thu, 16 Mar 2023 00:40:28 -0500
Subject: bpf,docs: Remove bpf_cpumask_kptr_get() from documentation

Now that the kfunc no longer exists, we can remove it and instead
describe how RCU can be used to get a struct bpf_cpumask from a map
value. This patch updates the BPF documentation accordingly.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230316054028.88924-6-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/cpumasks.rst | 30 ++++++++++--------------------
 1 file changed, 10 insertions(+), 20 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst
index 75344cd230e5..41efd8874eeb 100644
--- a/Documentation/bpf/cpumasks.rst
+++ b/Documentation/bpf/cpumasks.rst
@@ -117,12 +117,7 @@ For example:
 As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
 also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
 a map, the reference can be removed from the map with bpf_kptr_xchg(), or
-opportunistically acquired with bpf_cpumask_kptr_get():
-
-.. kernel-doc:: kernel/bpf/cpumask.c
-  :identifiers: bpf_cpumask_kptr_get
-
-Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
+opportunistically acquired using RCU:
 
 .. code-block:: c
 
@@ -144,7 +139,7 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
 	/**
 	 * A simple example tracepoint program showing how a
 	 * struct bpf_cpumask * kptr that is stored in a map can
-	 * be acquired using the bpf_cpumask_kptr_get() kfunc.
+	 * be passed to kfuncs using RCU protection.
 	 */
 	SEC("tp_btf/cgroup_mkdir")
 	int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
@@ -158,26 +153,21 @@ Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:
 		if (!v)
 			return -ENOENT;
 
+		bpf_rcu_read_lock();
 		/* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
-		kptr = bpf_cpumask_kptr_get(&v->cpumask);
-		if (!kptr)
+		kptr = v->cpumask;
+		if (!kptr) {
 			/* If no bpf_cpumask was present in the map, it's because
 			 * we're racing with another CPU that removed it with
 			 * bpf_kptr_xchg() between the bpf_map_lookup_elem()
-			 * above, and our call to bpf_cpumask_kptr_get().
-			 * bpf_cpumask_kptr_get() internally safely handles this
-			 * race, and will return NULL if the cpumask is no longer
-			 * present in the map by the time we invoke the kfunc.
+			 * above, and our load of the pointer from the map.
 			 */
+			bpf_rcu_read_unlock();
 			return -EBUSY;
+		}
 
-		/* Free the reference we just took above. Note that the
-		 * original struct bpf_cpumask * kptr is still in the map. It will
-		 * be freed either at a later time if another context deletes
-		 * it from the map, or automatically by the BPF subsystem if
-		 * it's still present when the map is destroyed.
-		 */
-		bpf_cpumask_release(kptr);
+		bpf_cpumask_setall(kptr);
+		bpf_rcu_read_unlock();
 
 		return 0;
 	}
-- 
cgit v1.2.3


From 40235edeadf58e4232bfcf8bf15be453cfe233b7 Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Wed, 15 Mar 2023 13:29:47 +0530
Subject: dt-bindings: net: ti: k3-am654-cpsw-nuss: Fix compatible order

Reorder compatibles to follow alphanumeric order.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 628d63e1eb1f..6f56add1919b 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -54,11 +54,11 @@ properties:
 
   compatible:
     enum:
+      - ti,am642-cpsw-nuss
       - ti,am654-cpsw-nuss
       - ti,j7200-cpswxg-nuss
       - ti,j721e-cpsw-nuss
       - ti,j721e-cpswxg-nuss
-      - ti,am642-cpsw-nuss
 
   reg:
     maxItems: 1
@@ -215,8 +215,8 @@ allOf:
           compatible:
             contains:
               enum:
-                - ti,j721e-cpswxg-nuss
                 - ti,j7200-cpswxg-nuss
+                - ti,j721e-cpswxg-nuss
     then:
       properties:
         ethernet-ports:
-- 
cgit v1.2.3


From e0c9c2a7dd738120c2fbc155c6fba1066f109be0 Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Wed, 15 Mar 2023 13:29:48 +0530
Subject: dt-bindings: net: ti: k3-am654-cpsw-nuss: Add J784S4 CPSW9G support

Update bindings for TI K3 J784S4 SoC which contains 9 ports (8 external
ports) CPSW9G module and add compatible for it.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 6f56add1919b..306709bcc9e9 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -59,6 +59,7 @@ properties:
       - ti,j7200-cpswxg-nuss
       - ti,j721e-cpsw-nuss
       - ti,j721e-cpswxg-nuss
+      - ti,j784s4-cpswxg-nuss
 
   reg:
     maxItems: 1
@@ -197,7 +198,9 @@ allOf:
         properties:
           compatible:
             contains:
-              const: ti,j721e-cpswxg-nuss
+              enum:
+                - ti,j721e-cpswxg-nuss
+                - ti,j784s4-cpswxg-nuss
     then:
       properties:
         ethernet-ports:
@@ -217,6 +220,7 @@ allOf:
               enum:
                 - ti,j7200-cpswxg-nuss
                 - ti,j721e-cpswxg-nuss
+                - ti,j784s4-cpswxg-nuss
     then:
       properties:
         ethernet-ports:
-- 
cgit v1.2.3


From 74bf6477c18b2904936763132e9224a41b8da13a Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Mar 2023 21:49:13 -0700
Subject: netlink-specs: add partial specification for devlink

Devlink is quite complex but put in the very basics so we can
incrementally fill in the commands as needed.

$ ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/devlink.yaml \
    --dump get

[{'bus-name': 'netdevsim',
  'dev-name': 'netdevsim1',
  'dev-stats': {'reload-stats': {'reload-action-info': {'reload-action': 1,
                                                        'reload-action-stats': {'reload-stats-entry': [{'reload-stats-limit': 0,
                                                                                                        'reload-stats-value': 0}]}}},
                'remote-reload-stats': {'reload-action-info': {'reload-action': 2,
                                                               'reload-action-stats': {'reload-stats-entry': [{'reload-stats-limit': 0,
                                                                                                               'reload-stats-value': 0},
                                                                                                              {'reload-stats-limit': 1,
                                                                                                               'reload-stats-value': 0}]}}}},
  'reload-failed': 0}]

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/netlink/specs/devlink.yaml | 198 +++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 Documentation/netlink/specs/devlink.yaml

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml
new file mode 100644
index 000000000000..90641668232e
--- /dev/null
+++ b/Documentation/netlink/specs/devlink.yaml
@@ -0,0 +1,198 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: devlink
+
+protocol: genetlink-legacy
+
+doc: Partial family for Devlink.
+
+attribute-sets:
+  -
+    name: devlink
+    attributes:
+      -
+        name: bus-name
+        type: string
+        value: 1
+      -
+        name: dev-name
+        type: string
+      -
+        name: port-index
+        type: u32
+
+      # TODO: fill in the attributes in between
+
+      -
+        name: info-driver-name
+        type: string
+        value: 98
+      -
+        name: info-serial-number
+        type: string
+      -
+        name: info-version-fixed
+        type: nest
+        multi-attr: true
+        nested-attributes: dl-info-version
+      -
+        name: info-version-running
+        type: nest
+        multi-attr: true
+        nested-attributes: dl-info-version
+      -
+        name: info-version-stored
+        type: nest
+        multi-attr: true
+        nested-attributes: dl-info-version
+      -
+        name: info-version-name
+        type: string
+      -
+        name: info-version-value
+        type: string
+
+      # TODO: fill in the attributes in between
+
+      -
+        name: reload-failed
+        type: u8
+        value: 136
+
+      # TODO: fill in the attributes in between
+
+      -
+        name: reload-action
+        type: u8
+        value: 153
+
+      # TODO: fill in the attributes in between
+
+      -
+        name: dev-stats
+        type: nest
+        value: 156
+        nested-attributes: dl-dev-stats
+      -
+        name: reload-stats
+        type: nest
+        nested-attributes: dl-reload-stats
+      -
+        name: reload-stats-entry
+        type: nest
+        multi-attr: true
+        nested-attributes: dl-reload-stats-entry
+      -
+        name: reload-stats-limit
+        type: u8
+      -
+        name: reload-stats-value
+        type: u32
+      -
+        name: remote-reload-stats
+        type: nest
+        nested-attributes: dl-reload-stats
+      -
+        name: reload-action-info
+        type: nest
+        nested-attributes: dl-reload-act-info
+      -
+        name: reload-action-stats
+        type: nest
+        nested-attributes: dl-reload-act-stats
+  -
+    name: dl-dev-stats
+    subset-of: devlink
+    attributes:
+      -
+        name: reload-stats
+        type: nest
+      -
+        name: remote-reload-stats
+        type: nest
+  -
+    name: dl-reload-stats
+    subset-of: devlink
+    attributes:
+      -
+        name: reload-action-info
+        type: nest
+  -
+    name: dl-reload-act-info
+    subset-of: devlink
+    attributes:
+      -
+        name: reload-action
+        type: u8
+      -
+        name: reload-action-stats
+        type: nest
+  -
+    name: dl-reload-act-stats
+    subset-of: devlink
+    attributes:
+      -
+        name: reload-stats-entry
+        type: nest
+  -
+    name: dl-reload-stats-entry
+    subset-of: devlink
+    attributes:
+      -
+        name: reload-stats-limit
+        type: u8
+      -
+        name: reload-stats-value
+        type: u32
+  -
+    name: dl-info-version
+    subset-of: devlink
+    attributes:
+      -
+        name: info-version-name
+        type: string
+      -
+        name: info-version-value
+        type: string
+
+operations:
+  enum-model: directional
+  list:
+    -
+      name: get
+      doc: Get devlink instances.
+      attribute-set: devlink
+
+      do:
+        request:
+          value: 1
+          attributes: &dev-id-attrs
+            - bus-name
+            - dev-name
+        reply:  &get-reply
+          value: 3
+          attributes:
+            - bus-name
+            - dev-name
+            - reload-failed
+            - reload-action
+            - dev-stats
+      dump:
+        reply: *get-reply
+
+      # TODO: fill in the operations in between
+
+    -
+      name: info-get
+      doc: Get device information, like driver name, hardware and firmware versions etc.
+      attribute-set: devlink
+
+      do:
+        request:
+          value: 51
+          attributes: *dev-id-attrs
+        reply:
+          value: 51
+          attributes:
+            - bus-name
+            - dev-name
-- 
cgit v1.2.3


From 82b3297009b6831dfe47f0f38ed4043e39f58c9f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 15 Mar 2023 21:50:27 -0700
Subject: netlink: specs: allow uapi-header in genetlink

Chuck wanted to put the UAPI header in linux/net/ which seems
reasonable, allow genetlink families to choose the location.
It doesn't really matter for non-C-like languages.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/netlink/genetlink-c.yaml      | 2 +-
 Documentation/netlink/genetlink-legacy.yaml | 2 +-
 Documentation/netlink/genetlink.yaml        | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml
index f082a5ad7cf1..c83643d403b7 100644
--- a/Documentation/netlink/genetlink-c.yaml
+++ b/Documentation/netlink/genetlink-c.yaml
@@ -33,10 +33,10 @@ properties:
   protocol:
     description: Schema compatibility level. Default is "genetlink".
     enum: [ genetlink, genetlink-c ]
-  # Start genetlink-c
   uapi-header:
     description: Path to the uAPI header, default is linux/${family-name}.h
     type: string
+  # Start genetlink-c
   c-family-name:
     description: Name of the define for the family name.
     type: string
diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index c6b8c77f7d12..792875dd7ed1 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -33,10 +33,10 @@ properties:
   protocol:
     description: Schema compatibility level. Default is "genetlink".
     enum: [ genetlink, genetlink-c, genetlink-legacy ] # Trim
-  # Start genetlink-c
   uapi-header:
     description: Path to the uAPI header, default is linux/${family-name}.h
     type: string
+  # Start genetlink-c
   c-family-name:
     description: Name of the define for the family name.
     type: string
diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml
index b2d56ab9e615..8952e84ff207 100644
--- a/Documentation/netlink/genetlink.yaml
+++ b/Documentation/netlink/genetlink.yaml
@@ -33,6 +33,9 @@ properties:
   protocol:
     description: Schema compatibility level. Default is "genetlink".
     enum: [ genetlink ]
+  uapi-header:
+    description: Path to the uAPI header, default is linux/${family-name}.h
+    type: string
 
   definitions:
     description: List of type and constant definitions (enums, flags, defines).
-- 
cgit v1.2.3


From 0f10f647f45545004ea50b73a7a7c5c3309ff286 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Tue, 14 Mar 2023 14:44:49 +0700
Subject: bpf, docs: Use internal linking for link to netdev subsystem doc

Commit d56b0c461d19da ("bpf, docs: Fix link to netdev-FAQ target")
attempts to fix linking problem to undefined "netdev-FAQ" label
introduced in 287f4fa99a5281 ("docs: Update references to netdev-FAQ")
by changing internal cross reference to netdev subsystem documentation
(Documentation/process/maintainer-netdev.rst) to external one at
docs.kernel.org. However, the linking problem is still not
resolved, as the generated link points to non-existent netdev-FAQ
section of the external doc, which, when clicked, instead goes
to the top of the doc.

Revert to internal linking by simply mentioning the doc path while
massaging the leading text to the link, since the netdev subsystem
doc contains no FAQs but rather general information about the subsystem.

Fixes: d56b0c461d19 ("bpf, docs: Fix link to netdev-FAQ target")
Fixes: 287f4fa99a52 ("docs: Update references to netdev-FAQ")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20230314074449.23620-1-bagasdotme@gmail.com
---
 Documentation/bpf/bpf_devel_QA.rst | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 5f5f9ccc3862..e151e61dff38 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -128,7 +128,8 @@ into the bpf-next tree will make their way into net-next tree. net and
 net-next are both run by David S. Miller. From there, they will go
 into the kernel mainline tree run by Linus Torvalds. To read up on the
 process of net and net-next being merged into the mainline tree, see
-the `netdev-FAQ`_.
+the documentation on the netdev subsystem at
+Documentation/process/maintainer-netdev.rst.
 
 
@@ -147,7 +148,8 @@ request)::
 Q: How do I indicate which tree (bpf vs. bpf-next) my patch should be applied to?
 ---------------------------------------------------------------------------------
 
-A: The process is the very same as described in the `netdev-FAQ`_,
+A: The process is the very same as described in the netdev subsystem
+documentation at Documentation/process/maintainer-netdev.rst,
 so please read up on it. The subject line must indicate whether the
 patch is a fix or rather "next-like" content in order to let the
 maintainers know whether it is targeted at bpf or bpf-next.
@@ -206,8 +208,9 @@ ii) run extensive BPF test suite and
 Once the BPF pull request was accepted by David S. Miller, then
 the patches end up in net or net-next tree, respectively, and
 make their way from there further into mainline. Again, see the
-`netdev-FAQ`_ for additional information e.g. on how often they are
-merged to mainline.
+documentation for the netdev subsystem at
+Documentation/process/maintainer-netdev.rst for additional information
+e.g. on how often they are merged to mainline.
 
 Q: How long do I need to wait for feedback on my BPF patches?
 -------------------------------------------------------------
@@ -230,7 +233,8 @@ Q: Are patches applied to bpf-next when the merge window is open?
 -----------------------------------------------------------------
 A: For the time when the merge window is open, bpf-next will not be
 processed. This is roughly analogous to net-next patch processing,
-so feel free to read up on the `netdev-FAQ`_ about further details.
+so feel free to read up on the netdev docs at
+Documentation/process/maintainer-netdev.rst about further details.
 
 During those two weeks of merge window, we might ask you to resend
 your patch series once bpf-next is open again. Once Linus released
@@ -394,7 +398,8 @@ netdev kernel mailing list in Cc and ask for the fix to be queued up:
   netdev@vger.kernel.org
 
 The process in general is the same as on netdev itself, see also the
-`netdev-FAQ`_.
+documentation on the networking subsystem at
+Documentation/process/maintainer-netdev.rst.
 
 Q: Do you also backport to kernels not currently maintained as stable?
 ----------------------------------------------------------------------
@@ -410,7 +415,7 @@ Q: The BPF patch I am about to submit needs to go to stable as well
 What should I do?
 
 A: The same rules apply as with netdev patch submissions in general, see
-the `netdev-FAQ`_.
+the netdev docs at Documentation/process/maintainer-netdev.rst.
 
 Never add "``Cc: stable@vger.kernel.org``" to the patch description, but
 ask the BPF maintainers to queue the patches instead. This can be done
@@ -685,7 +690,6 @@ when:
 
 .. Links
 .. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
-.. _netdev-FAQ: https://www.kernel.org/doc/html/latest/process/maintainer-netdev.html
 .. _selftests:
    https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
 .. _Documentation/dev-tools/kselftest.rst:
-- 
cgit v1.2.3


From 0de10fd6eb94259a749d558ee0d34083ae010a1d Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Wed, 15 Mar 2023 14:43:05 -0500
Subject: dt-bindings: net: qcom,ipa: add SDX65 compatible

Add support for SDX65, which uses IPA v5.0.

Reviewed-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: Alex Elder <elder@linaro.org>
Link: https://lore.kernel.org/r/20230315194305.1647311-1-elder@linaro.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 4aeda379726f..2d5e4ffb2f9e 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -49,6 +49,7 @@ properties:
       - qcom,sc7280-ipa
       - qcom,sdm845-ipa
       - qcom,sdx55-ipa
+      - qcom,sdx65-ipa
       - qcom,sm6350-ipa
       - qcom,sm8350-ipa
 
-- 
cgit v1.2.3


From 08ff1c9f3e927ba3701c113dda70953a6f4afffa Mon Sep 17 00:00:00 2001
From: Sreevani Sreejith <ssreevani@meta.com>
Date: Wed, 15 Mar 2023 12:54:05 -0700
Subject: bpf, docs: Libbpf overview documentation

This patch adds an overview of libbpf, including its features for
developing BPF programs.

Signed-off-by: Sreevani Sreejith <ssreevani@meta.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20230315195405.2051559-1-ssreevani@meta.com
---
 Documentation/bpf/libbpf/index.rst           |  25 ++-
 Documentation/bpf/libbpf/libbpf_overview.rst | 228 +++++++++++++++++++++++++++
 2 files changed, 245 insertions(+), 8 deletions(-)
 create mode 100644 Documentation/bpf/libbpf/libbpf_overview.rst

(limited to 'Documentation')

diff --git a/Documentation/bpf/libbpf/index.rst b/Documentation/bpf/libbpf/index.rst
index f9b3b252e28f..7545a2049692 100644
--- a/Documentation/bpf/libbpf/index.rst
+++ b/Documentation/bpf/libbpf/index.rst
@@ -2,23 +2,32 @@
 
 .. _libbpf:
 
+======
 libbpf
 ======
 
+If you are looking to develop BPF applications using the libbpf library, this
+directory contains important documentation that you should read.
+
+To get started, it is recommended to begin with the :doc:`libbpf Overview
+<libbpf_overview>` document, which provides a high-level understanding of the
+libbpf APIs and their usage. This will give you a solid foundation to start
+exploring and utilizing the various features of libbpf to develop your BPF
+applications.
+
 .. toctree::
    :maxdepth: 1
 
+   libbpf_overview
    API Documentation <https://libbpf.readthedocs.io/en/latest/api.html>
    program_types
    libbpf_naming_convention
    libbpf_build
 
-This is documentation for libbpf, a userspace library for loading and
-interacting with bpf programs.
 
-All general BPF questions, including kernel functionality, libbpf APIs and
-their application, should be sent to bpf@vger.kernel.org mailing list.
-You can `subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the
-mailing list search its `archive <https://lore.kernel.org/bpf/>`_.
-Please search the archive before asking new questions. It very well might
-be that this was already addressed or answered before.
+All general BPF questions, including kernel functionality, libbpf APIs and their
+application, should be sent to the bpf@vger.kernel.org mailing list.  You can
+`subscribe <http://vger.kernel.org/vger-lists.html#bpf>`_ to the mailing list
+and search its `archive <https://lore.kernel.org/bpf/>`_.  Please search the
+archive before asking new questions. It may be that this was already addressed
+or answered before.
diff --git a/Documentation/bpf/libbpf/libbpf_overview.rst b/Documentation/bpf/libbpf/libbpf_overview.rst
new file mode 100644
index 000000000000..f36a2d4ffea2
--- /dev/null
+++ b/Documentation/bpf/libbpf/libbpf_overview.rst
@@ -0,0 +1,228 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+libbpf Overview
+===============
+
+libbpf is a C-based library containing a BPF loader that takes compiled BPF
+object files and prepares and loads them into the Linux kernel. libbpf takes on the
+heavy lifting of loading, verifying, and attaching BPF programs to various
+kernel hooks, allowing BPF application developers to focus only on BPF program
+correctness and performance.
+
+The following are the high-level features supported by libbpf:
+
+* Provides high-level and low-level APIs for user space programs to interact
+  with BPF programs. The low-level APIs wrap all the bpf system call
+  functionality, which is useful when users need more fine-grained control
+  over the interactions between user space and BPF programs.
+* Provides overall support for the BPF object skeleton generated by bpftool.
+  The skeleton file simplifies the process for the user space programs to access
+  global variables and work with BPF programs.
+* Provides BPF-side APIs, including BPF helper definitions, BPF maps support,
+  and tracing helpers, allowing developers to simplify BPF code writing.
+* Supports BPF CO-RE mechanism, enabling BPF developers to write portable
+  BPF programs that can be compiled once and run across different kernel
+  versions.
+
+This document will delve into the above concepts in detail, providing a deeper
+understanding of the capabilities and advantages of libbpf and how it can help
+you develop BPF applications efficiently.
+
+BPF App Lifecycle and libbpf APIs
+==================================
+
+A BPF application consists of one or more BPF programs (either cooperating or
+completely independent), BPF maps, and global variables. The global
+variables are shared between all BPF programs, which allows them to cooperate on
+a common set of data. libbpf provides APIs that user space programs can use to
+manipulate the BPF programs by triggering different phases of a BPF application
+lifecycle.
+
+The following section provides a brief overview of each phase in the BPF life
+cycle:
+
+* **Open phase**: In this phase, libbpf parses the BPF
+  object file and discovers BPF maps, BPF programs, and global variables. After
+  a BPF app is opened, user space apps can make additional adjustments
+  (setting BPF program types, if necessary; pre-setting initial values for
+  global variables, etc.) before all the entities are created and loaded.
+
+* **Load phase**: In the load phase, libbpf creates BPF
+  maps, resolves various relocations, and verifies and loads BPF programs into
+  the kernel. At this point, libbpf validates all the parts of a BPF application
+  and loads the BPF program into the kernel, but no BPF program has yet been
+  executed. After the load phase, it’s possible to set up the initial BPF map
+  state without racing with the BPF program code execution.
+
+* **Attachment phase**: In this phase, libbpf
+  attaches BPF programs to various BPF hook points (e.g., tracepoints, kprobes,
+  cgroup hooks, network packet processing pipeline, etc.). During this
+  phase, BPF programs perform useful work such as processing
+  packets, or updating BPF maps and global variables that can be read from user
+  space.
+
+* **Tear down phase**: In the tear down phase,
+  libbpf detaches BPF programs and unloads them from the kernel. BPF maps are
+  destroyed, and all the resources used by the BPF app are freed.
+
+BPF Object Skeleton File
+========================
+
+BPF skeleton is an alternative interface to libbpf APIs for working with BPF
+objects. Skeleton code abstracts away generic libbpf APIs to significantly
+simplify code for manipulating BPF programs from user space. Skeleton code
+includes a bytecode representation of the BPF object file, simplifying the
+process of distributing your BPF code. With BPF bytecode embedded, there are no
+extra files to deploy along with your application binary.
+
+You can generate the skeleton header file ``(.skel.h)`` for a specific object
+file by passing the BPF object to the bpftool. The generated BPF skeleton
+provides the following custom functions that correspond to the BPF lifecycle,
+each of them prefixed with the specific object name:
+
+* ``<name>__open()`` – creates and opens BPF application (``<name>`` stands for
+  the specific bpf object name)
+* ``<name>__load()`` – instantiates, loads, and verifies BPF application parts
+* ``<name>__attach()`` – attaches all auto-attachable BPF programs (it’s
+  optional, you can have more control by using libbpf APIs directly)
+* ``<name>__destroy()`` – detaches all BPF programs and
+  frees up all used resources
+
+Using the skeleton code is the recommended way to work with BPF programs. Keep
+in mind, BPF skeleton provides access to the underlying BPF object, so whatever
+was possible to do with generic libbpf APIs is still possible even when the BPF
+skeleton is used. It's an additive convenience feature, with no syscalls, and no
+cumbersome code.
+
+Other Advantages of Using Skeleton File
+---------------------------------------
+
+* BPF skeleton provides an interface for user space programs to work with BPF
+  global variables. The skeleton code memory maps global variables as a struct
+  into user space. The struct interface allows user space programs to initialize
+  BPF programs before the BPF load phase and fetch and update data from user
+  space afterward.
+
+* The ``skel.h`` file reflects the object file structure by listing out the
+  available maps, programs, etc. BPF skeleton provides direct access to all the
+  BPF maps and BPF programs as struct fields. This eliminates the need for
+  string-based lookups with ``bpf_object_find_map_by_name()`` and
+  ``bpf_object_find_program_by_name()`` APIs, reducing errors due to BPF source
+  code and user-space code getting out of sync.
+
+* The embedded bytecode representation of the object file ensures that the
+  skeleton and the BPF object file are always in sync.
+
+BPF Helpers
+===========
+
+libbpf provides BPF-side APIs that BPF programs can use to interact with the
+system. The BPF helpers definition allows developers to use them in BPF code as
+any other plain C function. For example, there are helper functions to print
+debugging messages, get the time since the system was booted, interact with BPF
+maps, manipulate network packets, etc.
+
+For a complete description of what the helpers do, the arguments they take, and
+the return value, see the `bpf-helpers
+<https://man7.org/linux/man-pages/man7/bpf-helpers.7.html>`_ man page.
+
+BPF CO-RE (Compile Once – Run Everywhere)
+=========================================
+
+BPF programs work in the kernel space and have access to kernel memory and data
+structures. One limitation that BPF applications come across is the lack of
+portability across different kernel versions and configurations. `BCC
+<https://github.com/iovisor/bcc/>`_ is one of the solutions for BPF
+portability. However, it comes with runtime overhead and a large binary size
+from embedding the compiler with the application.
+
+libbpf steps up the BPF program portability by supporting the BPF CO-RE concept.
+BPF CO-RE brings together BTF type information, libbpf, and the compiler to
+produce a single executable binary that you can run on multiple kernel versions
+and configurations.
+
+To make BPF programs portable, libbpf relies on the BTF type information of the
+running kernel. The kernel also exposes this self-describing authoritative BTF
+information through ``sysfs`` at ``/sys/kernel/btf/vmlinux``.
+
+You can generate the BTF information for the running kernel with the following
+command:
+
+::
+
+  $ bpftool btf dump file /sys/kernel/btf/vmlinux format c > vmlinux.h
+
+The command generates a ``vmlinux.h`` header file with all kernel types
+(:doc:`BTF types <../btf>`) that the running kernel uses. Including
+``vmlinux.h`` in your BPF program eliminates dependency on system-wide kernel
+headers.
+
+libbpf enables portability of BPF programs by looking at the BPF program’s
+recorded BTF type and relocation information and matching them to BTF
+information (vmlinux) provided by the running kernel. libbpf then resolves and
+matches all the types and fields, and updates necessary offsets and other
+relocatable data to ensure that BPF program’s logic functions correctly for a
+specific kernel on the host. BPF CO-RE concept thus eliminates overhead
+associated with BPF development and allows developers to write portable BPF
+applications without modifications and runtime source code compilation on the
+target machine.
+
+The following code snippet shows how to read the parent field of a kernel
+``task_struct`` using BPF CO-RE and libbpf. The basic helper to read a field in a
+CO-RE relocatable manner is ``bpf_core_read(dst, sz, src)``, which will read
+``sz`` bytes from the field referenced by ``src`` into the memory pointed to by
+``dst``.
+
+.. code-block:: C
+   :emphasize-lines: 6
+
+    //...
+    struct task_struct *task = (void *)bpf_get_current_task();
+    struct task_struct *parent_task;
+    int err;
+
+    err = bpf_core_read(&parent_task, sizeof(void *), &task->parent);
+    if (err) {
+      /* handle error */
+    }
+
+    /* parent_task contains the value of task->parent pointer */
+
+In the code snippet, we first get a pointer to the current ``task_struct`` using
+``bpf_get_current_task()``.  We then use ``bpf_core_read()`` to read the parent
+field of task struct into the ``parent_task`` variable. ``bpf_core_read()`` is
+just like ``bpf_probe_read_kernel()`` BPF helper, except it records information
+about the field that should be relocated on the target kernel. That is, if the
+``parent`` field gets shifted to a different offset within
+``struct task_struct`` due to some new field added in front of it, libbpf will
+automatically adjust the actual offset to the proper value.
+
+Getting Started with libbpf
+===========================
+
+Check out the `libbpf-bootstrap <https://github.com/libbpf/libbpf-bootstrap>`_
+repository with simple examples of using libbpf to build various BPF
+applications.
+
+See also `libbpf API documentation
+<https://libbpf.readthedocs.io/en/latest/api.html>`_.
+
+libbpf and Rust
+===============
+
+If you are building BPF applications in Rust, it is recommended to use the
+`Libbpf-rs <https://github.com/libbpf/libbpf-rs>`_ library instead of bindgen
+bindings directly to libbpf. Libbpf-rs wraps libbpf functionality in
+Rust-idiomatic interfaces and provides libbpf-cargo plugin to handle BPF code
+compilation and skeleton generation. Using Libbpf-rs will make building user
+space part of the BPF application easier. Note that the BPF programs themselves
+must still be written in plain C.
+
+Additional Documentation
+========================
+
+* `Program types and ELF Sections <https://libbpf.readthedocs.io/en/latest/program_types.html>`_
+* `API naming convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html>`_
+* `Building libbpf <https://libbpf.readthedocs.io/en/latest/libbpf_build.html>`_
+* `API documentation Convention <https://libbpf.readthedocs.io/en/latest/libbpf_naming_convention.html#api-documentation-convention>`_
-- 
cgit v1.2.3


From e485f3a6eae0849f83b94936778a2325f72a0c89 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 17 Mar 2023 13:09:03 -0700
Subject: ixgb: Remove ixgb driver

There are likely no users of this driver as the hardware has been
discontinued since 2010. Remove the driver and all references to it
in documentation.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Acked-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/PCI/pci-error-recovery.rst           |    1 -
 .../networking/device_drivers/ethernet/index.rst   |    1 -
 .../device_drivers/ethernet/intel/ixgb.rst         |  468 ----
 arch/loongarch/configs/loongson3_defconfig         |    1 -
 arch/mips/configs/loongson2k_defconfig             |    1 -
 arch/mips/configs/loongson3_defconfig              |    1 -
 arch/mips/configs/mtx1_defconfig                   |    1 -
 arch/powerpc/configs/powernv_defconfig             |    1 -
 arch/powerpc/configs/ppc64_defconfig               |    1 -
 arch/powerpc/configs/ppc64e_defconfig              |    1 -
 arch/powerpc/configs/ppc6xx_defconfig              |    1 -
 arch/powerpc/configs/pseries_defconfig             |    1 -
 arch/powerpc/configs/skiroot_defconfig             |    1 -
 drivers/net/ethernet/intel/Kconfig                 |   17 -
 drivers/net/ethernet/intel/Makefile                |    1 -
 drivers/net/ethernet/intel/ixgb/Makefile           |    9 -
 drivers/net/ethernet/intel/ixgb/ixgb.h             |  179 --
 drivers/net/ethernet/intel/ixgb/ixgb_ee.c          |  580 -----
 drivers/net/ethernet/intel/ixgb/ixgb_ee.h          |   79 -
 drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c     |  642 ------
 drivers/net/ethernet/intel/ixgb/ixgb_hw.c          | 1229 -----------
 drivers/net/ethernet/intel/ixgb/ixgb_hw.h          |  767 -------
 drivers/net/ethernet/intel/ixgb/ixgb_ids.h         |   23 -
 drivers/net/ethernet/intel/ixgb/ixgb_main.c        | 2285 --------------------
 drivers/net/ethernet/intel/ixgb/ixgb_osdep.h       |   39 -
 drivers/net/ethernet/intel/ixgb/ixgb_param.c       |  442 ----
 26 files changed, 6772 deletions(-)
 delete mode 100644 Documentation/networking/device_drivers/ethernet/intel/ixgb.rst
 delete mode 100644 drivers/net/ethernet/intel/ixgb/Makefile
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb.h
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ee.c
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ee.h
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_hw.c
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_hw.h
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_ids.h
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_main.c
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
 delete mode 100644 drivers/net/ethernet/intel/ixgb/ixgb_param.c

(limited to 'Documentation')

diff --git a/Documentation/PCI/pci-error-recovery.rst b/Documentation/PCI/pci-error-recovery.rst
index bdafeb4b66dc..9981d330da8f 100644
--- a/Documentation/PCI/pci-error-recovery.rst
+++ b/Documentation/PCI/pci-error-recovery.rst
@@ -418,7 +418,6 @@ That is, the recovery API only requires that:
    - drivers/next/e100.c
    - drivers/net/e1000
    - drivers/net/e1000e
-   - drivers/net/ixgb
    - drivers/net/ixgbe
    - drivers/net/cxgb3
    - drivers/net/s2io.c
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 392969ac88ad..6e9e7012d000 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -31,7 +31,6 @@ Contents:
    intel/fm10k
    intel/igb
    intel/igbvf
-   intel/ixgb
    intel/ixgbe
    intel/ixgbevf
    intel/i40e
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgb.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgb.rst
deleted file mode 100644
index c6a233e68ad6..000000000000
--- a/Documentation/networking/device_drivers/ethernet/intel/ixgb.rst
+++ /dev/null
@@ -1,468 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0+
-
-=====================================================================
-Linux Base Driver for 10 Gigabit Intel(R) Ethernet Network Connection
-=====================================================================
-
-October 1, 2018
-
-
-Contents
-========
-
-- In This Release
-- Identifying Your Adapter
-- Command Line Parameters
-- Improving Performance
-- Additional Configurations
-- Known Issues/Troubleshooting
-- Support
-
-
-
-In This Release
-===============
-
-This file describes the ixgb Linux Base Driver for the 10 Gigabit Intel(R)
-Network Connection.  This driver includes support for Itanium(R)2-based
-systems.
-
-For questions related to hardware requirements, refer to the documentation
-supplied with your 10 Gigabit adapter.  All hardware requirements listed apply
-to use with Linux.
-
-The following features are available in this kernel:
- - Native VLANs
- - Channel Bonding (teaming)
- - SNMP
-
-Channel Bonding documentation can be found in the Linux kernel source:
-/Documentation/networking/bonding.rst
-
-The driver information previously displayed in the /proc filesystem is not
-supported in this release.  Alternatively, you can use ethtool (version 1.6
-or later), lspci, and iproute2 to obtain the same information.
-
-Instructions on updating ethtool can be found in the section "Additional
-Configurations" later in this document.
-
-
-Identifying Your Adapter
-========================
-
-The following Intel network adapters are compatible with the drivers in this
-release:
-
-+------------+------------------------------+----------------------------------+
-| Controller | Adapter Name                 | Physical Layer                   |
-+============+==============================+==================================+
-| 82597EX    | Intel(R) PRO/10GbE LR/SR/CX4 | - 10G Base-LR (fiber)            |
-|            | Server Adapters              | - 10G Base-SR (fiber)            |
-|            |                              | - 10G Base-CX4 (copper)          |
-+------------+------------------------------+----------------------------------+
-
-For more information on how to identify your adapter, go to the Adapter &
-Driver ID Guide at:
-
-    https://support.intel.com
-
-
-Command Line Parameters
-=======================
-
-If the driver is built as a module, the  following optional parameters are
-used by entering them on the command line with the modprobe command using
-this syntax::
-
-    modprobe ixgb [<option>=<VAL1>,<VAL2>,...]
-
-For example, with two 10GbE PCI adapters, entering::
-
-    modprobe ixgb TxDescriptors=80,128
-
-loads the ixgb driver with 80 TX resources for the first adapter and 128 TX
-resources for the second adapter.
-
-The default value for each parameter is generally the recommended setting,
-unless otherwise noted.
-
-Copybreak
----------
-:Valid Range: 0-XXXX
-:Default Value: 256
-
-    This is the maximum size of packet that is copied to a new buffer on
-    receive.
-
-Debug
------
-:Valid Range: 0-16 (0=none,...,16=all)
-:Default Value: 0
-
-    This parameter adjusts the level of debug messages displayed in the
-    system logs.
-
-FlowControl
------------
-:Valid Range: 0-3 (0=none, 1=Rx only, 2=Tx only, 3=Rx&Tx)
-:Default Value: 1 if no EEPROM, otherwise read from EEPROM
-
-    This parameter controls the automatic generation(Tx) and response(Rx) to
-    Ethernet PAUSE frames.  There are hardware bugs associated with enabling
-    Tx flow control so beware.
-
-RxDescriptors
--------------
-:Valid Range: 64-4096
-:Default Value: 1024
-
-    This value is the number of receive descriptors allocated by the driver.
-    Increasing this value allows the driver to buffer more incoming packets.
-    Each descriptor is 16 bytes.  A receive buffer is also allocated for
-    each descriptor and can be either 2048, 4056, 8192, or 16384 bytes,
-    depending on the MTU setting.  When the MTU size is 1500 or less, the
-    receive buffer size is 2048 bytes. When the MTU is greater than 1500 the
-    receive buffer size will be either 4056, 8192, or 16384 bytes.  The
-    maximum MTU size is 16114.
-
-TxDescriptors
--------------
-:Valid Range: 64-4096
-:Default Value: 256
-
-    This value is the number of transmit descriptors allocated by the driver.
-    Increasing this value allows the driver to queue more transmits.  Each
-    descriptor is 16 bytes.
-
-RxIntDelay
-----------
-:Valid Range: 0-65535 (0=off)
-:Default Value: 72
-
-    This value delays the generation of receive interrupts in units of
-    0.8192 microseconds.  Receive interrupt reduction can improve CPU
-    efficiency if properly tuned for specific network traffic.  Increasing
-    this value adds extra latency to frame reception and can end up
-    decreasing the throughput of TCP traffic.  If the system is reporting
-    dropped receives, this value may be set too high, causing the driver to
-    run out of available receive descriptors.
-
-TxIntDelay
-----------
-:Valid Range: 0-65535 (0=off)
-:Default Value: 32
-
-    This value delays the generation of transmit interrupts in units of
-    0.8192 microseconds.  Transmit interrupt reduction can improve CPU
-    efficiency if properly tuned for specific network traffic.  Increasing
-    this value adds extra latency to frame transmission and can end up
-    decreasing the throughput of TCP traffic.  If this value is set too high,
-    it will cause the driver to run out of available transmit descriptors.
-
-XsumRX
-------
-:Valid Range: 0-1
-:Default Value: 1
-
-    A value of '1' indicates that the driver should enable IP checksum
-    offload for received packets (both UDP and TCP) to the adapter hardware.
-
-RxFCHighThresh
---------------
-:Valid Range: 1,536-262,136 (0x600 - 0x3FFF8, 8 byte granularity)
-:Default Value: 196,608 (0x30000)
-
-    Receive Flow control high threshold (when we send a pause frame)
-
-RxFCLowThresh
--------------
-:Valid Range: 64-262,136 (0x40 - 0x3FFF8, 8 byte granularity)
-:Default Value: 163,840 (0x28000)
-
-    Receive Flow control low threshold (when we send a resume frame)
-
-FCReqTimeout
-------------
-:Valid Range: 1-65535
-:Default Value: 65535
-
-    Flow control request timeout (how long to pause the link partner's tx)
-
-IntDelayEnable
---------------
-:Value Range: 0,1
-:Default Value: 1
-
-    Interrupt Delay, 0 disables transmit interrupt delay and 1 enables it.
-
-
-Improving Performance
-=====================
-
-With the 10 Gigabit server adapters, the default Linux configuration will
-very likely limit the total available throughput artificially.  There is a set
-of configuration changes that, when applied together, will increase the ability
-of Linux to transmit and receive data.  The following enhancements were
-originally acquired from settings published at https://www.spec.org/web99/ for
-various submitted results using Linux.
-
-NOTE:
-  These changes are only suggestions, and serve as a starting point for
-  tuning your network performance.
-
-The changes are made in three major ways, listed in order of greatest effect:
-
-- Use ip link to modify the mtu (maximum transmission unit) and the txqueuelen
-  parameter.
-- Use sysctl to modify /proc parameters (essentially kernel tuning)
-- Use setpci to modify the MMRBC field in PCI-X configuration space to increase
-  transmit burst lengths on the bus.
-
-NOTE:
-  setpci modifies the adapter's configuration registers to allow it to read
-  up to 4k bytes at a time (for transmits).  However, for some systems the
-  behavior after modifying this register may be undefined (possibly errors of
-  some kind).  A power-cycle, hard reset or explicitly setting the e6 register
-  back to 22 (setpci -d 8086:1a48 e6.b=22) may be required to get back to a
-  stable configuration.
-
-- COPY these lines and paste them into ixgb_perf.sh:
-
-::
-
-  #!/bin/bash
-  echo "configuring network performance , edit this file to change the interface
-  or device ID of 10GbE card"
-  # set mmrbc to 4k reads, modify only Intel 10GbE device IDs
-  # replace 1a48 with appropriate 10GbE device's ID installed on the system,
-  # if needed.
-  setpci -d 8086:1a48 e6.b=2e
-  # set the MTU (max transmission unit) - it requires your switch and clients
-  # to change as well.
-  # set the txqueuelen
-  # your ixgb adapter should be loaded as eth1 for this to work, change if needed
-  ip li set dev eth1 mtu 9000 txqueuelen 1000 up
-  # call the sysctl utility to modify /proc/sys entries
-  sysctl -p ./sysctl_ixgb.conf
-
-- COPY these lines and paste them into sysctl_ixgb.conf:
-
-::
-
-  # some of the defaults may be different for your kernel
-  # call this file with sysctl -p <this file>
-  # these are just suggested values that worked well to increase throughput in
-  # several network benchmark tests, your mileage may vary
-
-  ### IPV4 specific settings
-  # turn TCP timestamp support off, default 1, reduces CPU use
-  net.ipv4.tcp_timestamps = 0
-  # turn SACK support off, default on
-  # on systems with a VERY fast bus -> memory interface this is the big gainer
-  net.ipv4.tcp_sack = 0
-  # set min/default/max TCP read buffer, default 4096 87380 174760
-  net.ipv4.tcp_rmem = 10000000 10000000 10000000
-  # set min/pressure/max TCP write buffer, default 4096 16384 131072
-  net.ipv4.tcp_wmem = 10000000 10000000 10000000
-  # set min/pressure/max TCP buffer space, default 31744 32256 32768
-  net.ipv4.tcp_mem = 10000000 10000000 10000000
-
-  ### CORE settings (mostly for socket and UDP effect)
-  # set maximum receive socket buffer size, default 131071
-  net.core.rmem_max = 524287
-  # set maximum send socket buffer size, default 131071
-  net.core.wmem_max = 524287
-  # set default receive socket buffer size, default 65535
-  net.core.rmem_default = 524287
-  # set default send socket buffer size, default 65535
-  net.core.wmem_default = 524287
-  # set maximum amount of option memory buffers, default 10240
-  net.core.optmem_max = 524287
-  # set number of unprocessed input packets before kernel starts dropping them; default 300
-  net.core.netdev_max_backlog = 300000
-
-Edit the ixgb_perf.sh script if necessary to change eth1 to whatever interface
-your ixgb driver is using and/or replace '1a48' with appropriate 10GbE device's
-ID installed on the system.
-
-NOTE:
-  Unless these scripts are added to the boot process, these changes will
-  only last only until the next system reboot.
-
-
-Resolving Slow UDP Traffic
---------------------------
-If your server does not seem to be able to receive UDP traffic as fast as it
-can receive TCP traffic, it could be because Linux, by default, does not set
-the network stack buffers as large as they need to be to support high UDP
-transfer rates.  One way to alleviate this problem is to allow more memory to
-be used by the IP stack to store incoming data.
-
-For instance, use the commands::
-
-    sysctl -w net.core.rmem_max=262143
-
-and::
-
-    sysctl -w net.core.rmem_default=262143
-
-to increase the read buffer memory max and default to 262143 (256k - 1) from
-defaults of max=131071 (128k - 1) and default=65535 (64k - 1).  These variables
-will increase the amount of memory used by the network stack for receives, and
-can be increased significantly more if necessary for your application.
-
-
-Additional Configurations
-=========================
-
-Configuring the Driver on Different Distributions
--------------------------------------------------
-Configuring a network driver to load properly when the system is started is
-distribution dependent. Typically, the configuration process involves adding
-an alias line to /etc/modprobe.conf as well as editing other system startup
-scripts and/or configuration files.  Many popular Linux distributions ship
-with tools to make these changes for you.  To learn the proper way to
-configure a network device for your system, refer to your distribution
-documentation.  If during this process you are asked for the driver or module
-name, the name for the Linux Base Driver for the Intel 10GbE Family of
-Adapters is ixgb.
-
-Viewing Link Messages
----------------------
-Link messages will not be displayed to the console if the distribution is
-restricting system messages. In order to see network driver link messages on
-your console, set dmesg to eight by entering the following::
-
-    dmesg -n 8
-
-NOTE: This setting is not saved across reboots.
-
-Jumbo Frames
-------------
-The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
-enabled by changing the MTU to a value larger than the default of 1500.
-The maximum value for the MTU is 16114.  Use the ip command to
-increase the MTU size.  For example::
-
-    ip li set dev ethx mtu 9000
-
-The maximum MTU setting for Jumbo Frames is 16114.  This value coincides
-with the maximum Jumbo Frames size of 16128.
-
-Ethtool
--------
-The driver utilizes the ethtool interface for driver configuration and
-diagnostics, as well as displaying statistical information.  The ethtool
-version 1.6 or later is required for this functionality.
-
-The latest release of ethtool can be found from
-https://www.kernel.org/pub/software/network/ethtool/
-
-NOTE:
-  The ethtool version 1.6 only supports a limited set of ethtool options.
-  Support for a more complete ethtool feature set can be enabled by
-  upgrading to the latest version.
-
-NAPI
-----
-NAPI (Rx polling mode) is supported in the ixgb driver.
-
-See https://wiki.linuxfoundation.org/networking/napi for more information on
-NAPI.
-
-
-Known Issues/Troubleshooting
-============================
-
-NOTE:
-  After installing the driver, if your Intel Network Connection is not
-  working, verify in the "In This Release" section of the readme that you have
-  installed the correct driver.
-
-Cable Interoperability Issue with Fujitsu XENPAK Module in SmartBits Chassis
-----------------------------------------------------------------------------
-Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4
-Server adapter is connected to a Fujitsu XENPAK CX4 module in a SmartBits
-chassis using 15 m/24AWG cable assemblies manufactured by Fujitsu or Leoni.
-The CRC errors may be received either by the Intel(R) PRO/10GbE CX4
-Server adapter or the SmartBits. If this situation occurs using a different
-cable assembly may resolve the issue.
-
-Cable Interoperability Issues with HP Procurve 3400cl Switch Port
------------------------------------------------------------------
-Excessive CRC errors may be observed if the Intel(R) PRO/10GbE CX4 Server
-adapter is connected to an HP Procurve 3400cl switch port using short cables
-(1 m or shorter). If this situation occurs, using a longer cable may resolve
-the issue.
-
-Excessive CRC errors may be observed using Fujitsu 24AWG cable assemblies that
-Are 10 m or longer or where using a Leoni 15 m/24AWG cable assembly. The CRC
-errors may be received either by the CX4 Server adapter or at the switch. If
-this situation occurs, using a different cable assembly may resolve the issue.
-
-Jumbo Frames System Requirement
--------------------------------
-Memory allocation failures have been observed on Linux systems with 64 MB
-of RAM or less that are running Jumbo Frames.  If you are using Jumbo
-Frames, your system may require more than the advertised minimum
-requirement of 64 MB of system memory.
-
-Performance Degradation with Jumbo Frames
------------------------------------------
-Degradation in throughput performance may be observed in some Jumbo frames
-environments.  If this is observed, increasing the application's socket buffer
-size and/or increasing the /proc/sys/net/ipv4/tcp_*mem entry values may help.
-See the specific application manual and /usr/src/linux*/Documentation/
-networking/ip-sysctl.txt for more details.
-
-Allocating Rx Buffers when Using Jumbo Frames
----------------------------------------------
-Allocating Rx buffers when using Jumbo Frames on 2.6.x kernels may fail if
-the available memory is heavily fragmented. This issue may be seen with PCI-X
-adapters or with packet split disabled. This can be reduced or eliminated
-by changing the amount of available memory for receive buffer allocation, by
-increasing /proc/sys/vm/min_free_kbytes.
-
-Multiple Interfaces on Same Ethernet Broadcast Network
-------------------------------------------------------
-Due to the default ARP behavior on Linux, it is not possible to have
-one system on two IP networks in the same Ethernet broadcast domain
-(non-partitioned switch) behave as expected.  All Ethernet interfaces
-will respond to IP traffic for any IP address assigned to the system.
-This results in unbalanced receive traffic.
-
-If you have multiple interfaces in a server, do either of the following:
-
-  - Turn on ARP filtering by entering::
-
-      echo 1 > /proc/sys/net/ipv4/conf/all/arp_filter
-
-  - Install the interfaces in separate broadcast domains - either in
-    different switches or in a switch partitioned to VLANs.
-
-UDP Stress Test Dropped Packet Issue
---------------------------------------
-Under small packets UDP stress test with 10GbE driver, the Linux system
-may drop UDP packets due to the fullness of socket buffers. You may want
-to change the driver's Flow Control variables to the minimum value for
-controlling packet reception.
-
-Tx Hangs Possible Under Stress
-------------------------------
-Under stress conditions, if TX hangs occur, turning off TSO
-"ethtool -K eth0 tso off" may resolve the problem.
-
-
-Support
-=======
-For general information, go to the Intel support website at:
-
-https://www.intel.com/support/
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
-If an issue is identified with the released source code on a supported kernel
-with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index e18213f01cc4..6cd26dd3c134 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -487,7 +487,6 @@ CONFIG_CHELSIO_T4=m
 CONFIG_E1000=y
 CONFIG_E1000E=y
 CONFIG_IGB=y
-CONFIG_IXGB=y
 CONFIG_IXGBE=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig
index 728bef666f7a..0ab029ecad21 100644
--- a/arch/mips/configs/loongson2k_defconfig
+++ b/arch/mips/configs/loongson2k_defconfig
@@ -154,7 +154,6 @@ CONFIG_TUN=m
 CONFIG_E1000=y
 CONFIG_E1000E=y
 CONFIG_IGB=y
-CONFIG_IXGB=y
 CONFIG_IXGBE=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index aca66a5f330d..6f4a52608ea4 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -207,7 +207,6 @@ CONFIG_VIRTIO_NET=m
 CONFIG_E1000=y
 CONFIG_E1000E=y
 CONFIG_IGB=y
-CONFIG_IXGB=y
 CONFIG_IXGBE=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index edf9634aa8ee..e1b66aac7025 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -280,7 +280,6 @@ CONFIG_SUNDANCE=m
 CONFIG_PCMCIA_FMVJ18X=m
 CONFIG_E100=m
 CONFIG_E1000=m
-CONFIG_IXGB=m
 CONFIG_SKGE=m
 CONFIG_SKY2=m
 CONFIG_MYRI10GE=m
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index c92652575064..f2a9be02a8d2 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -170,7 +170,6 @@ CONFIG_S2IO=m
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
-CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_I40E=m
 CONFIG_MLX4_EN=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index d6949a6c5b2b..6c46e5560d96 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -182,7 +182,6 @@ CONFIG_IBMVNIC=m
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
-CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_I40E=m
 CONFIG_MLX4_EN=m
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index f97a2d31bbf7..776c32964e12 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -102,7 +102,6 @@ CONFIG_PCNET32=y
 CONFIG_TIGON3=y
 CONFIG_E100=y
 CONFIG_E1000=y
-CONFIG_IXGB=m
 CONFIG_SUNGEM=y
 CONFIG_BROADCOM_PHY=m
 CONFIG_MARVELL_PHY=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index f73c98be56c8..5927b2312936 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -455,7 +455,6 @@ CONFIG_E100=m
 CONFIG_E1000=m
 CONFIG_E1000E=m
 CONFIG_IGB=m
-CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_MV643XX_ETH=m
 CONFIG_SKGE=m
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index 7497e17ea657..49b3ff4e3b18 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -164,7 +164,6 @@ CONFIG_IBMVNIC=y
 CONFIG_E100=y
 CONFIG_E1000=y
 CONFIG_E1000E=y
-CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_I40E=m
 CONFIG_MLX4_EN=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index e0964210f259..71cfb990a74f 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -149,7 +149,6 @@ CONFIG_BE2NET=m
 CONFIG_E1000=m
 CONFIG_E1000E=m
 CONFIG_IGB=m
-CONFIG_IXGB=m
 CONFIG_IXGBE=m
 CONFIG_I40E=m
 # CONFIG_NET_VENDOR_MARVELL is not set
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index c18c3b373846..9bc0a9519899 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -139,23 +139,6 @@ config IGBVF
 	  To compile this driver as a module, choose M here. The module
 	  will be called igbvf.
 
-config IXGB
-	tristate "Intel(R) PRO/10GbE support"
-	depends on PCI
-	help
-	  This driver supports Intel(R) PRO/10GbE family of adapters for
-	  PCI-X type cards. For PCI-E type cards, use the "ixgbe" driver
-	  instead. For more information on how to identify your adapter, go
-	  to the Adapter & Driver ID Guide that can be located at:
-
-	  <http://support.intel.com>
-
-	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/device_drivers/ethernet/intel/ixgb.rst>.
-
-	  To compile this driver as a module, choose M here. The module
-	  will be called ixgb.
-
 config IXGBE
 	tristate "Intel(R) 10GbE PCI Express adapters support"
 	depends on PCI
diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile
index 3075290063f6..d80d04132073 100644
--- a/drivers/net/ethernet/intel/Makefile
+++ b/drivers/net/ethernet/intel/Makefile
@@ -12,7 +12,6 @@ obj-$(CONFIG_IGBVF) += igbvf/
 obj-$(CONFIG_IXGBE) += ixgbe/
 obj-$(CONFIG_IXGBEVF) += ixgbevf/
 obj-$(CONFIG_I40E) += i40e/
-obj-$(CONFIG_IXGB) += ixgb/
 obj-$(CONFIG_IAVF) += iavf/
 obj-$(CONFIG_FM10K) += fm10k/
 obj-$(CONFIG_ICE) += ice/
diff --git a/drivers/net/ethernet/intel/ixgb/Makefile b/drivers/net/ethernet/intel/ixgb/Makefile
deleted file mode 100644
index 2433e9300a33..000000000000
--- a/drivers/net/ethernet/intel/ixgb/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Copyright(c) 1999 - 2008 Intel Corporation.
-#
-# Makefile for the Intel(R) PRO/10GbE ethernet driver
-#
-
-obj-$(CONFIG_IXGB) += ixgb.o
-
-ixgb-objs := ixgb_main.o ixgb_hw.o ixgb_ee.o ixgb_ethtool.o ixgb_param.o
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb.h b/drivers/net/ethernet/intel/ixgb/ixgb.h
deleted file mode 100644
index 81ac39576803..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#ifndef _IXGB_H_
-#define _IXGB_H_
-
-#include <linux/stddef.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <linux/ioport.h>
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/skbuff.h>
-#include <linux/delay.h>
-#include <linux/timer.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/pagemap.h>
-#include <linux/dma-mapping.h>
-#include <linux/bitops.h>
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <linux/capability.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <net/pkt_sched.h>
-#include <linux/list.h>
-#include <linux/reboot.h>
-#include <net/checksum.h>
-
-#include <linux/ethtool.h>
-#include <linux/if_vlan.h>
-
-#define BAR_0		0
-#define BAR_1		1
-
-struct ixgb_adapter;
-#include "ixgb_hw.h"
-#include "ixgb_ee.h"
-#include "ixgb_ids.h"
-
-/* TX/RX descriptor defines */
-#define DEFAULT_TXD      256
-#define MAX_TXD         4096
-#define MIN_TXD           64
-
-/* hardware cannot reliably support more than 512 descriptors owned by
- * hardware descriptor cache otherwise an unreliable ring under heavy
- * receive load may result */
-#define DEFAULT_RXD      512
-#define MAX_RXD          512
-#define MIN_RXD           64
-
-/* Supported Rx Buffer Sizes */
-#define IXGB_RXBUFFER_2048  2048
-#define IXGB_RXBUFFER_4096  4096
-#define IXGB_RXBUFFER_8192  8192
-#define IXGB_RXBUFFER_16384 16384
-
-/* How many Rx Buffers do we bundle into one write to the hardware ? */
-#define IXGB_RX_BUFFER_WRITE	8	/* Must be power of 2 */
-
-/* wrapper around a pointer to a socket buffer,
- * so a DMA handle can be stored along with the buffer */
-struct ixgb_buffer {
-	struct sk_buff *skb;
-	dma_addr_t dma;
-	unsigned long time_stamp;
-	u16 length;
-	u16 next_to_watch;
-	u16 mapped_as_page;
-};
-
-struct ixgb_desc_ring {
-	/* pointer to the descriptor ring memory */
-	void *desc;
-	/* physical address of the descriptor ring */
-	dma_addr_t dma;
-	/* length of descriptor ring in bytes */
-	unsigned int size;
-	/* number of descriptors in the ring */
-	unsigned int count;
-	/* next descriptor to associate a buffer with */
-	unsigned int next_to_use;
-	/* next descriptor to check for DD status bit */
-	unsigned int next_to_clean;
-	/* array of buffer information structs */
-	struct ixgb_buffer *buffer_info;
-};
-
-#define IXGB_DESC_UNUSED(R) \
-	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
-	(R)->next_to_clean - (R)->next_to_use - 1)
-
-#define IXGB_GET_DESC(R, i, type)	(&(((struct type *)((R).desc))[i]))
-#define IXGB_RX_DESC(R, i)		IXGB_GET_DESC(R, i, ixgb_rx_desc)
-#define IXGB_TX_DESC(R, i)		IXGB_GET_DESC(R, i, ixgb_tx_desc)
-#define IXGB_CONTEXT_DESC(R, i)	IXGB_GET_DESC(R, i, ixgb_context_desc)
-
-/* board specific private data structure */
-
-struct ixgb_adapter {
-	struct timer_list watchdog_timer;
-	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-	u32 bd_number;
-	u32 rx_buffer_len;
-	u32 part_num;
-	u16 link_speed;
-	u16 link_duplex;
-	struct work_struct tx_timeout_task;
-
-	/* TX */
-	struct ixgb_desc_ring tx_ring ____cacheline_aligned_in_smp;
-	unsigned int restart_queue;
-	unsigned long timeo_start;
-	u32 tx_cmd_type;
-	u64 hw_csum_tx_good;
-	u64 hw_csum_tx_error;
-	u32 tx_int_delay;
-	u32 tx_timeout_count;
-	bool tx_int_delay_enable;
-	bool detect_tx_hung;
-
-	/* RX */
-	struct ixgb_desc_ring rx_ring;
-	u64 hw_csum_rx_error;
-	u64 hw_csum_rx_good;
-	u32 rx_int_delay;
-	bool rx_csum;
-
-	/* OS defined structs */
-	struct napi_struct napi;
-	struct net_device *netdev;
-	struct pci_dev *pdev;
-
-	/* structs defined in ixgb_hw.h */
-	struct ixgb_hw hw;
-	u16 msg_enable;
-	struct ixgb_hw_stats stats;
-	u32 alloc_rx_buff_failed;
-	bool have_msi;
-	unsigned long flags;
-};
-
-enum ixgb_state_t {
-	/* TBD
-	__IXGB_TESTING,
-	__IXGB_RESETTING,
-	*/
-	__IXGB_DOWN
-};
-
-/* Exported from other modules */
-void ixgb_check_options(struct ixgb_adapter *adapter);
-void ixgb_set_ethtool_ops(struct net_device *netdev);
-extern char ixgb_driver_name[];
-
-void ixgb_set_speed_duplex(struct net_device *netdev);
-
-int ixgb_up(struct ixgb_adapter *adapter);
-void ixgb_down(struct ixgb_adapter *adapter, bool kill_watchdog);
-void ixgb_reset(struct ixgb_adapter *adapter);
-int ixgb_setup_rx_resources(struct ixgb_adapter *adapter);
-int ixgb_setup_tx_resources(struct ixgb_adapter *adapter);
-void ixgb_free_rx_resources(struct ixgb_adapter *adapter);
-void ixgb_free_tx_resources(struct ixgb_adapter *adapter);
-void ixgb_update_stats(struct ixgb_adapter *adapter);
-
-
-#endif /* _IXGB_H_ */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.c b/drivers/net/ethernet/intel/ixgb/ixgb_ee.c
deleted file mode 100644
index 129286fc1634..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.c
+++ /dev/null
@@ -1,580 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include "ixgb_hw.h"
-#include "ixgb_ee.h"
-/* Local prototypes */
-static u16 ixgb_shift_in_bits(struct ixgb_hw *hw);
-
-static void ixgb_shift_out_bits(struct ixgb_hw *hw,
-				u16 data,
-				u16 count);
-static void ixgb_standby_eeprom(struct ixgb_hw *hw);
-
-static bool ixgb_wait_eeprom_command(struct ixgb_hw *hw);
-
-static void ixgb_cleanup_eeprom(struct ixgb_hw *hw);
-
-/******************************************************************************
- * Raises the EEPROM's clock input.
- *
- * hw - Struct containing variables accessed by shared code
- * eecd_reg - EECD's current value
- *****************************************************************************/
-static void
-ixgb_raise_clock(struct ixgb_hw *hw,
-		  u32 *eecd_reg)
-{
-	/* Raise the clock input to the EEPROM (by setting the SK bit), and then
-	 *  wait 50 microseconds.
-	 */
-	*eecd_reg = *eecd_reg | IXGB_EECD_SK;
-	IXGB_WRITE_REG(hw, EECD, *eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-}
-
-/******************************************************************************
- * Lowers the EEPROM's clock input.
- *
- * hw - Struct containing variables accessed by shared code
- * eecd_reg - EECD's current value
- *****************************************************************************/
-static void
-ixgb_lower_clock(struct ixgb_hw *hw,
-		  u32 *eecd_reg)
-{
-	/* Lower the clock input to the EEPROM (by clearing the SK bit), and then
-	 * wait 50 microseconds.
-	 */
-	*eecd_reg = *eecd_reg & ~IXGB_EECD_SK;
-	IXGB_WRITE_REG(hw, EECD, *eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-}
-
-/******************************************************************************
- * Shift data bits out to the EEPROM.
- *
- * hw - Struct containing variables accessed by shared code
- * data - data to send to the EEPROM
- * count - number of bits to shift out
- *****************************************************************************/
-static void
-ixgb_shift_out_bits(struct ixgb_hw *hw,
-					 u16 data,
-					 u16 count)
-{
-	u32 eecd_reg;
-	u32 mask;
-
-	/* We need to shift "count" bits out to the EEPROM. So, value in the
-	 * "data" parameter will be shifted out to the EEPROM one bit at a time.
-	 * In order to do this, "data" must be broken down into bits.
-	 */
-	mask = 0x01 << (count - 1);
-	eecd_reg = IXGB_READ_REG(hw, EECD);
-	eecd_reg &= ~(IXGB_EECD_DO | IXGB_EECD_DI);
-	do {
-		/* A "1" is shifted out to the EEPROM by setting bit "DI" to a "1",
-		 * and then raising and then lowering the clock (the SK bit controls
-		 * the clock input to the EEPROM).  A "0" is shifted out to the EEPROM
-		 * by setting "DI" to "0" and then raising and then lowering the clock.
-		 */
-		eecd_reg &= ~IXGB_EECD_DI;
-
-		if (data & mask)
-			eecd_reg |= IXGB_EECD_DI;
-
-		IXGB_WRITE_REG(hw, EECD, eecd_reg);
-		IXGB_WRITE_FLUSH(hw);
-
-		udelay(50);
-
-		ixgb_raise_clock(hw, &eecd_reg);
-		ixgb_lower_clock(hw, &eecd_reg);
-
-		mask = mask >> 1;
-
-	} while (mask);
-
-	/* We leave the "DI" bit set to "0" when we leave this routine. */
-	eecd_reg &= ~IXGB_EECD_DI;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-}
-
-/******************************************************************************
- * Shift data bits in from the EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static u16
-ixgb_shift_in_bits(struct ixgb_hw *hw)
-{
-	u32 eecd_reg;
-	u32 i;
-	u16 data;
-
-	/* In order to read a register from the EEPROM, we need to shift 16 bits
-	 * in from the EEPROM. Bits are "shifted in" by raising the clock input to
-	 * the EEPROM (setting the SK bit), and then reading the value of the "DO"
-	 * bit.  During this "shifting in" process the "DI" bit should always be
-	 * clear..
-	 */
-
-	eecd_reg = IXGB_READ_REG(hw, EECD);
-
-	eecd_reg &= ~(IXGB_EECD_DO | IXGB_EECD_DI);
-	data = 0;
-
-	for (i = 0; i < 16; i++) {
-		data = data << 1;
-		ixgb_raise_clock(hw, &eecd_reg);
-
-		eecd_reg = IXGB_READ_REG(hw, EECD);
-
-		eecd_reg &= ~(IXGB_EECD_DI);
-		if (eecd_reg & IXGB_EECD_DO)
-			data |= 1;
-
-		ixgb_lower_clock(hw, &eecd_reg);
-	}
-
-	return data;
-}
-
-/******************************************************************************
- * Prepares EEPROM for access
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Lowers EEPROM clock. Clears input pin. Sets the chip select pin. This
- * function should be called before issuing a command to the EEPROM.
- *****************************************************************************/
-static void
-ixgb_setup_eeprom(struct ixgb_hw *hw)
-{
-	u32 eecd_reg;
-
-	eecd_reg = IXGB_READ_REG(hw, EECD);
-
-	/*  Clear SK and DI  */
-	eecd_reg &= ~(IXGB_EECD_SK | IXGB_EECD_DI);
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-
-	/*  Set CS  */
-	eecd_reg |= IXGB_EECD_CS;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-}
-
-/******************************************************************************
- * Returns EEPROM to a "standby" state
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_standby_eeprom(struct ixgb_hw *hw)
-{
-	u32 eecd_reg;
-
-	eecd_reg = IXGB_READ_REG(hw, EECD);
-
-	/*  Deselect EEPROM  */
-	eecd_reg &= ~(IXGB_EECD_CS | IXGB_EECD_SK);
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-
-	/*  Clock high  */
-	eecd_reg |= IXGB_EECD_SK;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-
-	/*  Select EEPROM  */
-	eecd_reg |= IXGB_EECD_CS;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-
-	/*  Clock low  */
-	eecd_reg &= ~IXGB_EECD_SK;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-}
-
-/******************************************************************************
- * Raises then lowers the EEPROM's clock pin
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_clock_eeprom(struct ixgb_hw *hw)
-{
-	u32 eecd_reg;
-
-	eecd_reg = IXGB_READ_REG(hw, EECD);
-
-	/*  Rising edge of clock  */
-	eecd_reg |= IXGB_EECD_SK;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-
-	/*  Falling edge of clock  */
-	eecd_reg &= ~IXGB_EECD_SK;
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-	IXGB_WRITE_FLUSH(hw);
-	udelay(50);
-}
-
-/******************************************************************************
- * Terminates a command by lowering the EEPROM's chip select pin
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_cleanup_eeprom(struct ixgb_hw *hw)
-{
-	u32 eecd_reg;
-
-	eecd_reg = IXGB_READ_REG(hw, EECD);
-
-	eecd_reg &= ~(IXGB_EECD_CS | IXGB_EECD_DI);
-
-	IXGB_WRITE_REG(hw, EECD, eecd_reg);
-
-	ixgb_clock_eeprom(hw);
-}
-
-/******************************************************************************
- * Waits for the EEPROM to finish the current command.
- *
- * hw - Struct containing variables accessed by shared code
- *
- * The command is done when the EEPROM's data out pin goes high.
- *
- * Returns:
- *      true: EEPROM data pin is high before timeout.
- *      false:  Time expired.
- *****************************************************************************/
-static bool
-ixgb_wait_eeprom_command(struct ixgb_hw *hw)
-{
-	u32 eecd_reg;
-	u32 i;
-
-	/* Toggle the CS line.  This in effect tells to EEPROM to actually execute
-	 * the command in question.
-	 */
-	ixgb_standby_eeprom(hw);
-
-	/* Now read DO repeatedly until is high (equal to '1').  The EEPROM will
-	 * signal that the command has been completed by raising the DO signal.
-	 * If DO does not go high in 10 milliseconds, then error out.
-	 */
-	for (i = 0; i < 200; i++) {
-		eecd_reg = IXGB_READ_REG(hw, EECD);
-
-		if (eecd_reg & IXGB_EECD_DO)
-			return true;
-
-		udelay(50);
-	}
-	ASSERT(0);
-	return false;
-}
-
-/******************************************************************************
- * Verifies that the EEPROM has a valid checksum
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Reads the first 64 16 bit words of the EEPROM and sums the values read.
- * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is
- * valid.
- *
- * Returns:
- *  true: Checksum is valid
- *  false: Checksum is not valid.
- *****************************************************************************/
-bool
-ixgb_validate_eeprom_checksum(struct ixgb_hw *hw)
-{
-	u16 checksum = 0;
-	u16 i;
-
-	for (i = 0; i < (EEPROM_CHECKSUM_REG + 1); i++)
-		checksum += ixgb_read_eeprom(hw, i);
-
-	if (checksum == (u16) EEPROM_SUM)
-		return true;
-	else
-		return false;
-}
-
-/******************************************************************************
- * Calculates the EEPROM checksum and writes it to the EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Sums the first 63 16 bit words of the EEPROM. Subtracts the sum from 0xBABA.
- * Writes the difference to word offset 63 of the EEPROM.
- *****************************************************************************/
-void
-ixgb_update_eeprom_checksum(struct ixgb_hw *hw)
-{
-	u16 checksum = 0;
-	u16 i;
-
-	for (i = 0; i < EEPROM_CHECKSUM_REG; i++)
-		checksum += ixgb_read_eeprom(hw, i);
-
-	checksum = (u16) EEPROM_SUM - checksum;
-
-	ixgb_write_eeprom(hw, EEPROM_CHECKSUM_REG, checksum);
-}
-
-/******************************************************************************
- * Writes a 16 bit word to a given offset in the EEPROM.
- *
- * hw - Struct containing variables accessed by shared code
- * reg - offset within the EEPROM to be written to
- * data - 16 bit word to be written to the EEPROM
- *
- * If ixgb_update_eeprom_checksum is not called after this function, the
- * EEPROM will most likely contain an invalid checksum.
- *
- *****************************************************************************/
-void
-ixgb_write_eeprom(struct ixgb_hw *hw, u16 offset, u16 data)
-{
-	struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-	/* Prepare the EEPROM for writing */
-	ixgb_setup_eeprom(hw);
-
-	/*  Send the 9-bit EWEN (write enable) command to the EEPROM (5-bit opcode
-	 *  plus 4-bit dummy).  This puts the EEPROM into write/erase mode.
-	 */
-	ixgb_shift_out_bits(hw, EEPROM_EWEN_OPCODE, 5);
-	ixgb_shift_out_bits(hw, 0, 4);
-
-	/*  Prepare the EEPROM  */
-	ixgb_standby_eeprom(hw);
-
-	/*  Send the Write command (3-bit opcode + 6-bit addr)  */
-	ixgb_shift_out_bits(hw, EEPROM_WRITE_OPCODE, 3);
-	ixgb_shift_out_bits(hw, offset, 6);
-
-	/*  Send the data  */
-	ixgb_shift_out_bits(hw, data, 16);
-
-	ixgb_wait_eeprom_command(hw);
-
-	/*  Recover from write  */
-	ixgb_standby_eeprom(hw);
-
-	/* Send the 9-bit EWDS (write disable) command to the EEPROM (5-bit
-	 * opcode plus 4-bit dummy).  This takes the EEPROM out of write/erase
-	 * mode.
-	 */
-	ixgb_shift_out_bits(hw, EEPROM_EWDS_OPCODE, 5);
-	ixgb_shift_out_bits(hw, 0, 4);
-
-	/*  Done with writing  */
-	ixgb_cleanup_eeprom(hw);
-
-	/* clear the init_ctrl_reg_1 to signify that the cache is invalidated */
-	ee_map->init_ctrl_reg_1 = cpu_to_le16(EEPROM_ICW1_SIGNATURE_CLEAR);
-}
-
-/******************************************************************************
- * Reads a 16 bit word from the EEPROM.
- *
- * hw - Struct containing variables accessed by shared code
- * offset - offset of 16 bit word in the EEPROM to read
- *
- * Returns:
- *  The 16-bit value read from the eeprom
- *****************************************************************************/
-u16
-ixgb_read_eeprom(struct ixgb_hw *hw,
-		  u16 offset)
-{
-	u16 data;
-
-	/*  Prepare the EEPROM for reading  */
-	ixgb_setup_eeprom(hw);
-
-	/*  Send the READ command (opcode + addr)  */
-	ixgb_shift_out_bits(hw, EEPROM_READ_OPCODE, 3);
-	/*
-	 * We have a 64 word EEPROM, there are 6 address bits
-	 */
-	ixgb_shift_out_bits(hw, offset, 6);
-
-	/*  Read the data  */
-	data = ixgb_shift_in_bits(hw);
-
-	/*  End this read operation  */
-	ixgb_standby_eeprom(hw);
-
-	return data;
-}
-
-/******************************************************************************
- * Reads eeprom and stores data in shared structure.
- * Validates eeprom checksum and eeprom signature.
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *      true: if eeprom read is successful
- *      false: otherwise.
- *****************************************************************************/
-bool
-ixgb_get_eeprom_data(struct ixgb_hw *hw)
-{
-	u16 i;
-	u16 checksum = 0;
-	struct ixgb_ee_map_type *ee_map;
-
-	ENTER();
-
-	ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-	pr_debug("Reading eeprom data\n");
-	for (i = 0; i < IXGB_EEPROM_SIZE ; i++) {
-		u16 ee_data;
-		ee_data = ixgb_read_eeprom(hw, i);
-		checksum += ee_data;
-		hw->eeprom[i] = cpu_to_le16(ee_data);
-	}
-
-	if (checksum != (u16) EEPROM_SUM) {
-		pr_debug("Checksum invalid\n");
-		/* clear the init_ctrl_reg_1 to signify that the cache is
-		 * invalidated */
-		ee_map->init_ctrl_reg_1 = cpu_to_le16(EEPROM_ICW1_SIGNATURE_CLEAR);
-		return false;
-	}
-
-	if ((ee_map->init_ctrl_reg_1 & cpu_to_le16(EEPROM_ICW1_SIGNATURE_MASK))
-		 != cpu_to_le16(EEPROM_ICW1_SIGNATURE_VALID)) {
-		pr_debug("Signature invalid\n");
-		return false;
-	}
-
-	return true;
-}
-
-/******************************************************************************
- * Local function to check if the eeprom signature is good
- * If the eeprom signature is good, calls ixgb)get_eeprom_data.
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *      true: eeprom signature was good and the eeprom read was successful
- *      false: otherwise.
- ******************************************************************************/
-static bool
-ixgb_check_and_get_eeprom_data (struct ixgb_hw* hw)
-{
-	struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-	if ((ee_map->init_ctrl_reg_1 & cpu_to_le16(EEPROM_ICW1_SIGNATURE_MASK))
-	    == cpu_to_le16(EEPROM_ICW1_SIGNATURE_VALID)) {
-		return true;
-	} else {
-		return ixgb_get_eeprom_data(hw);
-	}
-}
-
-/******************************************************************************
- * return a word from the eeprom
- *
- * hw - Struct containing variables accessed by shared code
- * index - Offset of eeprom word
- *
- * Returns:
- *          Word at indexed offset in eeprom, if valid, 0 otherwise.
- ******************************************************************************/
-__le16
-ixgb_get_eeprom_word(struct ixgb_hw *hw, u16 index)
-{
-
-	if (index < IXGB_EEPROM_SIZE && ixgb_check_and_get_eeprom_data(hw))
-		return hw->eeprom[index];
-
-	return 0;
-}
-
-/******************************************************************************
- * return the mac address from EEPROM
- *
- * hw       - Struct containing variables accessed by shared code
- * mac_addr - Ethernet Address if EEPROM contents are valid, 0 otherwise
- *
- * Returns: None.
- ******************************************************************************/
-void
-ixgb_get_ee_mac_addr(struct ixgb_hw *hw,
-			u8 *mac_addr)
-{
-	int i;
-	struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-	ENTER();
-
-	if (ixgb_check_and_get_eeprom_data(hw)) {
-		for (i = 0; i < ETH_ALEN; i++) {
-			mac_addr[i] = ee_map->mac_addr[i];
-		}
-		pr_debug("eeprom mac address = %pM\n", mac_addr);
-	}
-}
-
-
-/******************************************************************************
- * return the Printed Board Assembly number from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          PBA number if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-u32
-ixgb_get_ee_pba_number(struct ixgb_hw *hw)
-{
-	if (ixgb_check_and_get_eeprom_data(hw))
-		return le16_to_cpu(hw->eeprom[EEPROM_PBA_1_2_REG])
-			| (le16_to_cpu(hw->eeprom[EEPROM_PBA_3_4_REG])<<16);
-
-	return 0;
-}
-
-
-/******************************************************************************
- * return the Device Id from EEPROM
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Returns:
- *          Device Id if EEPROM contents are valid, 0 otherwise
- ******************************************************************************/
-u16
-ixgb_get_ee_device_id(struct ixgb_hw *hw)
-{
-	struct ixgb_ee_map_type *ee_map = (struct ixgb_ee_map_type *)hw->eeprom;
-
-	if (ixgb_check_and_get_eeprom_data(hw))
-		return le16_to_cpu(ee_map->device_id);
-
-	return 0;
-}
-
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h b/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
deleted file mode 100644
index 3ee0a09e5d0a..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ee.h
+++ /dev/null
@@ -1,79 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#ifndef _IXGB_EE_H_
-#define _IXGB_EE_H_
-
-#define IXGB_EEPROM_SIZE    64	/* Size in words */
-
-/* EEPROM Commands */
-#define EEPROM_READ_OPCODE  0x6	/* EEPROM read opcode */
-#define EEPROM_WRITE_OPCODE 0x5	/* EEPROM write opcode */
-#define EEPROM_ERASE_OPCODE 0x7	/* EEPROM erase opcode */
-#define EEPROM_EWEN_OPCODE  0x13	/* EEPROM erase/write enable */
-#define EEPROM_EWDS_OPCODE  0x10	/* EEPROM erase/write disable */
-
-/* EEPROM MAP (Word Offsets) */
-#define EEPROM_IA_1_2_REG        0x0000
-#define EEPROM_IA_3_4_REG        0x0001
-#define EEPROM_IA_5_6_REG        0x0002
-#define EEPROM_COMPATIBILITY_REG 0x0003
-#define EEPROM_PBA_1_2_REG       0x0008
-#define EEPROM_PBA_3_4_REG       0x0009
-#define EEPROM_INIT_CONTROL1_REG 0x000A
-#define EEPROM_SUBSYS_ID_REG     0x000B
-#define EEPROM_SUBVEND_ID_REG    0x000C
-#define EEPROM_DEVICE_ID_REG     0x000D
-#define EEPROM_VENDOR_ID_REG     0x000E
-#define EEPROM_INIT_CONTROL2_REG 0x000F
-#define EEPROM_SWDPINS_REG       0x0020
-#define EEPROM_CIRCUIT_CTRL_REG  0x0021
-#define EEPROM_D0_D3_POWER_REG   0x0022
-#define EEPROM_FLASH_VERSION     0x0032
-#define EEPROM_CHECKSUM_REG      0x003F
-
-/* Mask bits for fields in Word 0x0a of the EEPROM */
-
-#define EEPROM_ICW1_SIGNATURE_MASK  0xC000
-#define EEPROM_ICW1_SIGNATURE_VALID 0x4000
-#define EEPROM_ICW1_SIGNATURE_CLEAR 0x0000
-
-/* For checksumming, the sum of all words in the EEPROM should equal 0xBABA. */
-#define EEPROM_SUM 0xBABA
-
-/* EEPROM Map Sizes (Byte Counts) */
-#define PBA_SIZE 4
-
-/* EEPROM Map defines (WORD OFFSETS)*/
-
-/* EEPROM structure */
-struct ixgb_ee_map_type {
-	u8 mac_addr[ETH_ALEN];
-	__le16 compatibility;
-	__le16 reserved1[4];
-	__le32 pba_number;
-	__le16 init_ctrl_reg_1;
-	__le16 subsystem_id;
-	__le16 subvendor_id;
-	__le16 device_id;
-	__le16 vendor_id;
-	__le16 init_ctrl_reg_2;
-	__le16 oem_reserved[16];
-	__le16 swdpins_reg;
-	__le16 circuit_ctrl_reg;
-	u8 d3_power;
-	u8 d0_power;
-	__le16 reserved2[28];
-	__le16 checksum;
-};
-
-/* EEPROM Functions */
-u16 ixgb_read_eeprom(struct ixgb_hw *hw, u16 reg);
-
-bool ixgb_validate_eeprom_checksum(struct ixgb_hw *hw);
-
-void ixgb_update_eeprom_checksum(struct ixgb_hw *hw);
-
-void ixgb_write_eeprom(struct ixgb_hw *hw, u16 reg, u16 data);
-
-#endif				/* IXGB_EE_H */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c b/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
deleted file mode 100644
index efa980514944..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ethtool.c
+++ /dev/null
@@ -1,642 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-/* ethtool support for ixgb */
-
-#include "ixgb.h"
-
-#include <linux/uaccess.h>
-
-#define IXGB_ALL_RAR_ENTRIES 16
-
-enum {NETDEV_STATS, IXGB_STATS};
-
-struct ixgb_stats {
-	char stat_string[ETH_GSTRING_LEN];
-	int type;
-	int sizeof_stat;
-	int stat_offset;
-};
-
-#define IXGB_STAT(m)		IXGB_STATS, \
-				sizeof_field(struct ixgb_adapter, m), \
-				offsetof(struct ixgb_adapter, m)
-#define IXGB_NETDEV_STAT(m)	NETDEV_STATS, \
-				sizeof_field(struct net_device, m), \
-				offsetof(struct net_device, m)
-
-static struct ixgb_stats ixgb_gstrings_stats[] = {
-	{"rx_packets", IXGB_NETDEV_STAT(stats.rx_packets)},
-	{"tx_packets", IXGB_NETDEV_STAT(stats.tx_packets)},
-	{"rx_bytes", IXGB_NETDEV_STAT(stats.rx_bytes)},
-	{"tx_bytes", IXGB_NETDEV_STAT(stats.tx_bytes)},
-	{"rx_errors", IXGB_NETDEV_STAT(stats.rx_errors)},
-	{"tx_errors", IXGB_NETDEV_STAT(stats.tx_errors)},
-	{"rx_dropped", IXGB_NETDEV_STAT(stats.rx_dropped)},
-	{"tx_dropped", IXGB_NETDEV_STAT(stats.tx_dropped)},
-	{"multicast", IXGB_NETDEV_STAT(stats.multicast)},
-	{"collisions", IXGB_NETDEV_STAT(stats.collisions)},
-
-/*	{ "rx_length_errors", IXGB_NETDEV_STAT(stats.rx_length_errors) },	*/
-	{"rx_over_errors", IXGB_NETDEV_STAT(stats.rx_over_errors)},
-	{"rx_crc_errors", IXGB_NETDEV_STAT(stats.rx_crc_errors)},
-	{"rx_frame_errors", IXGB_NETDEV_STAT(stats.rx_frame_errors)},
-	{"rx_no_buffer_count", IXGB_STAT(stats.rnbc)},
-	{"rx_fifo_errors", IXGB_NETDEV_STAT(stats.rx_fifo_errors)},
-	{"rx_missed_errors", IXGB_NETDEV_STAT(stats.rx_missed_errors)},
-	{"tx_aborted_errors", IXGB_NETDEV_STAT(stats.tx_aborted_errors)},
-	{"tx_carrier_errors", IXGB_NETDEV_STAT(stats.tx_carrier_errors)},
-	{"tx_fifo_errors", IXGB_NETDEV_STAT(stats.tx_fifo_errors)},
-	{"tx_heartbeat_errors", IXGB_NETDEV_STAT(stats.tx_heartbeat_errors)},
-	{"tx_window_errors", IXGB_NETDEV_STAT(stats.tx_window_errors)},
-	{"tx_deferred_ok", IXGB_STAT(stats.dc)},
-	{"tx_timeout_count", IXGB_STAT(tx_timeout_count) },
-	{"tx_restart_queue", IXGB_STAT(restart_queue) },
-	{"rx_long_length_errors", IXGB_STAT(stats.roc)},
-	{"rx_short_length_errors", IXGB_STAT(stats.ruc)},
-	{"tx_tcp_seg_good", IXGB_STAT(stats.tsctc)},
-	{"tx_tcp_seg_failed", IXGB_STAT(stats.tsctfc)},
-	{"rx_flow_control_xon", IXGB_STAT(stats.xonrxc)},
-	{"rx_flow_control_xoff", IXGB_STAT(stats.xoffrxc)},
-	{"tx_flow_control_xon", IXGB_STAT(stats.xontxc)},
-	{"tx_flow_control_xoff", IXGB_STAT(stats.xofftxc)},
-	{"rx_csum_offload_good", IXGB_STAT(hw_csum_rx_good)},
-	{"rx_csum_offload_errors", IXGB_STAT(hw_csum_rx_error)},
-	{"tx_csum_offload_good", IXGB_STAT(hw_csum_tx_good)},
-	{"tx_csum_offload_errors", IXGB_STAT(hw_csum_tx_error)}
-};
-
-#define IXGB_STATS_LEN	ARRAY_SIZE(ixgb_gstrings_stats)
-
-static int
-ixgb_get_link_ksettings(struct net_device *netdev,
-			struct ethtool_link_ksettings *cmd)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	ethtool_link_ksettings_zero_link_mode(cmd, supported);
-	ethtool_link_ksettings_add_link_mode(cmd, supported, 10000baseT_Full);
-	ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE);
-
-	ethtool_link_ksettings_zero_link_mode(cmd, advertising);
-	ethtool_link_ksettings_add_link_mode(cmd, advertising, 10000baseT_Full);
-	ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE);
-
-	cmd->base.port = PORT_FIBRE;
-
-	if (netif_carrier_ok(adapter->netdev)) {
-		cmd->base.speed = SPEED_10000;
-		cmd->base.duplex = DUPLEX_FULL;
-	} else {
-		cmd->base.speed = SPEED_UNKNOWN;
-		cmd->base.duplex = DUPLEX_UNKNOWN;
-	}
-
-	cmd->base.autoneg = AUTONEG_DISABLE;
-	return 0;
-}
-
-void ixgb_set_speed_duplex(struct net_device *netdev)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	/* be optimistic about our link, since we were up before */
-	adapter->link_speed = 10000;
-	adapter->link_duplex = FULL_DUPLEX;
-	netif_carrier_on(netdev);
-	netif_wake_queue(netdev);
-}
-
-static int
-ixgb_set_link_ksettings(struct net_device *netdev,
-			const struct ethtool_link_ksettings *cmd)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u32 speed = cmd->base.speed;
-
-	if (cmd->base.autoneg == AUTONEG_ENABLE ||
-	    (speed + cmd->base.duplex != SPEED_10000 + DUPLEX_FULL))
-		return -EINVAL;
-
-	if (netif_running(adapter->netdev)) {
-		ixgb_down(adapter, true);
-		ixgb_reset(adapter);
-		ixgb_up(adapter);
-		ixgb_set_speed_duplex(netdev);
-	} else
-		ixgb_reset(adapter);
-
-	return 0;
-}
-
-static void
-ixgb_get_pauseparam(struct net_device *netdev,
-			 struct ethtool_pauseparam *pause)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-
-	pause->autoneg = AUTONEG_DISABLE;
-
-	if (hw->fc.type == ixgb_fc_rx_pause)
-		pause->rx_pause = 1;
-	else if (hw->fc.type == ixgb_fc_tx_pause)
-		pause->tx_pause = 1;
-	else if (hw->fc.type == ixgb_fc_full) {
-		pause->rx_pause = 1;
-		pause->tx_pause = 1;
-	}
-}
-
-static int
-ixgb_set_pauseparam(struct net_device *netdev,
-			 struct ethtool_pauseparam *pause)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-
-	if (pause->autoneg == AUTONEG_ENABLE)
-		return -EINVAL;
-
-	if (pause->rx_pause && pause->tx_pause)
-		hw->fc.type = ixgb_fc_full;
-	else if (pause->rx_pause && !pause->tx_pause)
-		hw->fc.type = ixgb_fc_rx_pause;
-	else if (!pause->rx_pause && pause->tx_pause)
-		hw->fc.type = ixgb_fc_tx_pause;
-	else if (!pause->rx_pause && !pause->tx_pause)
-		hw->fc.type = ixgb_fc_none;
-
-	if (netif_running(adapter->netdev)) {
-		ixgb_down(adapter, true);
-		ixgb_up(adapter);
-		ixgb_set_speed_duplex(netdev);
-	} else
-		ixgb_reset(adapter);
-
-	return 0;
-}
-
-static u32
-ixgb_get_msglevel(struct net_device *netdev)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	return adapter->msg_enable;
-}
-
-static void
-ixgb_set_msglevel(struct net_device *netdev, u32 data)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	adapter->msg_enable = data;
-}
-#define IXGB_GET_STAT(_A_, _R_) _A_->stats._R_
-
-static int
-ixgb_get_regs_len(struct net_device *netdev)
-{
-#define IXGB_REG_DUMP_LEN  136*sizeof(u32)
-	return IXGB_REG_DUMP_LEN;
-}
-
-static void
-ixgb_get_regs(struct net_device *netdev,
-		   struct ethtool_regs *regs, void *p)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-	u32 *reg = p;
-	u32 *reg_start = reg;
-	u8 i;
-
-	/* the 1 (one) below indicates an attempt at versioning, if the
-	 * interface in ethtool or the driver changes, this 1 should be
-	 * incremented */
-	regs->version = (1<<24) | hw->revision_id << 16 | hw->device_id;
-
-	/* General Registers */
-	*reg++ = IXGB_READ_REG(hw, CTRL0);	/*   0 */
-	*reg++ = IXGB_READ_REG(hw, CTRL1);	/*   1 */
-	*reg++ = IXGB_READ_REG(hw, STATUS);	/*   2 */
-	*reg++ = IXGB_READ_REG(hw, EECD);	/*   3 */
-	*reg++ = IXGB_READ_REG(hw, MFS);	/*   4 */
-
-	/* Interrupt */
-	*reg++ = IXGB_READ_REG(hw, ICR);	/*   5 */
-	*reg++ = IXGB_READ_REG(hw, ICS);	/*   6 */
-	*reg++ = IXGB_READ_REG(hw, IMS);	/*   7 */
-	*reg++ = IXGB_READ_REG(hw, IMC);	/*   8 */
-
-	/* Receive */
-	*reg++ = IXGB_READ_REG(hw, RCTL);	/*   9 */
-	*reg++ = IXGB_READ_REG(hw, FCRTL);	/*  10 */
-	*reg++ = IXGB_READ_REG(hw, FCRTH);	/*  11 */
-	*reg++ = IXGB_READ_REG(hw, RDBAL);	/*  12 */
-	*reg++ = IXGB_READ_REG(hw, RDBAH);	/*  13 */
-	*reg++ = IXGB_READ_REG(hw, RDLEN);	/*  14 */
-	*reg++ = IXGB_READ_REG(hw, RDH);	/*  15 */
-	*reg++ = IXGB_READ_REG(hw, RDT);	/*  16 */
-	*reg++ = IXGB_READ_REG(hw, RDTR);	/*  17 */
-	*reg++ = IXGB_READ_REG(hw, RXDCTL);	/*  18 */
-	*reg++ = IXGB_READ_REG(hw, RAIDC);	/*  19 */
-	*reg++ = IXGB_READ_REG(hw, RXCSUM);	/*  20 */
-
-	/* there are 16 RAR entries in hardware, we only use 3 */
-	for (i = 0; i < IXGB_ALL_RAR_ENTRIES; i++) {
-		*reg++ = IXGB_READ_REG_ARRAY(hw, RAL, (i << 1)); /*21,...,51 */
-		*reg++ = IXGB_READ_REG_ARRAY(hw, RAH, (i << 1)); /*22,...,52 */
-	}
-
-	/* Transmit */
-	*reg++ = IXGB_READ_REG(hw, TCTL);	/*  53 */
-	*reg++ = IXGB_READ_REG(hw, TDBAL);	/*  54 */
-	*reg++ = IXGB_READ_REG(hw, TDBAH);	/*  55 */
-	*reg++ = IXGB_READ_REG(hw, TDLEN);	/*  56 */
-	*reg++ = IXGB_READ_REG(hw, TDH);	/*  57 */
-	*reg++ = IXGB_READ_REG(hw, TDT);	/*  58 */
-	*reg++ = IXGB_READ_REG(hw, TIDV);	/*  59 */
-	*reg++ = IXGB_READ_REG(hw, TXDCTL);	/*  60 */
-	*reg++ = IXGB_READ_REG(hw, TSPMT);	/*  61 */
-	*reg++ = IXGB_READ_REG(hw, PAP);	/*  62 */
-
-	/* Physical */
-	*reg++ = IXGB_READ_REG(hw, PCSC1);	/*  63 */
-	*reg++ = IXGB_READ_REG(hw, PCSC2);	/*  64 */
-	*reg++ = IXGB_READ_REG(hw, PCSS1);	/*  65 */
-	*reg++ = IXGB_READ_REG(hw, PCSS2);	/*  66 */
-	*reg++ = IXGB_READ_REG(hw, XPCSS);	/*  67 */
-	*reg++ = IXGB_READ_REG(hw, UCCR);	/*  68 */
-	*reg++ = IXGB_READ_REG(hw, XPCSTC);	/*  69 */
-	*reg++ = IXGB_READ_REG(hw, MACA);	/*  70 */
-	*reg++ = IXGB_READ_REG(hw, APAE);	/*  71 */
-	*reg++ = IXGB_READ_REG(hw, ARD);	/*  72 */
-	*reg++ = IXGB_READ_REG(hw, AIS);	/*  73 */
-	*reg++ = IXGB_READ_REG(hw, MSCA);	/*  74 */
-	*reg++ = IXGB_READ_REG(hw, MSRWD);	/*  75 */
-
-	/* Statistics */
-	*reg++ = IXGB_GET_STAT(adapter, tprl);	/*  76 */
-	*reg++ = IXGB_GET_STAT(adapter, tprh);	/*  77 */
-	*reg++ = IXGB_GET_STAT(adapter, gprcl);	/*  78 */
-	*reg++ = IXGB_GET_STAT(adapter, gprch);	/*  79 */
-	*reg++ = IXGB_GET_STAT(adapter, bprcl);	/*  80 */
-	*reg++ = IXGB_GET_STAT(adapter, bprch);	/*  81 */
-	*reg++ = IXGB_GET_STAT(adapter, mprcl);	/*  82 */
-	*reg++ = IXGB_GET_STAT(adapter, mprch);	/*  83 */
-	*reg++ = IXGB_GET_STAT(adapter, uprcl);	/*  84 */
-	*reg++ = IXGB_GET_STAT(adapter, uprch);	/*  85 */
-	*reg++ = IXGB_GET_STAT(adapter, vprcl);	/*  86 */
-	*reg++ = IXGB_GET_STAT(adapter, vprch);	/*  87 */
-	*reg++ = IXGB_GET_STAT(adapter, jprcl);	/*  88 */
-	*reg++ = IXGB_GET_STAT(adapter, jprch);	/*  89 */
-	*reg++ = IXGB_GET_STAT(adapter, gorcl);	/*  90 */
-	*reg++ = IXGB_GET_STAT(adapter, gorch);	/*  91 */
-	*reg++ = IXGB_GET_STAT(adapter, torl);	/*  92 */
-	*reg++ = IXGB_GET_STAT(adapter, torh);	/*  93 */
-	*reg++ = IXGB_GET_STAT(adapter, rnbc);	/*  94 */
-	*reg++ = IXGB_GET_STAT(adapter, ruc);	/*  95 */
-	*reg++ = IXGB_GET_STAT(adapter, roc);	/*  96 */
-	*reg++ = IXGB_GET_STAT(adapter, rlec);	/*  97 */
-	*reg++ = IXGB_GET_STAT(adapter, crcerrs);	/*  98 */
-	*reg++ = IXGB_GET_STAT(adapter, icbc);	/*  99 */
-	*reg++ = IXGB_GET_STAT(adapter, ecbc);	/* 100 */
-	*reg++ = IXGB_GET_STAT(adapter, mpc);	/* 101 */
-	*reg++ = IXGB_GET_STAT(adapter, tptl);	/* 102 */
-	*reg++ = IXGB_GET_STAT(adapter, tpth);	/* 103 */
-	*reg++ = IXGB_GET_STAT(adapter, gptcl);	/* 104 */
-	*reg++ = IXGB_GET_STAT(adapter, gptch);	/* 105 */
-	*reg++ = IXGB_GET_STAT(adapter, bptcl);	/* 106 */
-	*reg++ = IXGB_GET_STAT(adapter, bptch);	/* 107 */
-	*reg++ = IXGB_GET_STAT(adapter, mptcl);	/* 108 */
-	*reg++ = IXGB_GET_STAT(adapter, mptch);	/* 109 */
-	*reg++ = IXGB_GET_STAT(adapter, uptcl);	/* 110 */
-	*reg++ = IXGB_GET_STAT(adapter, uptch);	/* 111 */
-	*reg++ = IXGB_GET_STAT(adapter, vptcl);	/* 112 */
-	*reg++ = IXGB_GET_STAT(adapter, vptch);	/* 113 */
-	*reg++ = IXGB_GET_STAT(adapter, jptcl);	/* 114 */
-	*reg++ = IXGB_GET_STAT(adapter, jptch);	/* 115 */
-	*reg++ = IXGB_GET_STAT(adapter, gotcl);	/* 116 */
-	*reg++ = IXGB_GET_STAT(adapter, gotch);	/* 117 */
-	*reg++ = IXGB_GET_STAT(adapter, totl);	/* 118 */
-	*reg++ = IXGB_GET_STAT(adapter, toth);	/* 119 */
-	*reg++ = IXGB_GET_STAT(adapter, dc);	/* 120 */
-	*reg++ = IXGB_GET_STAT(adapter, plt64c);	/* 121 */
-	*reg++ = IXGB_GET_STAT(adapter, tsctc);	/* 122 */
-	*reg++ = IXGB_GET_STAT(adapter, tsctfc);	/* 123 */
-	*reg++ = IXGB_GET_STAT(adapter, ibic);	/* 124 */
-	*reg++ = IXGB_GET_STAT(adapter, rfc);	/* 125 */
-	*reg++ = IXGB_GET_STAT(adapter, lfc);	/* 126 */
-	*reg++ = IXGB_GET_STAT(adapter, pfrc);	/* 127 */
-	*reg++ = IXGB_GET_STAT(adapter, pftc);	/* 128 */
-	*reg++ = IXGB_GET_STAT(adapter, mcfrc);	/* 129 */
-	*reg++ = IXGB_GET_STAT(adapter, mcftc);	/* 130 */
-	*reg++ = IXGB_GET_STAT(adapter, xonrxc);	/* 131 */
-	*reg++ = IXGB_GET_STAT(adapter, xontxc);	/* 132 */
-	*reg++ = IXGB_GET_STAT(adapter, xoffrxc);	/* 133 */
-	*reg++ = IXGB_GET_STAT(adapter, xofftxc);	/* 134 */
-	*reg++ = IXGB_GET_STAT(adapter, rjc);	/* 135 */
-
-	regs->len = (reg - reg_start) * sizeof(u32);
-}
-
-static int
-ixgb_get_eeprom_len(struct net_device *netdev)
-{
-	/* return size in bytes */
-	return IXGB_EEPROM_SIZE << 1;
-}
-
-static int
-ixgb_get_eeprom(struct net_device *netdev,
-		  struct ethtool_eeprom *eeprom, u8 *bytes)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-	__le16 *eeprom_buff;
-	int i, max_len, first_word, last_word;
-	int ret_val = 0;
-
-	if (eeprom->len == 0) {
-		ret_val = -EINVAL;
-		goto geeprom_error;
-	}
-
-	eeprom->magic = hw->vendor_id | (hw->device_id << 16);
-
-	max_len = ixgb_get_eeprom_len(netdev);
-
-	if (eeprom->offset > eeprom->offset + eeprom->len) {
-		ret_val = -EINVAL;
-		goto geeprom_error;
-	}
-
-	if ((eeprom->offset + eeprom->len) > max_len)
-		eeprom->len = (max_len - eeprom->offset);
-
-	first_word = eeprom->offset >> 1;
-	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
-
-	eeprom_buff = kmalloc_array(last_word - first_word + 1,
-				    sizeof(__le16),
-				    GFP_KERNEL);
-	if (!eeprom_buff)
-		return -ENOMEM;
-
-	/* note the eeprom was good because the driver loaded */
-	for (i = 0; i <= (last_word - first_word); i++)
-		eeprom_buff[i] = ixgb_get_eeprom_word(hw, (first_word + i));
-
-	memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
-	kfree(eeprom_buff);
-
-geeprom_error:
-	return ret_val;
-}
-
-static int
-ixgb_set_eeprom(struct net_device *netdev,
-		  struct ethtool_eeprom *eeprom, u8 *bytes)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-	u16 *eeprom_buff;
-	void *ptr;
-	int max_len, first_word, last_word;
-	u16 i;
-
-	if (eeprom->len == 0)
-		return -EINVAL;
-
-	if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16)))
-		return -EFAULT;
-
-	max_len = ixgb_get_eeprom_len(netdev);
-
-	if (eeprom->offset > eeprom->offset + eeprom->len)
-		return -EINVAL;
-
-	if ((eeprom->offset + eeprom->len) > max_len)
-		eeprom->len = (max_len - eeprom->offset);
-
-	first_word = eeprom->offset >> 1;
-	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
-	eeprom_buff = kmalloc(max_len, GFP_KERNEL);
-	if (!eeprom_buff)
-		return -ENOMEM;
-
-	ptr = (void *)eeprom_buff;
-
-	if (eeprom->offset & 1) {
-		/* need read/modify/write of first changed EEPROM word */
-		/* only the second byte of the word is being modified */
-		eeprom_buff[0] = ixgb_read_eeprom(hw, first_word);
-		ptr++;
-	}
-	if ((eeprom->offset + eeprom->len) & 1) {
-		/* need read/modify/write of last changed EEPROM word */
-		/* only the first byte of the word is being modified */
-		eeprom_buff[last_word - first_word]
-			= ixgb_read_eeprom(hw, last_word);
-	}
-
-	memcpy(ptr, bytes, eeprom->len);
-	for (i = 0; i <= (last_word - first_word); i++)
-		ixgb_write_eeprom(hw, first_word + i, eeprom_buff[i]);
-
-	/* Update the checksum over the first part of the EEPROM if needed */
-	if (first_word <= EEPROM_CHECKSUM_REG)
-		ixgb_update_eeprom_checksum(hw);
-
-	kfree(eeprom_buff);
-	return 0;
-}
-
-static void
-ixgb_get_drvinfo(struct net_device *netdev,
-		   struct ethtool_drvinfo *drvinfo)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	strscpy(drvinfo->driver,  ixgb_driver_name,
-		sizeof(drvinfo->driver));
-	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
-		sizeof(drvinfo->bus_info));
-}
-
-static void
-ixgb_get_ringparam(struct net_device *netdev,
-		   struct ethtool_ringparam *ring,
-		   struct kernel_ethtool_ringparam *kernel_ring,
-		   struct netlink_ext_ack *extack)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_desc_ring *txdr = &adapter->tx_ring;
-	struct ixgb_desc_ring *rxdr = &adapter->rx_ring;
-
-	ring->rx_max_pending = MAX_RXD;
-	ring->tx_max_pending = MAX_TXD;
-	ring->rx_pending = rxdr->count;
-	ring->tx_pending = txdr->count;
-}
-
-static int
-ixgb_set_ringparam(struct net_device *netdev,
-		   struct ethtool_ringparam *ring,
-		   struct kernel_ethtool_ringparam *kernel_ring,
-		   struct netlink_ext_ack *extack)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_desc_ring *txdr = &adapter->tx_ring;
-	struct ixgb_desc_ring *rxdr = &adapter->rx_ring;
-	struct ixgb_desc_ring tx_old, tx_new, rx_old, rx_new;
-	int err;
-
-	tx_old = adapter->tx_ring;
-	rx_old = adapter->rx_ring;
-
-	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
-		return -EINVAL;
-
-	if (netif_running(adapter->netdev))
-		ixgb_down(adapter, true);
-
-	rxdr->count = max(ring->rx_pending,(u32)MIN_RXD);
-	rxdr->count = min(rxdr->count,(u32)MAX_RXD);
-	rxdr->count = ALIGN(rxdr->count, IXGB_REQ_RX_DESCRIPTOR_MULTIPLE);
-
-	txdr->count = max(ring->tx_pending,(u32)MIN_TXD);
-	txdr->count = min(txdr->count,(u32)MAX_TXD);
-	txdr->count = ALIGN(txdr->count, IXGB_REQ_TX_DESCRIPTOR_MULTIPLE);
-
-	if (netif_running(adapter->netdev)) {
-		/* Try to get new resources before deleting old */
-		if ((err = ixgb_setup_rx_resources(adapter)))
-			goto err_setup_rx;
-		if ((err = ixgb_setup_tx_resources(adapter)))
-			goto err_setup_tx;
-
-		/* save the new, restore the old in order to free it,
-		 * then restore the new back again */
-
-		rx_new = adapter->rx_ring;
-		tx_new = adapter->tx_ring;
-		adapter->rx_ring = rx_old;
-		adapter->tx_ring = tx_old;
-		ixgb_free_rx_resources(adapter);
-		ixgb_free_tx_resources(adapter);
-		adapter->rx_ring = rx_new;
-		adapter->tx_ring = tx_new;
-		if ((err = ixgb_up(adapter)))
-			return err;
-		ixgb_set_speed_duplex(netdev);
-	}
-
-	return 0;
-err_setup_tx:
-	ixgb_free_rx_resources(adapter);
-err_setup_rx:
-	adapter->rx_ring = rx_old;
-	adapter->tx_ring = tx_old;
-	ixgb_up(adapter);
-	return err;
-}
-
-static int
-ixgb_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	switch (state) {
-	case ETHTOOL_ID_ACTIVE:
-		return 2;
-
-	case ETHTOOL_ID_ON:
-		ixgb_led_on(&adapter->hw);
-		break;
-
-	case ETHTOOL_ID_OFF:
-	case ETHTOOL_ID_INACTIVE:
-		ixgb_led_off(&adapter->hw);
-	}
-
-	return 0;
-}
-
-static int
-ixgb_get_sset_count(struct net_device *netdev, int sset)
-{
-	switch (sset) {
-	case ETH_SS_STATS:
-		return IXGB_STATS_LEN;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void
-ixgb_get_ethtool_stats(struct net_device *netdev,
-		struct ethtool_stats *stats, u64 *data)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	int i;
-	char *p = NULL;
-
-	ixgb_update_stats(adapter);
-	for (i = 0; i < IXGB_STATS_LEN; i++) {
-		switch (ixgb_gstrings_stats[i].type) {
-		case NETDEV_STATS:
-			p = (char *) netdev +
-					ixgb_gstrings_stats[i].stat_offset;
-			break;
-		case IXGB_STATS:
-			p = (char *) adapter +
-					ixgb_gstrings_stats[i].stat_offset;
-			break;
-		}
-
-		data[i] = (ixgb_gstrings_stats[i].sizeof_stat ==
-			sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
-	}
-}
-
-static void
-ixgb_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
-{
-	int i;
-
-	switch(stringset) {
-	case ETH_SS_STATS:
-		for (i = 0; i < IXGB_STATS_LEN; i++) {
-			memcpy(data + i * ETH_GSTRING_LEN,
-			ixgb_gstrings_stats[i].stat_string,
-			ETH_GSTRING_LEN);
-		}
-		break;
-	}
-}
-
-static const struct ethtool_ops ixgb_ethtool_ops = {
-	.get_drvinfo = ixgb_get_drvinfo,
-	.get_regs_len = ixgb_get_regs_len,
-	.get_regs = ixgb_get_regs,
-	.get_link = ethtool_op_get_link,
-	.get_eeprom_len = ixgb_get_eeprom_len,
-	.get_eeprom = ixgb_get_eeprom,
-	.set_eeprom = ixgb_set_eeprom,
-	.get_ringparam = ixgb_get_ringparam,
-	.set_ringparam = ixgb_set_ringparam,
-	.get_pauseparam	= ixgb_get_pauseparam,
-	.set_pauseparam	= ixgb_set_pauseparam,
-	.get_msglevel = ixgb_get_msglevel,
-	.set_msglevel = ixgb_set_msglevel,
-	.get_strings = ixgb_get_strings,
-	.set_phys_id = ixgb_set_phys_id,
-	.get_sset_count = ixgb_get_sset_count,
-	.get_ethtool_stats = ixgb_get_ethtool_stats,
-	.get_link_ksettings = ixgb_get_link_ksettings,
-	.set_link_ksettings = ixgb_set_link_ksettings,
-};
-
-void ixgb_set_ethtool_ops(struct net_device *netdev)
-{
-	netdev->ethtool_ops = &ixgb_ethtool_ops;
-}
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
deleted file mode 100644
index 98bd3267b99b..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c
+++ /dev/null
@@ -1,1229 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-/* ixgb_hw.c
- * Shared functions for accessing and configuring the adapter
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/pci_ids.h>
-#include "ixgb_hw.h"
-#include "ixgb_ids.h"
-
-#include <linux/etherdevice.h>
-
-/*  Local function prototypes */
-
-static u32 ixgb_hash_mc_addr(struct ixgb_hw *hw, u8 * mc_addr);
-
-static void ixgb_mta_set(struct ixgb_hw *hw, u32 hash_value);
-
-static void ixgb_get_bus_info(struct ixgb_hw *hw);
-
-static bool ixgb_link_reset(struct ixgb_hw *hw);
-
-static void ixgb_optics_reset(struct ixgb_hw *hw);
-
-static void ixgb_optics_reset_bcm(struct ixgb_hw *hw);
-
-static ixgb_phy_type ixgb_identify_phy(struct ixgb_hw *hw);
-
-static void ixgb_clear_hw_cntrs(struct ixgb_hw *hw);
-
-static void ixgb_clear_vfta(struct ixgb_hw *hw);
-
-static void ixgb_init_rx_addrs(struct ixgb_hw *hw);
-
-static u16 ixgb_read_phy_reg(struct ixgb_hw *hw,
-				  u32 reg_address,
-				  u32 phy_address,
-				  u32 device_type);
-
-static bool ixgb_setup_fc(struct ixgb_hw *hw);
-
-static bool mac_addr_valid(u8 *mac_addr);
-
-static u32 ixgb_mac_reset(struct ixgb_hw *hw)
-{
-	u32 ctrl_reg;
-
-	ctrl_reg =  IXGB_CTRL0_RST |
-				IXGB_CTRL0_SDP3_DIR |   /* All pins are Output=1 */
-				IXGB_CTRL0_SDP2_DIR |
-				IXGB_CTRL0_SDP1_DIR |
-				IXGB_CTRL0_SDP0_DIR |
-				IXGB_CTRL0_SDP3	 |   /* Initial value 1101   */
-				IXGB_CTRL0_SDP2	 |
-				IXGB_CTRL0_SDP0;
-
-#ifdef HP_ZX1
-	/* Workaround for 82597EX reset errata */
-	IXGB_WRITE_REG_IO(hw, CTRL0, ctrl_reg);
-#else
-	IXGB_WRITE_REG(hw, CTRL0, ctrl_reg);
-#endif
-
-	/* Delay a few ms just to allow the reset to complete */
-	msleep(IXGB_DELAY_AFTER_RESET);
-	ctrl_reg = IXGB_READ_REG(hw, CTRL0);
-#ifdef DBG
-	/* Make sure the self-clearing global reset bit did self clear */
-	ASSERT(!(ctrl_reg & IXGB_CTRL0_RST));
-#endif
-
-	if (hw->subsystem_vendor_id == PCI_VENDOR_ID_SUN) {
-		ctrl_reg =  /* Enable interrupt from XFP and SerDes */
-			   IXGB_CTRL1_GPI0_EN |
-			   IXGB_CTRL1_SDP6_DIR |
-			   IXGB_CTRL1_SDP7_DIR |
-			   IXGB_CTRL1_SDP6 |
-			   IXGB_CTRL1_SDP7;
-		IXGB_WRITE_REG(hw, CTRL1, ctrl_reg);
-		ixgb_optics_reset_bcm(hw);
-	}
-
-	if (hw->phy_type == ixgb_phy_type_txn17401)
-		ixgb_optics_reset(hw);
-
-	return ctrl_reg;
-}
-
-/******************************************************************************
- * Reset the transmit and receive units; mask and clear all interrupts.
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-bool
-ixgb_adapter_stop(struct ixgb_hw *hw)
-{
-	u32 ctrl_reg;
-
-	ENTER();
-
-	/* If we are stopped or resetting exit gracefully and wait to be
-	 * started again before accessing the hardware.
-	 */
-	if (hw->adapter_stopped) {
-		pr_debug("Exiting because the adapter is already stopped!!!\n");
-		return false;
-	}
-
-	/* Set the Adapter Stopped flag so other driver functions stop
-	 * touching the Hardware.
-	 */
-	hw->adapter_stopped = true;
-
-	/* Clear interrupt mask to stop board from generating interrupts */
-	pr_debug("Masking off all interrupts\n");
-	IXGB_WRITE_REG(hw, IMC, 0xFFFFFFFF);
-
-	/* Disable the Transmit and Receive units.  Then delay to allow
-	 * any pending transactions to complete before we hit the MAC with
-	 * the global reset.
-	 */
-	IXGB_WRITE_REG(hw, RCTL, IXGB_READ_REG(hw, RCTL) & ~IXGB_RCTL_RXEN);
-	IXGB_WRITE_REG(hw, TCTL, IXGB_READ_REG(hw, TCTL) & ~IXGB_TCTL_TXEN);
-	IXGB_WRITE_FLUSH(hw);
-	msleep(IXGB_DELAY_BEFORE_RESET);
-
-	/* Issue a global reset to the MAC.  This will reset the chip's
-	 * transmit, receive, DMA, and link units.  It will not effect
-	 * the current PCI configuration.  The global reset bit is self-
-	 * clearing, and should clear within a microsecond.
-	 */
-	pr_debug("Issuing a global reset to MAC\n");
-
-	ctrl_reg = ixgb_mac_reset(hw);
-
-	/* Clear interrupt mask to stop board from generating interrupts */
-	pr_debug("Masking off all interrupts\n");
-	IXGB_WRITE_REG(hw, IMC, 0xffffffff);
-
-	/* Clear any pending interrupt events. */
-	IXGB_READ_REG(hw, ICR);
-
-	return ctrl_reg & IXGB_CTRL0_RST;
-}
-
-
-/******************************************************************************
- * Identifies the vendor of the optics module on the adapter.  The SR adapters
- * support two different types of XPAK optics, so it is necessary to determine
- * which optics are present before applying any optics-specific workarounds.
- *
- * hw - Struct containing variables accessed by shared code.
- *
- * Returns: the vendor of the XPAK optics module.
- *****************************************************************************/
-static ixgb_xpak_vendor
-ixgb_identify_xpak_vendor(struct ixgb_hw *hw)
-{
-	u32 i;
-	u16 vendor_name[5];
-	ixgb_xpak_vendor xpak_vendor;
-
-	ENTER();
-
-	/* Read the first few bytes of the vendor string from the XPAK NVR
-	 * registers.  These are standard XENPAK/XPAK registers, so all XPAK
-	 * devices should implement them. */
-	for (i = 0; i < 5; i++) {
-		vendor_name[i] = ixgb_read_phy_reg(hw,
-						   MDIO_PMA_PMD_XPAK_VENDOR_NAME
-						   + i, IXGB_PHY_ADDRESS,
-						   MDIO_MMD_PMAPMD);
-	}
-
-	/* Determine the actual vendor */
-	if (vendor_name[0] == 'I' &&
-	    vendor_name[1] == 'N' &&
-	    vendor_name[2] == 'T' &&
-	    vendor_name[3] == 'E' && vendor_name[4] == 'L') {
-		xpak_vendor = ixgb_xpak_vendor_intel;
-	} else {
-		xpak_vendor = ixgb_xpak_vendor_infineon;
-	}
-
-	return xpak_vendor;
-}
-
-/******************************************************************************
- * Determine the physical layer module on the adapter.
- *
- * hw - Struct containing variables accessed by shared code.  The device_id
- *      field must be (correctly) populated before calling this routine.
- *
- * Returns: the phy type of the adapter.
- *****************************************************************************/
-static ixgb_phy_type
-ixgb_identify_phy(struct ixgb_hw *hw)
-{
-	ixgb_phy_type phy_type;
-	ixgb_xpak_vendor xpak_vendor;
-
-	ENTER();
-
-	/* Infer the transceiver/phy type from the device id */
-	switch (hw->device_id) {
-	case IXGB_DEVICE_ID_82597EX:
-		pr_debug("Identified TXN17401 optics\n");
-		phy_type = ixgb_phy_type_txn17401;
-		break;
-
-	case IXGB_DEVICE_ID_82597EX_SR:
-		/* The SR adapters carry two different types of XPAK optics
-		 * modules; read the vendor identifier to determine the exact
-		 * type of optics. */
-		xpak_vendor = ixgb_identify_xpak_vendor(hw);
-		if (xpak_vendor == ixgb_xpak_vendor_intel) {
-			pr_debug("Identified TXN17201 optics\n");
-			phy_type = ixgb_phy_type_txn17201;
-		} else {
-			pr_debug("Identified G6005 optics\n");
-			phy_type = ixgb_phy_type_g6005;
-		}
-		break;
-	case IXGB_DEVICE_ID_82597EX_LR:
-		pr_debug("Identified G6104 optics\n");
-		phy_type = ixgb_phy_type_g6104;
-		break;
-	case IXGB_DEVICE_ID_82597EX_CX4:
-		pr_debug("Identified CX4\n");
-		xpak_vendor = ixgb_identify_xpak_vendor(hw);
-		if (xpak_vendor == ixgb_xpak_vendor_intel) {
-			pr_debug("Identified TXN17201 optics\n");
-			phy_type = ixgb_phy_type_txn17201;
-		} else {
-			pr_debug("Identified G6005 optics\n");
-			phy_type = ixgb_phy_type_g6005;
-		}
-		break;
-	default:
-		pr_debug("Unknown physical layer module\n");
-		phy_type = ixgb_phy_type_unknown;
-		break;
-	}
-
-	/* update phy type for sun specific board */
-	if (hw->subsystem_vendor_id == PCI_VENDOR_ID_SUN)
-		phy_type = ixgb_phy_type_bcm;
-
-	return phy_type;
-}
-
-/******************************************************************************
- * Performs basic configuration of the adapter.
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Resets the controller.
- * Reads and validates the EEPROM.
- * Initializes the receive address registers.
- * Initializes the multicast table.
- * Clears all on-chip counters.
- * Calls routine to setup flow control settings.
- * Leaves the transmit and receive units disabled and uninitialized.
- *
- * Returns:
- *      true if successful,
- *      false if unrecoverable problems were encountered.
- *****************************************************************************/
-bool
-ixgb_init_hw(struct ixgb_hw *hw)
-{
-	u32 i;
-	bool status;
-
-	ENTER();
-
-	/* Issue a global reset to the MAC.  This will reset the chip's
-	 * transmit, receive, DMA, and link units.  It will not effect
-	 * the current PCI configuration.  The global reset bit is self-
-	 * clearing, and should clear within a microsecond.
-	 */
-	pr_debug("Issuing a global reset to MAC\n");
-
-	ixgb_mac_reset(hw);
-
-	pr_debug("Issuing an EE reset to MAC\n");
-#ifdef HP_ZX1
-	/* Workaround for 82597EX reset errata */
-	IXGB_WRITE_REG_IO(hw, CTRL1, IXGB_CTRL1_EE_RST);
-#else
-	IXGB_WRITE_REG(hw, CTRL1, IXGB_CTRL1_EE_RST);
-#endif
-
-	/* Delay a few ms just to allow the reset to complete */
-	msleep(IXGB_DELAY_AFTER_EE_RESET);
-
-	if (!ixgb_get_eeprom_data(hw))
-		return false;
-
-	/* Use the device id to determine the type of phy/transceiver. */
-	hw->device_id = ixgb_get_ee_device_id(hw);
-	hw->phy_type = ixgb_identify_phy(hw);
-
-	/* Setup the receive addresses.
-	 * Receive Address Registers (RARs 0 - 15).
-	 */
-	ixgb_init_rx_addrs(hw);
-
-	/*
-	 * Check that a valid MAC address has been set.
-	 * If it is not valid, we fail hardware init.
-	 */
-	if (!mac_addr_valid(hw->curr_mac_addr)) {
-		pr_debug("MAC address invalid after ixgb_init_rx_addrs\n");
-		return(false);
-	}
-
-	/* tell the routines in this file they can access hardware again */
-	hw->adapter_stopped = false;
-
-	/* Fill in the bus_info structure */
-	ixgb_get_bus_info(hw);
-
-	/* Zero out the Multicast HASH table */
-	pr_debug("Zeroing the MTA\n");
-	for (i = 0; i < IXGB_MC_TBL_SIZE; i++)
-		IXGB_WRITE_REG_ARRAY(hw, MTA, i, 0);
-
-	/* Zero out the VLAN Filter Table Array */
-	ixgb_clear_vfta(hw);
-
-	/* Zero all of the hardware counters */
-	ixgb_clear_hw_cntrs(hw);
-
-	/* Call a subroutine to setup flow control. */
-	status = ixgb_setup_fc(hw);
-
-	/* 82597EX errata: Call check-for-link in case lane deskew is locked */
-	ixgb_check_for_link(hw);
-
-	return status;
-}
-
-/******************************************************************************
- * Initializes receive address filters.
- *
- * hw - Struct containing variables accessed by shared code
- *
- * Places the MAC address in receive address register 0 and clears the rest
- * of the receive address registers. Clears the multicast table. Assumes
- * the receiver is in reset when the routine is called.
- *****************************************************************************/
-static void
-ixgb_init_rx_addrs(struct ixgb_hw *hw)
-{
-	u32 i;
-
-	ENTER();
-
-	/*
-	 * If the current mac address is valid, assume it is a software override
-	 * to the permanent address.
-	 * Otherwise, use the permanent address from the eeprom.
-	 */
-	if (!mac_addr_valid(hw->curr_mac_addr)) {
-
-		/* Get the MAC address from the eeprom for later reference */
-		ixgb_get_ee_mac_addr(hw, hw->curr_mac_addr);
-
-		pr_debug("Keeping Permanent MAC Addr = %pM\n",
-			 hw->curr_mac_addr);
-	} else {
-
-		/* Setup the receive address. */
-		pr_debug("Overriding MAC Address in RAR[0]\n");
-		pr_debug("New MAC Addr = %pM\n", hw->curr_mac_addr);
-
-		ixgb_rar_set(hw, hw->curr_mac_addr, 0);
-	}
-
-	/* Zero out the other 15 receive addresses. */
-	pr_debug("Clearing RAR[1-15]\n");
-	for (i = 1; i < IXGB_RAR_ENTRIES; i++) {
-		/* Write high reg first to disable the AV bit first */
-		IXGB_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
-		IXGB_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
-	}
-}
-
-/******************************************************************************
- * Updates the MAC's list of multicast addresses.
- *
- * hw - Struct containing variables accessed by shared code
- * mc_addr_list - the list of new multicast addresses
- * mc_addr_count - number of addresses
- * pad - number of bytes between addresses in the list
- *
- * The given list replaces any existing list. Clears the last 15 receive
- * address registers and the multicast table. Uses receive address registers
- * for the first 15 multicast addresses, and hashes the rest into the
- * multicast table.
- *****************************************************************************/
-void
-ixgb_mc_addr_list_update(struct ixgb_hw *hw,
-			  u8 *mc_addr_list,
-			  u32 mc_addr_count,
-			  u32 pad)
-{
-	u32 hash_value;
-	u32 i;
-	u32 rar_used_count = 1;		/* RAR[0] is used for our MAC address */
-	u8 *mca;
-
-	ENTER();
-
-	/* Set the new number of MC addresses that we are being requested to use. */
-	hw->num_mc_addrs = mc_addr_count;
-
-	/* Clear RAR[1-15] */
-	pr_debug("Clearing RAR[1-15]\n");
-	for (i = rar_used_count; i < IXGB_RAR_ENTRIES; i++) {
-		IXGB_WRITE_REG_ARRAY(hw, RA, (i << 1), 0);
-		IXGB_WRITE_REG_ARRAY(hw, RA, ((i << 1) + 1), 0);
-	}
-
-	/* Clear the MTA */
-	pr_debug("Clearing MTA\n");
-	for (i = 0; i < IXGB_MC_TBL_SIZE; i++)
-		IXGB_WRITE_REG_ARRAY(hw, MTA, i, 0);
-
-	/* Add the new addresses */
-	mca = mc_addr_list;
-	for (i = 0; i < mc_addr_count; i++) {
-		pr_debug("Adding the multicast addresses:\n");
-		pr_debug("MC Addr #%d = %pM\n", i, mca);
-
-		/* Place this multicast address in the RAR if there is room, *
-		 * else put it in the MTA
-		 */
-		if (rar_used_count < IXGB_RAR_ENTRIES) {
-			ixgb_rar_set(hw, mca, rar_used_count);
-			pr_debug("Added a multicast address to RAR[%d]\n", i);
-			rar_used_count++;
-		} else {
-			hash_value = ixgb_hash_mc_addr(hw, mca);
-
-			pr_debug("Hash value = 0x%03X\n", hash_value);
-
-			ixgb_mta_set(hw, hash_value);
-		}
-
-		mca += ETH_ALEN + pad;
-	}
-
-	pr_debug("MC Update Complete\n");
-}
-
-/******************************************************************************
- * Hashes an address to determine its location in the multicast table
- *
- * hw - Struct containing variables accessed by shared code
- * mc_addr - the multicast address to hash
- *
- * Returns:
- *      The hash value
- *****************************************************************************/
-static u32
-ixgb_hash_mc_addr(struct ixgb_hw *hw,
-		   u8 *mc_addr)
-{
-	u32 hash_value = 0;
-
-	ENTER();
-
-	/* The portion of the address that is used for the hash table is
-	 * determined by the mc_filter_type setting.
-	 */
-	switch (hw->mc_filter_type) {
-		/* [0] [1] [2] [3] [4] [5]
-		 * 01  AA  00  12  34  56
-		 * LSB                 MSB - According to H/W docs */
-	case 0:
-		/* [47:36] i.e. 0x563 for above example address */
-		hash_value =
-		    ((mc_addr[4] >> 4) | (((u16) mc_addr[5]) << 4));
-		break;
-	case 1:		/* [46:35] i.e. 0xAC6 for above example address */
-		hash_value =
-		    ((mc_addr[4] >> 3) | (((u16) mc_addr[5]) << 5));
-		break;
-	case 2:		/* [45:34] i.e. 0x5D8 for above example address */
-		hash_value =
-		    ((mc_addr[4] >> 2) | (((u16) mc_addr[5]) << 6));
-		break;
-	case 3:		/* [43:32] i.e. 0x634 for above example address */
-		hash_value = ((mc_addr[4]) | (((u16) mc_addr[5]) << 8));
-		break;
-	default:
-		/* Invalid mc_filter_type, what should we do? */
-		pr_debug("MC filter type param set incorrectly\n");
-		ASSERT(0);
-		break;
-	}
-
-	hash_value &= 0xFFF;
-	return hash_value;
-}
-
-/******************************************************************************
- * Sets the bit in the multicast table corresponding to the hash value.
- *
- * hw - Struct containing variables accessed by shared code
- * hash_value - Multicast address hash value
- *****************************************************************************/
-static void
-ixgb_mta_set(struct ixgb_hw *hw,
-		  u32 hash_value)
-{
-	u32 hash_bit, hash_reg;
-	u32 mta_reg;
-
-	/* The MTA is a register array of 128 32-bit registers.
-	 * It is treated like an array of 4096 bits.  We want to set
-	 * bit BitArray[hash_value]. So we figure out what register
-	 * the bit is in, read it, OR in the new bit, then write
-	 * back the new value.  The register is determined by the
-	 * upper 7 bits of the hash value and the bit within that
-	 * register are determined by the lower 5 bits of the value.
-	 */
-	hash_reg = (hash_value >> 5) & 0x7F;
-	hash_bit = hash_value & 0x1F;
-
-	mta_reg = IXGB_READ_REG_ARRAY(hw, MTA, hash_reg);
-
-	mta_reg |= (1 << hash_bit);
-
-	IXGB_WRITE_REG_ARRAY(hw, MTA, hash_reg, mta_reg);
-}
-
-/******************************************************************************
- * Puts an ethernet address into a receive address register.
- *
- * hw - Struct containing variables accessed by shared code
- * addr - Address to put into receive address register
- * index - Receive address register to write
- *****************************************************************************/
-void
-ixgb_rar_set(struct ixgb_hw *hw,
-		  const u8 *addr,
-		  u32 index)
-{
-	u32 rar_low, rar_high;
-
-	ENTER();
-
-	/* HW expects these in little endian so we reverse the byte order
-	 * from network order (big endian) to little endian
-	 */
-	rar_low = ((u32) addr[0] |
-		   ((u32)addr[1] << 8) |
-		   ((u32)addr[2] << 16) |
-		   ((u32)addr[3] << 24));
-
-	rar_high = ((u32) addr[4] |
-			((u32)addr[5] << 8) |
-			IXGB_RAH_AV);
-
-	IXGB_WRITE_REG_ARRAY(hw, RA, (index << 1), rar_low);
-	IXGB_WRITE_REG_ARRAY(hw, RA, ((index << 1) + 1), rar_high);
-}
-
-/******************************************************************************
- * Writes a value to the specified offset in the VLAN filter table.
- *
- * hw - Struct containing variables accessed by shared code
- * offset - Offset in VLAN filter table to write
- * value - Value to write into VLAN filter table
- *****************************************************************************/
-void
-ixgb_write_vfta(struct ixgb_hw *hw,
-		 u32 offset,
-		 u32 value)
-{
-	IXGB_WRITE_REG_ARRAY(hw, VFTA, offset, value);
-}
-
-/******************************************************************************
- * Clears the VLAN filter table
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_clear_vfta(struct ixgb_hw *hw)
-{
-	u32 offset;
-
-	for (offset = 0; offset < IXGB_VLAN_FILTER_TBL_SIZE; offset++)
-		IXGB_WRITE_REG_ARRAY(hw, VFTA, offset, 0);
-}
-
-/******************************************************************************
- * Configures the flow control settings based on SW configuration.
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-
-static bool
-ixgb_setup_fc(struct ixgb_hw *hw)
-{
-	u32 ctrl_reg;
-	u32 pap_reg = 0;   /* by default, assume no pause time */
-	bool status = true;
-
-	ENTER();
-
-	/* Get the current control reg 0 settings */
-	ctrl_reg = IXGB_READ_REG(hw, CTRL0);
-
-	/* Clear the Receive Pause Enable and Transmit Pause Enable bits */
-	ctrl_reg &= ~(IXGB_CTRL0_RPE | IXGB_CTRL0_TPE);
-
-	/* The possible values of the "flow_control" parameter are:
-	 *      0:  Flow control is completely disabled
-	 *      1:  Rx flow control is enabled (we can receive pause frames
-	 *          but not send pause frames).
-	 *      2:  Tx flow control is enabled (we can send pause frames
-	 *          but we do not support receiving pause frames).
-	 *      3:  Both Rx and TX flow control (symmetric) are enabled.
-	 *  other:  Invalid.
-	 */
-	switch (hw->fc.type) {
-	case ixgb_fc_none:	/* 0 */
-		/* Set CMDC bit to disable Rx Flow control */
-		ctrl_reg |= (IXGB_CTRL0_CMDC);
-		break;
-	case ixgb_fc_rx_pause:	/* 1 */
-		/* RX Flow control is enabled, and TX Flow control is
-		 * disabled.
-		 */
-		ctrl_reg |= (IXGB_CTRL0_RPE);
-		break;
-	case ixgb_fc_tx_pause:	/* 2 */
-		/* TX Flow control is enabled, and RX Flow control is
-		 * disabled, by a software over-ride.
-		 */
-		ctrl_reg |= (IXGB_CTRL0_TPE);
-		pap_reg = hw->fc.pause_time;
-		break;
-	case ixgb_fc_full:	/* 3 */
-		/* Flow control (both RX and TX) is enabled by a software
-		 * over-ride.
-		 */
-		ctrl_reg |= (IXGB_CTRL0_RPE | IXGB_CTRL0_TPE);
-		pap_reg = hw->fc.pause_time;
-		break;
-	default:
-		/* We should never get here.  The value should be 0-3. */
-		pr_debug("Flow control param set incorrectly\n");
-		ASSERT(0);
-		break;
-	}
-
-	/* Write the new settings */
-	IXGB_WRITE_REG(hw, CTRL0, ctrl_reg);
-
-	if (pap_reg != 0)
-		IXGB_WRITE_REG(hw, PAP, pap_reg);
-
-	/* Set the flow control receive threshold registers.  Normally,
-	 * these registers will be set to a default threshold that may be
-	 * adjusted later by the driver's runtime code.  However, if the
-	 * ability to transmit pause frames in not enabled, then these
-	 * registers will be set to 0.
-	 */
-	if (!(hw->fc.type & ixgb_fc_tx_pause)) {
-		IXGB_WRITE_REG(hw, FCRTL, 0);
-		IXGB_WRITE_REG(hw, FCRTH, 0);
-	} else {
-	   /* We need to set up the Receive Threshold high and low water
-	    * marks as well as (optionally) enabling the transmission of XON
-	    * frames. */
-		if (hw->fc.send_xon) {
-			IXGB_WRITE_REG(hw, FCRTL,
-				(hw->fc.low_water | IXGB_FCRTL_XONE));
-		} else {
-			IXGB_WRITE_REG(hw, FCRTL, hw->fc.low_water);
-		}
-		IXGB_WRITE_REG(hw, FCRTH, hw->fc.high_water);
-	}
-	return status;
-}
-
-/******************************************************************************
- * Reads a word from a device over the Management Data Interface (MDI) bus.
- * This interface is used to manage Physical layer devices.
- *
- * hw          - Struct containing variables accessed by hw code
- * reg_address - Offset of device register being read.
- * phy_address - Address of device on MDI.
- *
- * Returns:  Data word (16 bits) from MDI device.
- *
- * The 82597EX has support for several MDI access methods.  This routine
- * uses the new protocol MDI Single Command and Address Operation.
- * This requires that first an address cycle command is sent, followed by a
- * read command.
- *****************************************************************************/
-static u16
-ixgb_read_phy_reg(struct ixgb_hw *hw,
-		u32 reg_address,
-		u32 phy_address,
-		u32 device_type)
-{
-	u32 i;
-	u32 data;
-	u32 command = 0;
-
-	ASSERT(reg_address <= IXGB_MAX_PHY_REG_ADDRESS);
-	ASSERT(phy_address <= IXGB_MAX_PHY_ADDRESS);
-	ASSERT(device_type <= IXGB_MAX_PHY_DEV_TYPE);
-
-	/* Setup and write the address cycle command */
-	command = ((reg_address << IXGB_MSCA_NP_ADDR_SHIFT) |
-		   (device_type << IXGB_MSCA_DEV_TYPE_SHIFT) |
-		   (phy_address << IXGB_MSCA_PHY_ADDR_SHIFT) |
-		   (IXGB_MSCA_ADDR_CYCLE | IXGB_MSCA_MDI_COMMAND));
-
-	IXGB_WRITE_REG(hw, MSCA, command);
-
-    /**************************************************************
-    ** Check every 10 usec to see if the address cycle completed
-    ** The COMMAND bit will clear when the operation is complete.
-    ** This may take as long as 64 usecs (we'll wait 100 usecs max)
-    ** from the CPU Write to the Ready bit assertion.
-    **************************************************************/
-
-	for (i = 0; i < 10; i++)
-	{
-		udelay(10);
-
-		command = IXGB_READ_REG(hw, MSCA);
-
-		if ((command & IXGB_MSCA_MDI_COMMAND) == 0)
-			break;
-	}
-
-	ASSERT((command & IXGB_MSCA_MDI_COMMAND) == 0);
-
-	/* Address cycle complete, setup and write the read command */
-	command = ((reg_address << IXGB_MSCA_NP_ADDR_SHIFT) |
-		   (device_type << IXGB_MSCA_DEV_TYPE_SHIFT) |
-		   (phy_address << IXGB_MSCA_PHY_ADDR_SHIFT) |
-		   (IXGB_MSCA_READ | IXGB_MSCA_MDI_COMMAND));
-
-	IXGB_WRITE_REG(hw, MSCA, command);
-
-    /**************************************************************
-    ** Check every 10 usec to see if the read command completed
-    ** The COMMAND bit will clear when the operation is complete.
-    ** The read may take as long as 64 usecs (we'll wait 100 usecs max)
-    ** from the CPU Write to the Ready bit assertion.
-    **************************************************************/
-
-	for (i = 0; i < 10; i++)
-	{
-		udelay(10);
-
-		command = IXGB_READ_REG(hw, MSCA);
-
-		if ((command & IXGB_MSCA_MDI_COMMAND) == 0)
-			break;
-	}
-
-	ASSERT((command & IXGB_MSCA_MDI_COMMAND) == 0);
-
-	/* Operation is complete, get the data from the MDIO Read/Write Data
-	 * register and return.
-	 */
-	data = IXGB_READ_REG(hw, MSRWD);
-	data >>= IXGB_MSRWD_READ_DATA_SHIFT;
-	return((u16) data);
-}
-
-/******************************************************************************
- * Writes a word to a device over the Management Data Interface (MDI) bus.
- * This interface is used to manage Physical layer devices.
- *
- * hw          - Struct containing variables accessed by hw code
- * reg_address - Offset of device register being read.
- * phy_address - Address of device on MDI.
- * device_type - Also known as the Device ID or DID.
- * data        - 16-bit value to be written
- *
- * Returns:  void.
- *
- * The 82597EX has support for several MDI access methods.  This routine
- * uses the new protocol MDI Single Command and Address Operation.
- * This requires that first an address cycle command is sent, followed by a
- * write command.
- *****************************************************************************/
-static void
-ixgb_write_phy_reg(struct ixgb_hw *hw,
-			u32 reg_address,
-			u32 phy_address,
-			u32 device_type,
-			u16 data)
-{
-	u32 i;
-	u32 command = 0;
-
-	ASSERT(reg_address <= IXGB_MAX_PHY_REG_ADDRESS);
-	ASSERT(phy_address <= IXGB_MAX_PHY_ADDRESS);
-	ASSERT(device_type <= IXGB_MAX_PHY_DEV_TYPE);
-
-	/* Put the data in the MDIO Read/Write Data register */
-	IXGB_WRITE_REG(hw, MSRWD, (u32)data);
-
-	/* Setup and write the address cycle command */
-	command = ((reg_address << IXGB_MSCA_NP_ADDR_SHIFT)  |
-			   (device_type << IXGB_MSCA_DEV_TYPE_SHIFT) |
-			   (phy_address << IXGB_MSCA_PHY_ADDR_SHIFT) |
-			   (IXGB_MSCA_ADDR_CYCLE | IXGB_MSCA_MDI_COMMAND));
-
-	IXGB_WRITE_REG(hw, MSCA, command);
-
-	/**************************************************************
-	** Check every 10 usec to see if the address cycle completed
-	** The COMMAND bit will clear when the operation is complete.
-	** This may take as long as 64 usecs (we'll wait 100 usecs max)
-	** from the CPU Write to the Ready bit assertion.
-	**************************************************************/
-
-	for (i = 0; i < 10; i++)
-	{
-		udelay(10);
-
-		command = IXGB_READ_REG(hw, MSCA);
-
-		if ((command & IXGB_MSCA_MDI_COMMAND) == 0)
-			break;
-	}
-
-	ASSERT((command & IXGB_MSCA_MDI_COMMAND) == 0);
-
-	/* Address cycle complete, setup and write the write command */
-	command = ((reg_address << IXGB_MSCA_NP_ADDR_SHIFT)  |
-			   (device_type << IXGB_MSCA_DEV_TYPE_SHIFT) |
-			   (phy_address << IXGB_MSCA_PHY_ADDR_SHIFT) |
-			   (IXGB_MSCA_WRITE | IXGB_MSCA_MDI_COMMAND));
-
-	IXGB_WRITE_REG(hw, MSCA, command);
-
-	/**************************************************************
-	** Check every 10 usec to see if the read command completed
-	** The COMMAND bit will clear when the operation is complete.
-	** The write may take as long as 64 usecs (we'll wait 100 usecs max)
-	** from the CPU Write to the Ready bit assertion.
-	**************************************************************/
-
-	for (i = 0; i < 10; i++)
-	{
-		udelay(10);
-
-		command = IXGB_READ_REG(hw, MSCA);
-
-		if ((command & IXGB_MSCA_MDI_COMMAND) == 0)
-			break;
-	}
-
-	ASSERT((command & IXGB_MSCA_MDI_COMMAND) == 0);
-
-	/* Operation is complete, return. */
-}
-
-/******************************************************************************
- * Checks to see if the link status of the hardware has changed.
- *
- * hw - Struct containing variables accessed by hw code
- *
- * Called by any function that needs to check the link status of the adapter.
- *****************************************************************************/
-void
-ixgb_check_for_link(struct ixgb_hw *hw)
-{
-	u32 status_reg;
-	u32 xpcss_reg;
-
-	ENTER();
-
-	xpcss_reg = IXGB_READ_REG(hw, XPCSS);
-	status_reg = IXGB_READ_REG(hw, STATUS);
-
-	if ((xpcss_reg & IXGB_XPCSS_ALIGN_STATUS) &&
-	    (status_reg & IXGB_STATUS_LU)) {
-		hw->link_up = true;
-	} else if (!(xpcss_reg & IXGB_XPCSS_ALIGN_STATUS) &&
-		   (status_reg & IXGB_STATUS_LU)) {
-		pr_debug("XPCSS Not Aligned while Status:LU is set\n");
-		hw->link_up = ixgb_link_reset(hw);
-	} else {
-		/*
-		 * 82597EX errata.  Since the lane deskew problem may prevent
-		 * link, reset the link before reporting link down.
-		 */
-		hw->link_up = ixgb_link_reset(hw);
-	}
-	/*  Anything else for 10 Gig?? */
-}
-
-/******************************************************************************
- * Check for a bad link condition that may have occurred.
- * The indication is that the RFC / LFC registers may be incrementing
- * continually.  A full adapter reset is required to recover.
- *
- * hw - Struct containing variables accessed by hw code
- *
- * Called by any function that needs to check the link status of the adapter.
- *****************************************************************************/
-bool ixgb_check_for_bad_link(struct ixgb_hw *hw)
-{
-	u32 newLFC, newRFC;
-	bool bad_link_returncode = false;
-
-	if (hw->phy_type == ixgb_phy_type_txn17401) {
-		newLFC = IXGB_READ_REG(hw, LFC);
-		newRFC = IXGB_READ_REG(hw, RFC);
-		if ((hw->lastLFC + 250 < newLFC)
-		    || (hw->lastRFC + 250 < newRFC)) {
-			pr_debug("BAD LINK! too many LFC/RFC since last check\n");
-			bad_link_returncode = true;
-		}
-		hw->lastLFC = newLFC;
-		hw->lastRFC = newRFC;
-	}
-
-	return bad_link_returncode;
-}
-
-/******************************************************************************
- * Clears all hardware statistics counters.
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_clear_hw_cntrs(struct ixgb_hw *hw)
-{
-	ENTER();
-
-	/* if we are stopped or resetting exit gracefully */
-	if (hw->adapter_stopped) {
-		pr_debug("Exiting because the adapter is stopped!!!\n");
-		return;
-	}
-
-	IXGB_READ_REG(hw, TPRL);
-	IXGB_READ_REG(hw, TPRH);
-	IXGB_READ_REG(hw, GPRCL);
-	IXGB_READ_REG(hw, GPRCH);
-	IXGB_READ_REG(hw, BPRCL);
-	IXGB_READ_REG(hw, BPRCH);
-	IXGB_READ_REG(hw, MPRCL);
-	IXGB_READ_REG(hw, MPRCH);
-	IXGB_READ_REG(hw, UPRCL);
-	IXGB_READ_REG(hw, UPRCH);
-	IXGB_READ_REG(hw, VPRCL);
-	IXGB_READ_REG(hw, VPRCH);
-	IXGB_READ_REG(hw, JPRCL);
-	IXGB_READ_REG(hw, JPRCH);
-	IXGB_READ_REG(hw, GORCL);
-	IXGB_READ_REG(hw, GORCH);
-	IXGB_READ_REG(hw, TORL);
-	IXGB_READ_REG(hw, TORH);
-	IXGB_READ_REG(hw, RNBC);
-	IXGB_READ_REG(hw, RUC);
-	IXGB_READ_REG(hw, ROC);
-	IXGB_READ_REG(hw, RLEC);
-	IXGB_READ_REG(hw, CRCERRS);
-	IXGB_READ_REG(hw, ICBC);
-	IXGB_READ_REG(hw, ECBC);
-	IXGB_READ_REG(hw, MPC);
-	IXGB_READ_REG(hw, TPTL);
-	IXGB_READ_REG(hw, TPTH);
-	IXGB_READ_REG(hw, GPTCL);
-	IXGB_READ_REG(hw, GPTCH);
-	IXGB_READ_REG(hw, BPTCL);
-	IXGB_READ_REG(hw, BPTCH);
-	IXGB_READ_REG(hw, MPTCL);
-	IXGB_READ_REG(hw, MPTCH);
-	IXGB_READ_REG(hw, UPTCL);
-	IXGB_READ_REG(hw, UPTCH);
-	IXGB_READ_REG(hw, VPTCL);
-	IXGB_READ_REG(hw, VPTCH);
-	IXGB_READ_REG(hw, JPTCL);
-	IXGB_READ_REG(hw, JPTCH);
-	IXGB_READ_REG(hw, GOTCL);
-	IXGB_READ_REG(hw, GOTCH);
-	IXGB_READ_REG(hw, TOTL);
-	IXGB_READ_REG(hw, TOTH);
-	IXGB_READ_REG(hw, DC);
-	IXGB_READ_REG(hw, PLT64C);
-	IXGB_READ_REG(hw, TSCTC);
-	IXGB_READ_REG(hw, TSCTFC);
-	IXGB_READ_REG(hw, IBIC);
-	IXGB_READ_REG(hw, RFC);
-	IXGB_READ_REG(hw, LFC);
-	IXGB_READ_REG(hw, PFRC);
-	IXGB_READ_REG(hw, PFTC);
-	IXGB_READ_REG(hw, MCFRC);
-	IXGB_READ_REG(hw, MCFTC);
-	IXGB_READ_REG(hw, XONRXC);
-	IXGB_READ_REG(hw, XONTXC);
-	IXGB_READ_REG(hw, XOFFRXC);
-	IXGB_READ_REG(hw, XOFFTXC);
-	IXGB_READ_REG(hw, RJC);
-}
-
-/******************************************************************************
- * Turns on the software controllable LED
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-void
-ixgb_led_on(struct ixgb_hw *hw)
-{
-	u32 ctrl0_reg = IXGB_READ_REG(hw, CTRL0);
-
-	/* To turn on the LED, clear software-definable pin 0 (SDP0). */
-	ctrl0_reg &= ~IXGB_CTRL0_SDP0;
-	IXGB_WRITE_REG(hw, CTRL0, ctrl0_reg);
-}
-
-/******************************************************************************
- * Turns off the software controllable LED
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-void
-ixgb_led_off(struct ixgb_hw *hw)
-{
-	u32 ctrl0_reg = IXGB_READ_REG(hw, CTRL0);
-
-	/* To turn off the LED, set software-definable pin 0 (SDP0). */
-	ctrl0_reg |= IXGB_CTRL0_SDP0;
-	IXGB_WRITE_REG(hw, CTRL0, ctrl0_reg);
-}
-
-/******************************************************************************
- * Gets the current PCI bus type, speed, and width of the hardware
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_get_bus_info(struct ixgb_hw *hw)
-{
-	u32 status_reg;
-
-	status_reg = IXGB_READ_REG(hw, STATUS);
-
-	hw->bus.type = (status_reg & IXGB_STATUS_PCIX_MODE) ?
-		ixgb_bus_type_pcix : ixgb_bus_type_pci;
-
-	if (hw->bus.type == ixgb_bus_type_pci) {
-		hw->bus.speed = (status_reg & IXGB_STATUS_PCI_SPD) ?
-			ixgb_bus_speed_66 : ixgb_bus_speed_33;
-	} else {
-		switch (status_reg & IXGB_STATUS_PCIX_SPD_MASK) {
-		case IXGB_STATUS_PCIX_SPD_66:
-			hw->bus.speed = ixgb_bus_speed_66;
-			break;
-		case IXGB_STATUS_PCIX_SPD_100:
-			hw->bus.speed = ixgb_bus_speed_100;
-			break;
-		case IXGB_STATUS_PCIX_SPD_133:
-			hw->bus.speed = ixgb_bus_speed_133;
-			break;
-		default:
-			hw->bus.speed = ixgb_bus_speed_reserved;
-			break;
-		}
-	}
-
-	hw->bus.width = (status_reg & IXGB_STATUS_BUS64) ?
-		ixgb_bus_width_64 : ixgb_bus_width_32;
-}
-
-/******************************************************************************
- * Tests a MAC address to ensure it is a valid Individual Address
- *
- * mac_addr - pointer to MAC address.
- *
- *****************************************************************************/
-static bool
-mac_addr_valid(u8 *mac_addr)
-{
-	bool is_valid = true;
-	ENTER();
-
-	/* Make sure it is not a multicast address */
-	if (is_multicast_ether_addr(mac_addr)) {
-		pr_debug("MAC address is multicast\n");
-		is_valid = false;
-	}
-	/* Not a broadcast address */
-	else if (is_broadcast_ether_addr(mac_addr)) {
-		pr_debug("MAC address is broadcast\n");
-		is_valid = false;
-	}
-	/* Reject the zero address */
-	else if (is_zero_ether_addr(mac_addr)) {
-		pr_debug("MAC address is all zeros\n");
-		is_valid = false;
-	}
-	return is_valid;
-}
-
-/******************************************************************************
- * Resets the 10GbE link.  Waits the settle time and returns the state of
- * the link.
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static bool
-ixgb_link_reset(struct ixgb_hw *hw)
-{
-	bool link_status = false;
-	u8 wait_retries = MAX_RESET_ITERATIONS;
-	u8 lrst_retries = MAX_RESET_ITERATIONS;
-
-	do {
-		/* Reset the link */
-		IXGB_WRITE_REG(hw, CTRL0,
-			       IXGB_READ_REG(hw, CTRL0) | IXGB_CTRL0_LRST);
-
-		/* Wait for link-up and lane re-alignment */
-		do {
-			udelay(IXGB_DELAY_USECS_AFTER_LINK_RESET);
-			link_status =
-			    ((IXGB_READ_REG(hw, STATUS) & IXGB_STATUS_LU)
-			     && (IXGB_READ_REG(hw, XPCSS) &
-				 IXGB_XPCSS_ALIGN_STATUS)) ? true : false;
-		} while (!link_status && --wait_retries);
-
-	} while (!link_status && --lrst_retries);
-
-	return link_status;
-}
-
-/******************************************************************************
- * Resets the 10GbE optics module.
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-static void
-ixgb_optics_reset(struct ixgb_hw *hw)
-{
-	if (hw->phy_type == ixgb_phy_type_txn17401) {
-		ixgb_write_phy_reg(hw,
-				   MDIO_CTRL1,
-				   IXGB_PHY_ADDRESS,
-				   MDIO_MMD_PMAPMD,
-				   MDIO_CTRL1_RESET);
-
-		ixgb_read_phy_reg(hw, MDIO_CTRL1, IXGB_PHY_ADDRESS, MDIO_MMD_PMAPMD);
-	}
-}
-
-/******************************************************************************
- * Resets the 10GbE optics module for Sun variant NIC.
- *
- * hw - Struct containing variables accessed by shared code
- *****************************************************************************/
-
-#define   IXGB_BCM8704_USER_PMD_TX_CTRL_REG         0xC803
-#define   IXGB_BCM8704_USER_PMD_TX_CTRL_REG_VAL     0x0164
-#define   IXGB_BCM8704_USER_CTRL_REG                0xC800
-#define   IXGB_BCM8704_USER_CTRL_REG_VAL            0x7FBF
-#define   IXGB_BCM8704_USER_DEV3_ADDR               0x0003
-#define   IXGB_SUN_PHY_ADDRESS                      0x0000
-#define   IXGB_SUN_PHY_RESET_DELAY                     305
-
-static void
-ixgb_optics_reset_bcm(struct ixgb_hw *hw)
-{
-	u32 ctrl = IXGB_READ_REG(hw, CTRL0);
-	ctrl &= ~IXGB_CTRL0_SDP2;
-	ctrl |= IXGB_CTRL0_SDP3;
-	IXGB_WRITE_REG(hw, CTRL0, ctrl);
-	IXGB_WRITE_FLUSH(hw);
-
-	/* SerDes needs extra delay */
-	msleep(IXGB_SUN_PHY_RESET_DELAY);
-
-	/* Broadcom 7408L configuration */
-	/* Reference clock config */
-	ixgb_write_phy_reg(hw,
-			   IXGB_BCM8704_USER_PMD_TX_CTRL_REG,
-			   IXGB_SUN_PHY_ADDRESS,
-			   IXGB_BCM8704_USER_DEV3_ADDR,
-			   IXGB_BCM8704_USER_PMD_TX_CTRL_REG_VAL);
-	/*  we must read the registers twice */
-	ixgb_read_phy_reg(hw,
-			  IXGB_BCM8704_USER_PMD_TX_CTRL_REG,
-			  IXGB_SUN_PHY_ADDRESS,
-			  IXGB_BCM8704_USER_DEV3_ADDR);
-	ixgb_read_phy_reg(hw,
-			  IXGB_BCM8704_USER_PMD_TX_CTRL_REG,
-			  IXGB_SUN_PHY_ADDRESS,
-			  IXGB_BCM8704_USER_DEV3_ADDR);
-
-	ixgb_write_phy_reg(hw,
-			   IXGB_BCM8704_USER_CTRL_REG,
-			   IXGB_SUN_PHY_ADDRESS,
-			   IXGB_BCM8704_USER_DEV3_ADDR,
-			   IXGB_BCM8704_USER_CTRL_REG_VAL);
-	ixgb_read_phy_reg(hw,
-			  IXGB_BCM8704_USER_CTRL_REG,
-			  IXGB_SUN_PHY_ADDRESS,
-			  IXGB_BCM8704_USER_DEV3_ADDR);
-	ixgb_read_phy_reg(hw,
-			  IXGB_BCM8704_USER_CTRL_REG,
-			  IXGB_SUN_PHY_ADDRESS,
-			  IXGB_BCM8704_USER_DEV3_ADDR);
-
-	/* SerDes needs extra delay */
-	msleep(IXGB_SUN_PHY_RESET_DELAY);
-}
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
deleted file mode 100644
index 70bcff5fb3db..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h
+++ /dev/null
@@ -1,767 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#ifndef _IXGB_HW_H_
-#define _IXGB_HW_H_
-
-#include <linux/mdio.h>
-
-#include "ixgb_osdep.h"
-
-/* Enums */
-typedef enum {
-	ixgb_mac_unknown = 0,
-	ixgb_82597,
-	ixgb_num_macs
-} ixgb_mac_type;
-
-/* Types of physical layer modules */
-typedef enum {
-	ixgb_phy_type_unknown = 0,
-	ixgb_phy_type_g6005,	/* 850nm, MM fiber, XPAK transceiver */
-	ixgb_phy_type_g6104,	/* 1310nm, SM fiber, XPAK transceiver */
-	ixgb_phy_type_txn17201,	/* 850nm, MM fiber, XPAK transceiver */
-	ixgb_phy_type_txn17401,	/* 1310nm, SM fiber, XENPAK transceiver */
-	ixgb_phy_type_bcm	/* SUN specific board */
-} ixgb_phy_type;
-
-/* XPAK transceiver vendors, for the SR adapters */
-typedef enum {
-	ixgb_xpak_vendor_intel,
-	ixgb_xpak_vendor_infineon
-} ixgb_xpak_vendor;
-
-/* Media Types */
-typedef enum {
-	ixgb_media_type_unknown = 0,
-	ixgb_media_type_fiber = 1,
-	ixgb_media_type_copper = 2,
-	ixgb_num_media_types
-} ixgb_media_type;
-
-/* Flow Control Settings */
-typedef enum {
-	ixgb_fc_none = 0,
-	ixgb_fc_rx_pause = 1,
-	ixgb_fc_tx_pause = 2,
-	ixgb_fc_full = 3,
-	ixgb_fc_default = 0xFF
-} ixgb_fc_type;
-
-/* PCI bus types */
-typedef enum {
-	ixgb_bus_type_unknown = 0,
-	ixgb_bus_type_pci,
-	ixgb_bus_type_pcix
-} ixgb_bus_type;
-
-/* PCI bus speeds */
-typedef enum {
-	ixgb_bus_speed_unknown = 0,
-	ixgb_bus_speed_33,
-	ixgb_bus_speed_66,
-	ixgb_bus_speed_100,
-	ixgb_bus_speed_133,
-	ixgb_bus_speed_reserved
-} ixgb_bus_speed;
-
-/* PCI bus widths */
-typedef enum {
-	ixgb_bus_width_unknown = 0,
-	ixgb_bus_width_32,
-	ixgb_bus_width_64
-} ixgb_bus_width;
-
-#define IXGB_EEPROM_SIZE    64	/* Size in words */
-
-#define SPEED_10000  10000
-#define FULL_DUPLEX  2
-
-#define MIN_NUMBER_OF_DESCRIPTORS       8
-#define MAX_NUMBER_OF_DESCRIPTORS  0xFFF8	/* 13 bits in RDLEN/TDLEN, 128B aligned     */
-
-#define IXGB_DELAY_BEFORE_RESET        10	/* allow 10ms after idling rx/tx units      */
-#define IXGB_DELAY_AFTER_RESET          1	/* allow 1ms after the reset                */
-#define IXGB_DELAY_AFTER_EE_RESET      10	/* allow 10ms after the EEPROM reset        */
-
-#define IXGB_DELAY_USECS_AFTER_LINK_RESET    13	/* allow 13 microseconds after the reset    */
-					   /* NOTE: this is MICROSECONDS               */
-#define MAX_RESET_ITERATIONS            8	/* number of iterations to get things right */
-
-/* General Registers */
-#define IXGB_CTRL0   0x00000	/* Device Control Register 0 - RW */
-#define IXGB_CTRL1   0x00008	/* Device Control Register 1 - RW */
-#define IXGB_STATUS  0x00010	/* Device Status Register - RO */
-#define IXGB_EECD    0x00018	/* EEPROM/Flash Control/Data Register - RW */
-#define IXGB_MFS     0x00020	/* Maximum Frame Size - RW */
-
-/* Interrupt */
-#define IXGB_ICR     0x00080	/* Interrupt Cause Read - R/clr */
-#define IXGB_ICS     0x00088	/* Interrupt Cause Set - RW */
-#define IXGB_IMS     0x00090	/* Interrupt Mask Set/Read - RW */
-#define IXGB_IMC     0x00098	/* Interrupt Mask Clear - WO */
-
-/* Receive */
-#define IXGB_RCTL    0x00100	/* RX Control - RW */
-#define IXGB_FCRTL   0x00108	/* Flow Control Receive Threshold Low - RW */
-#define IXGB_FCRTH   0x00110	/* Flow Control Receive Threshold High - RW */
-#define IXGB_RDBAL   0x00118	/* RX Descriptor Base Low - RW */
-#define IXGB_RDBAH   0x0011C	/* RX Descriptor Base High - RW */
-#define IXGB_RDLEN   0x00120	/* RX Descriptor Length - RW */
-#define IXGB_RDH     0x00128	/* RX Descriptor Head - RW */
-#define IXGB_RDT     0x00130	/* RX Descriptor Tail - RW */
-#define IXGB_RDTR    0x00138	/* RX Delay Timer Ring - RW */
-#define IXGB_RXDCTL  0x00140	/* Receive Descriptor Control - RW */
-#define IXGB_RAIDC   0x00148	/* Receive Adaptive Interrupt Delay Control - RW */
-#define IXGB_RXCSUM  0x00158	/* Receive Checksum Control - RW */
-#define IXGB_RA      0x00180	/* Receive Address Array Base - RW */
-#define IXGB_RAL     0x00180	/* Receive Address Low [0:15] - RW */
-#define IXGB_RAH     0x00184	/* Receive Address High [0:15] - RW */
-#define IXGB_MTA     0x00200	/* Multicast Table Array [0:127] - RW */
-#define IXGB_VFTA    0x00400	/* VLAN Filter Table Array [0:127] - RW */
-#define IXGB_REQ_RX_DESCRIPTOR_MULTIPLE 8
-
-/* Transmit */
-#define IXGB_TCTL    0x00600	/* TX Control - RW */
-#define IXGB_TDBAL   0x00608	/* TX Descriptor Base Low - RW */
-#define IXGB_TDBAH   0x0060C	/* TX Descriptor Base High - RW */
-#define IXGB_TDLEN   0x00610	/* TX Descriptor Length - RW */
-#define IXGB_TDH     0x00618	/* TX Descriptor Head - RW */
-#define IXGB_TDT     0x00620	/* TX Descriptor Tail - RW */
-#define IXGB_TIDV    0x00628	/* TX Interrupt Delay Value - RW */
-#define IXGB_TXDCTL  0x00630	/* Transmit Descriptor Control - RW */
-#define IXGB_TSPMT   0x00638	/* TCP Segmentation PAD & Min Threshold - RW */
-#define IXGB_PAP     0x00640	/* Pause and Pace - RW */
-#define IXGB_REQ_TX_DESCRIPTOR_MULTIPLE 8
-
-/* Physical */
-#define IXGB_PCSC1   0x00700	/* PCS Control 1 - RW */
-#define IXGB_PCSC2   0x00708	/* PCS Control 2 - RW */
-#define IXGB_PCSS1   0x00710	/* PCS Status 1 - RO */
-#define IXGB_PCSS2   0x00718	/* PCS Status 2 - RO */
-#define IXGB_XPCSS   0x00720	/* 10GBASE-X PCS Status (or XGXS Lane Status) - RO */
-#define IXGB_UCCR    0x00728	/* Unilink Circuit Control Register */
-#define IXGB_XPCSTC  0x00730	/* 10GBASE-X PCS Test Control */
-#define IXGB_MACA    0x00738	/* MDI Autoscan Command and Address - RW */
-#define IXGB_APAE    0x00740	/* Autoscan PHY Address Enable - RW */
-#define IXGB_ARD     0x00748	/* Autoscan Read Data - RO */
-#define IXGB_AIS     0x00750	/* Autoscan Interrupt Status - RO */
-#define IXGB_MSCA    0x00758	/* MDI Single Command and Address - RW */
-#define IXGB_MSRWD   0x00760	/* MDI Single Read and Write Data - RW, RO */
-
-/* Wake-up */
-#define IXGB_WUFC    0x00808	/* Wake Up Filter Control - RW */
-#define IXGB_WUS     0x00810	/* Wake Up Status - RO */
-#define IXGB_FFLT    0x01000	/* Flexible Filter Length Table - RW */
-#define IXGB_FFMT    0x01020	/* Flexible Filter Mask Table - RW */
-#define IXGB_FTVT    0x01420	/* Flexible Filter Value Table - RW */
-
-/* Statistics */
-#define IXGB_TPRL    0x02000	/* Total Packets Received (Low) */
-#define IXGB_TPRH    0x02004	/* Total Packets Received (High) */
-#define IXGB_GPRCL   0x02008	/* Good Packets Received Count (Low) */
-#define IXGB_GPRCH   0x0200C	/* Good Packets Received Count (High) */
-#define IXGB_BPRCL   0x02010	/* Broadcast Packets Received Count (Low) */
-#define IXGB_BPRCH   0x02014	/* Broadcast Packets Received Count (High) */
-#define IXGB_MPRCL   0x02018	/* Multicast Packets Received Count (Low) */
-#define IXGB_MPRCH   0x0201C	/* Multicast Packets Received Count (High) */
-#define IXGB_UPRCL   0x02020	/* Unicast Packets Received Count (Low) */
-#define IXGB_UPRCH   0x02024	/* Unicast Packets Received Count (High) */
-#define IXGB_VPRCL   0x02028	/* VLAN Packets Received Count (Low) */
-#define IXGB_VPRCH   0x0202C	/* VLAN Packets Received Count (High) */
-#define IXGB_JPRCL   0x02030	/* Jumbo Packets Received Count (Low) */
-#define IXGB_JPRCH   0x02034	/* Jumbo Packets Received Count (High) */
-#define IXGB_GORCL   0x02038	/* Good Octets Received Count (Low) */
-#define IXGB_GORCH   0x0203C	/* Good Octets Received Count (High) */
-#define IXGB_TORL    0x02040	/* Total Octets Received (Low) */
-#define IXGB_TORH    0x02044	/* Total Octets Received (High) */
-#define IXGB_RNBC    0x02048	/* Receive No Buffers Count */
-#define IXGB_RUC     0x02050	/* Receive Undersize Count */
-#define IXGB_ROC     0x02058	/* Receive Oversize Count */
-#define IXGB_RLEC    0x02060	/* Receive Length Error Count */
-#define IXGB_CRCERRS 0x02068	/* CRC Error Count */
-#define IXGB_ICBC    0x02070	/* Illegal control byte in mid-packet Count */
-#define IXGB_ECBC    0x02078	/* Error Control byte in mid-packet Count */
-#define IXGB_MPC     0x02080	/* Missed Packets Count */
-#define IXGB_TPTL    0x02100	/* Total Packets Transmitted (Low) */
-#define IXGB_TPTH    0x02104	/* Total Packets Transmitted (High) */
-#define IXGB_GPTCL   0x02108	/* Good Packets Transmitted Count (Low) */
-#define IXGB_GPTCH   0x0210C	/* Good Packets Transmitted Count (High) */
-#define IXGB_BPTCL   0x02110	/* Broadcast Packets Transmitted Count (Low) */
-#define IXGB_BPTCH   0x02114	/* Broadcast Packets Transmitted Count (High) */
-#define IXGB_MPTCL   0x02118	/* Multicast Packets Transmitted Count (Low) */
-#define IXGB_MPTCH   0x0211C	/* Multicast Packets Transmitted Count (High) */
-#define IXGB_UPTCL   0x02120	/* Unicast Packets Transmitted Count (Low) */
-#define IXGB_UPTCH   0x02124	/* Unicast Packets Transmitted Count (High) */
-#define IXGB_VPTCL   0x02128	/* VLAN Packets Transmitted Count (Low) */
-#define IXGB_VPTCH   0x0212C	/* VLAN Packets Transmitted Count (High) */
-#define IXGB_JPTCL   0x02130	/* Jumbo Packets Transmitted Count (Low) */
-#define IXGB_JPTCH   0x02134	/* Jumbo Packets Transmitted Count (High) */
-#define IXGB_GOTCL   0x02138	/* Good Octets Transmitted Count (Low) */
-#define IXGB_GOTCH   0x0213C	/* Good Octets Transmitted Count (High) */
-#define IXGB_TOTL    0x02140	/* Total Octets Transmitted Count (Low) */
-#define IXGB_TOTH    0x02144	/* Total Octets Transmitted Count (High) */
-#define IXGB_DC      0x02148	/* Defer Count */
-#define IXGB_PLT64C  0x02150	/* Packet Transmitted was less than 64 bytes Count */
-#define IXGB_TSCTC   0x02170	/* TCP Segmentation Context Transmitted Count */
-#define IXGB_TSCTFC  0x02178	/* TCP Segmentation Context Tx Fail Count */
-#define IXGB_IBIC    0x02180	/* Illegal byte during Idle stream count */
-#define IXGB_RFC     0x02188	/* Remote Fault Count */
-#define IXGB_LFC     0x02190	/* Local Fault Count */
-#define IXGB_PFRC    0x02198	/* Pause Frame Receive Count */
-#define IXGB_PFTC    0x021A0	/* Pause Frame Transmit Count */
-#define IXGB_MCFRC   0x021A8	/* MAC Control Frames (non-Pause) Received Count */
-#define IXGB_MCFTC   0x021B0	/* MAC Control Frames (non-Pause) Transmitted Count */
-#define IXGB_XONRXC  0x021B8	/* XON Received Count */
-#define IXGB_XONTXC  0x021C0	/* XON Transmitted Count */
-#define IXGB_XOFFRXC 0x021C8	/* XOFF Received Count */
-#define IXGB_XOFFTXC 0x021D0	/* XOFF Transmitted Count */
-#define IXGB_RJC     0x021D8	/* Receive Jabber Count */
-
-/* CTRL0 Bit Masks */
-#define IXGB_CTRL0_LRST     0x00000008
-#define IXGB_CTRL0_JFE      0x00000010
-#define IXGB_CTRL0_XLE      0x00000020
-#define IXGB_CTRL0_MDCS     0x00000040
-#define IXGB_CTRL0_CMDC     0x00000080
-#define IXGB_CTRL0_SDP0     0x00040000
-#define IXGB_CTRL0_SDP1     0x00080000
-#define IXGB_CTRL0_SDP2     0x00100000
-#define IXGB_CTRL0_SDP3     0x00200000
-#define IXGB_CTRL0_SDP0_DIR 0x00400000
-#define IXGB_CTRL0_SDP1_DIR 0x00800000
-#define IXGB_CTRL0_SDP2_DIR 0x01000000
-#define IXGB_CTRL0_SDP3_DIR 0x02000000
-#define IXGB_CTRL0_RST      0x04000000
-#define IXGB_CTRL0_RPE      0x08000000
-#define IXGB_CTRL0_TPE      0x10000000
-#define IXGB_CTRL0_VME      0x40000000
-
-/* CTRL1 Bit Masks */
-#define IXGB_CTRL1_GPI0_EN     0x00000001
-#define IXGB_CTRL1_GPI1_EN     0x00000002
-#define IXGB_CTRL1_GPI2_EN     0x00000004
-#define IXGB_CTRL1_GPI3_EN     0x00000008
-#define IXGB_CTRL1_SDP4        0x00000010
-#define IXGB_CTRL1_SDP5        0x00000020
-#define IXGB_CTRL1_SDP6        0x00000040
-#define IXGB_CTRL1_SDP7        0x00000080
-#define IXGB_CTRL1_SDP4_DIR    0x00000100
-#define IXGB_CTRL1_SDP5_DIR    0x00000200
-#define IXGB_CTRL1_SDP6_DIR    0x00000400
-#define IXGB_CTRL1_SDP7_DIR    0x00000800
-#define IXGB_CTRL1_EE_RST      0x00002000
-#define IXGB_CTRL1_RO_DIS      0x00020000
-#define IXGB_CTRL1_PCIXHM_MASK 0x00C00000
-#define IXGB_CTRL1_PCIXHM_1_2  0x00000000
-#define IXGB_CTRL1_PCIXHM_5_8  0x00400000
-#define IXGB_CTRL1_PCIXHM_3_4  0x00800000
-#define IXGB_CTRL1_PCIXHM_7_8  0x00C00000
-
-/* STATUS Bit Masks */
-#define IXGB_STATUS_LU            0x00000002
-#define IXGB_STATUS_AIP           0x00000004
-#define IXGB_STATUS_TXOFF         0x00000010
-#define IXGB_STATUS_XAUIME        0x00000020
-#define IXGB_STATUS_RES           0x00000040
-#define IXGB_STATUS_RIS           0x00000080
-#define IXGB_STATUS_RIE           0x00000100
-#define IXGB_STATUS_RLF           0x00000200
-#define IXGB_STATUS_RRF           0x00000400
-#define IXGB_STATUS_PCI_SPD       0x00000800
-#define IXGB_STATUS_BUS64         0x00001000
-#define IXGB_STATUS_PCIX_MODE     0x00002000
-#define IXGB_STATUS_PCIX_SPD_MASK 0x0000C000
-#define IXGB_STATUS_PCIX_SPD_66   0x00000000
-#define IXGB_STATUS_PCIX_SPD_100  0x00004000
-#define IXGB_STATUS_PCIX_SPD_133  0x00008000
-#define IXGB_STATUS_REV_ID_MASK   0x000F0000
-#define IXGB_STATUS_REV_ID_SHIFT  16
-
-/* EECD Bit Masks */
-#define IXGB_EECD_SK       0x00000001
-#define IXGB_EECD_CS       0x00000002
-#define IXGB_EECD_DI       0x00000004
-#define IXGB_EECD_DO       0x00000008
-#define IXGB_EECD_FWE_MASK 0x00000030
-#define IXGB_EECD_FWE_DIS  0x00000010
-#define IXGB_EECD_FWE_EN   0x00000020
-
-/* MFS */
-#define IXGB_MFS_SHIFT 16
-
-/* Interrupt Register Bit Masks (used for ICR, ICS, IMS, and IMC) */
-#define IXGB_INT_TXDW     0x00000001
-#define IXGB_INT_TXQE     0x00000002
-#define IXGB_INT_LSC      0x00000004
-#define IXGB_INT_RXSEQ    0x00000008
-#define IXGB_INT_RXDMT0   0x00000010
-#define IXGB_INT_RXO      0x00000040
-#define IXGB_INT_RXT0     0x00000080
-#define IXGB_INT_AUTOSCAN 0x00000200
-#define IXGB_INT_GPI0     0x00000800
-#define IXGB_INT_GPI1     0x00001000
-#define IXGB_INT_GPI2     0x00002000
-#define IXGB_INT_GPI3     0x00004000
-
-/* RCTL Bit Masks */
-#define IXGB_RCTL_RXEN        0x00000002
-#define IXGB_RCTL_SBP         0x00000004
-#define IXGB_RCTL_UPE         0x00000008
-#define IXGB_RCTL_MPE         0x00000010
-#define IXGB_RCTL_RDMTS_MASK  0x00000300
-#define IXGB_RCTL_RDMTS_1_2   0x00000000
-#define IXGB_RCTL_RDMTS_1_4   0x00000100
-#define IXGB_RCTL_RDMTS_1_8   0x00000200
-#define IXGB_RCTL_MO_MASK     0x00003000
-#define IXGB_RCTL_MO_47_36    0x00000000
-#define IXGB_RCTL_MO_46_35    0x00001000
-#define IXGB_RCTL_MO_45_34    0x00002000
-#define IXGB_RCTL_MO_43_32    0x00003000
-#define IXGB_RCTL_MO_SHIFT    12
-#define IXGB_RCTL_BAM         0x00008000
-#define IXGB_RCTL_BSIZE_MASK  0x00030000
-#define IXGB_RCTL_BSIZE_2048  0x00000000
-#define IXGB_RCTL_BSIZE_4096  0x00010000
-#define IXGB_RCTL_BSIZE_8192  0x00020000
-#define IXGB_RCTL_BSIZE_16384 0x00030000
-#define IXGB_RCTL_VFE         0x00040000
-#define IXGB_RCTL_CFIEN       0x00080000
-#define IXGB_RCTL_CFI         0x00100000
-#define IXGB_RCTL_RPDA_MASK   0x00600000
-#define IXGB_RCTL_RPDA_MC_MAC 0x00000000
-#define IXGB_RCTL_MC_ONLY     0x00400000
-#define IXGB_RCTL_CFF         0x00800000
-#define IXGB_RCTL_SECRC       0x04000000
-#define IXGB_RDT_FPDB         0x80000000
-
-#define IXGB_RCTL_IDLE_RX_UNIT 0
-
-/* FCRTL Bit Masks */
-#define IXGB_FCRTL_XONE       0x80000000
-
-/* RXDCTL Bit Masks */
-#define IXGB_RXDCTL_PTHRESH_MASK  0x000001FF
-#define IXGB_RXDCTL_PTHRESH_SHIFT 0
-#define IXGB_RXDCTL_HTHRESH_MASK  0x0003FE00
-#define IXGB_RXDCTL_HTHRESH_SHIFT 9
-#define IXGB_RXDCTL_WTHRESH_MASK  0x07FC0000
-#define IXGB_RXDCTL_WTHRESH_SHIFT 18
-
-/* RAIDC Bit Masks */
-#define IXGB_RAIDC_HIGHTHRS_MASK 0x0000003F
-#define IXGB_RAIDC_DELAY_MASK    0x000FF800
-#define IXGB_RAIDC_DELAY_SHIFT   11
-#define IXGB_RAIDC_POLL_MASK     0x1FF00000
-#define IXGB_RAIDC_POLL_SHIFT    20
-#define IXGB_RAIDC_RXT_GATE      0x40000000
-#define IXGB_RAIDC_EN            0x80000000
-
-#define IXGB_RAIDC_POLL_1000_INTERRUPTS_PER_SECOND      1220
-#define IXGB_RAIDC_POLL_5000_INTERRUPTS_PER_SECOND      244
-#define IXGB_RAIDC_POLL_10000_INTERRUPTS_PER_SECOND     122
-#define IXGB_RAIDC_POLL_20000_INTERRUPTS_PER_SECOND     61
-
-/* RXCSUM Bit Masks */
-#define IXGB_RXCSUM_IPOFL 0x00000100
-#define IXGB_RXCSUM_TUOFL 0x00000200
-
-/* RAH Bit Masks */
-#define IXGB_RAH_ASEL_MASK 0x00030000
-#define IXGB_RAH_ASEL_DEST 0x00000000
-#define IXGB_RAH_ASEL_SRC  0x00010000
-#define IXGB_RAH_AV        0x80000000
-
-/* TCTL Bit Masks */
-#define IXGB_TCTL_TCE  0x00000001
-#define IXGB_TCTL_TXEN 0x00000002
-#define IXGB_TCTL_TPDE 0x00000004
-
-#define IXGB_TCTL_IDLE_TX_UNIT  0
-
-/* TXDCTL Bit Masks */
-#define IXGB_TXDCTL_PTHRESH_MASK  0x0000007F
-#define IXGB_TXDCTL_HTHRESH_MASK  0x00007F00
-#define IXGB_TXDCTL_HTHRESH_SHIFT 8
-#define IXGB_TXDCTL_WTHRESH_MASK  0x007F0000
-#define IXGB_TXDCTL_WTHRESH_SHIFT 16
-
-/* TSPMT Bit Masks */
-#define IXGB_TSPMT_TSMT_MASK   0x0000FFFF
-#define IXGB_TSPMT_TSPBP_MASK  0xFFFF0000
-#define IXGB_TSPMT_TSPBP_SHIFT 16
-
-/* PAP Bit Masks */
-#define IXGB_PAP_TXPC_MASK 0x0000FFFF
-#define IXGB_PAP_TXPV_MASK 0x000F0000
-#define IXGB_PAP_TXPV_10G  0x00000000
-#define IXGB_PAP_TXPV_1G   0x00010000
-#define IXGB_PAP_TXPV_2G   0x00020000
-#define IXGB_PAP_TXPV_3G   0x00030000
-#define IXGB_PAP_TXPV_4G   0x00040000
-#define IXGB_PAP_TXPV_5G   0x00050000
-#define IXGB_PAP_TXPV_6G   0x00060000
-#define IXGB_PAP_TXPV_7G   0x00070000
-#define IXGB_PAP_TXPV_8G   0x00080000
-#define IXGB_PAP_TXPV_9G   0x00090000
-#define IXGB_PAP_TXPV_WAN  0x000F0000
-
-/* PCSC1 Bit Masks */
-#define IXGB_PCSC1_LOOPBACK 0x00004000
-
-/* PCSC2 Bit Masks */
-#define IXGB_PCSC2_PCS_TYPE_MASK  0x00000003
-#define IXGB_PCSC2_PCS_TYPE_10GBX 0x00000001
-
-/* PCSS1 Bit Masks */
-#define IXGB_PCSS1_LOCAL_FAULT    0x00000080
-#define IXGB_PCSS1_RX_LINK_STATUS 0x00000004
-
-/* PCSS2 Bit Masks */
-#define IXGB_PCSS2_DEV_PRES_MASK 0x0000C000
-#define IXGB_PCSS2_DEV_PRES      0x00004000
-#define IXGB_PCSS2_TX_LF         0x00000800
-#define IXGB_PCSS2_RX_LF         0x00000400
-#define IXGB_PCSS2_10GBW         0x00000004
-#define IXGB_PCSS2_10GBX         0x00000002
-#define IXGB_PCSS2_10GBR         0x00000001
-
-/* XPCSS Bit Masks */
-#define IXGB_XPCSS_ALIGN_STATUS 0x00001000
-#define IXGB_XPCSS_PATTERN_TEST 0x00000800
-#define IXGB_XPCSS_LANE_3_SYNC  0x00000008
-#define IXGB_XPCSS_LANE_2_SYNC  0x00000004
-#define IXGB_XPCSS_LANE_1_SYNC  0x00000002
-#define IXGB_XPCSS_LANE_0_SYNC  0x00000001
-
-/* XPCSTC Bit Masks */
-#define IXGB_XPCSTC_BERT_TRIG       0x00200000
-#define IXGB_XPCSTC_BERT_SST        0x00100000
-#define IXGB_XPCSTC_BERT_PSZ_MASK   0x000C0000
-#define IXGB_XPCSTC_BERT_PSZ_SHIFT  17
-#define IXGB_XPCSTC_BERT_PSZ_INF    0x00000003
-#define IXGB_XPCSTC_BERT_PSZ_68     0x00000001
-#define IXGB_XPCSTC_BERT_PSZ_1028   0x00000000
-
-/* MSCA bit Masks */
-/* New Protocol Address */
-#define IXGB_MSCA_NP_ADDR_MASK      0x0000FFFF
-#define IXGB_MSCA_NP_ADDR_SHIFT     0
-/* Either Device Type or Register Address,depending on ST_CODE */
-#define IXGB_MSCA_DEV_TYPE_MASK     0x001F0000
-#define IXGB_MSCA_DEV_TYPE_SHIFT    16
-#define IXGB_MSCA_PHY_ADDR_MASK     0x03E00000
-#define IXGB_MSCA_PHY_ADDR_SHIFT    21
-#define IXGB_MSCA_OP_CODE_MASK      0x0C000000
-/* OP_CODE == 00, Address cycle, New Protocol           */
-/* OP_CODE == 01, Write operation                       */
-/* OP_CODE == 10, Read operation                        */
-/* OP_CODE == 11, Read, auto increment, New Protocol    */
-#define IXGB_MSCA_ADDR_CYCLE        0x00000000
-#define IXGB_MSCA_WRITE             0x04000000
-#define IXGB_MSCA_READ              0x08000000
-#define IXGB_MSCA_READ_AUTOINC      0x0C000000
-#define IXGB_MSCA_OP_CODE_SHIFT     26
-#define IXGB_MSCA_ST_CODE_MASK      0x30000000
-/* ST_CODE == 00, New Protocol  */
-/* ST_CODE == 01, Old Protocol  */
-#define IXGB_MSCA_NEW_PROTOCOL      0x00000000
-#define IXGB_MSCA_OLD_PROTOCOL      0x10000000
-#define IXGB_MSCA_ST_CODE_SHIFT     28
-/* Initiate command, self-clearing when command completes */
-#define IXGB_MSCA_MDI_COMMAND       0x40000000
-/*MDI In Progress Enable. */
-#define IXGB_MSCA_MDI_IN_PROG_EN    0x80000000
-
-/* MSRWD bit masks */
-#define IXGB_MSRWD_WRITE_DATA_MASK  0x0000FFFF
-#define IXGB_MSRWD_WRITE_DATA_SHIFT 0
-#define IXGB_MSRWD_READ_DATA_MASK   0xFFFF0000
-#define IXGB_MSRWD_READ_DATA_SHIFT  16
-
-/* Definitions for the optics devices on the MDIO bus. */
-#define IXGB_PHY_ADDRESS             0x0	/* Single PHY, multiple "Devices" */
-
-#define MDIO_PMA_PMD_XPAK_VENDOR_NAME       0x803A	/* XPAK/XENPAK devices only */
-
-/* Vendor-specific MDIO registers */
-#define G6XXX_PMA_PMD_VS1                   0xC001	/* Vendor-specific register */
-#define G6XXX_XGXS_XAUI_VS2                 0x18	/* Vendor-specific register */
-
-#define G6XXX_PMA_PMD_VS1_PLL_RESET         0x80
-#define G6XXX_PMA_PMD_VS1_REMOVE_PLL_RESET  0x00
-#define G6XXX_XGXS_XAUI_VS2_INPUT_MASK      0x0F	/* XAUI lanes synchronized */
-
-/* Layout of a single receive descriptor.  The controller assumes that this
- * structure is packed into 16 bytes, which is a safe assumption with most
- * compilers.  However, some compilers may insert padding between the fields,
- * in which case the structure must be packed in some compiler-specific
- * manner. */
-struct ixgb_rx_desc {
-	__le64 buff_addr;
-	__le16 length;
-	__le16 reserved;
-	u8 status;
-	u8 errors;
-	__le16 special;
-};
-
-#define IXGB_RX_DESC_STATUS_DD    0x01
-#define IXGB_RX_DESC_STATUS_EOP   0x02
-#define IXGB_RX_DESC_STATUS_IXSM  0x04
-#define IXGB_RX_DESC_STATUS_VP    0x08
-#define IXGB_RX_DESC_STATUS_TCPCS 0x20
-#define IXGB_RX_DESC_STATUS_IPCS  0x40
-#define IXGB_RX_DESC_STATUS_PIF   0x80
-
-#define IXGB_RX_DESC_ERRORS_CE   0x01
-#define IXGB_RX_DESC_ERRORS_SE   0x02
-#define IXGB_RX_DESC_ERRORS_P    0x08
-#define IXGB_RX_DESC_ERRORS_TCPE 0x20
-#define IXGB_RX_DESC_ERRORS_IPE  0x40
-#define IXGB_RX_DESC_ERRORS_RXE  0x80
-
-#define IXGB_RX_DESC_SPECIAL_VLAN_MASK  0x0FFF	/* VLAN ID is in lower 12 bits */
-#define IXGB_RX_DESC_SPECIAL_PRI_MASK   0xE000	/* Priority is in upper 3 bits */
-#define IXGB_RX_DESC_SPECIAL_PRI_SHIFT  0x000D	/* Priority is in upper 3 of 16 */
-
-/* Layout of a single transmit descriptor.  The controller assumes that this
- * structure is packed into 16 bytes, which is a safe assumption with most
- * compilers.  However, some compilers may insert padding between the fields,
- * in which case the structure must be packed in some compiler-specific
- * manner. */
-struct ixgb_tx_desc {
-	__le64 buff_addr;
-	__le32 cmd_type_len;
-	u8 status;
-	u8 popts;
-	__le16 vlan;
-};
-
-#define IXGB_TX_DESC_LENGTH_MASK    0x000FFFFF
-#define IXGB_TX_DESC_TYPE_MASK      0x00F00000
-#define IXGB_TX_DESC_TYPE_SHIFT     20
-#define IXGB_TX_DESC_CMD_MASK       0xFF000000
-#define IXGB_TX_DESC_CMD_SHIFT      24
-#define IXGB_TX_DESC_CMD_EOP        0x01000000
-#define IXGB_TX_DESC_CMD_TSE        0x04000000
-#define IXGB_TX_DESC_CMD_RS         0x08000000
-#define IXGB_TX_DESC_CMD_VLE        0x40000000
-#define IXGB_TX_DESC_CMD_IDE        0x80000000
-
-#define IXGB_TX_DESC_TYPE           0x00100000
-
-#define IXGB_TX_DESC_STATUS_DD  0x01
-
-#define IXGB_TX_DESC_POPTS_IXSM 0x01
-#define IXGB_TX_DESC_POPTS_TXSM 0x02
-#define IXGB_TX_DESC_SPECIAL_PRI_SHIFT  IXGB_RX_DESC_SPECIAL_PRI_SHIFT	/* Priority is in upper 3 of 16 */
-
-struct ixgb_context_desc {
-	u8 ipcss;
-	u8 ipcso;
-	__le16 ipcse;
-	u8 tucss;
-	u8 tucso;
-	__le16 tucse;
-	__le32 cmd_type_len;
-	u8 status;
-	u8 hdr_len;
-	__le16 mss;
-};
-
-#define IXGB_CONTEXT_DESC_CMD_TCP 0x01000000
-#define IXGB_CONTEXT_DESC_CMD_IP  0x02000000
-#define IXGB_CONTEXT_DESC_CMD_TSE 0x04000000
-#define IXGB_CONTEXT_DESC_CMD_RS  0x08000000
-#define IXGB_CONTEXT_DESC_CMD_IDE 0x80000000
-
-#define IXGB_CONTEXT_DESC_TYPE 0x00000000
-
-#define IXGB_CONTEXT_DESC_STATUS_DD 0x01
-
-/* Filters */
-#define IXGB_MC_TBL_SIZE          128	/* Multicast Filter Table (4096 bits) */
-#define IXGB_VLAN_FILTER_TBL_SIZE 128	/* VLAN Filter Table (4096 bits) */
-#define IXGB_RAR_ENTRIES		  3	/* Number of entries in Rx Address array */
-
-#define IXGB_MEMORY_REGISTER_BASE_ADDRESS   0
-#define ENET_HEADER_SIZE			14
-#define ENET_FCS_LENGTH			 4
-#define IXGB_MAX_NUM_MULTICAST_ADDRESSES	128
-#define IXGB_MIN_ENET_FRAME_SIZE_WITHOUT_FCS	60
-#define IXGB_MAX_ENET_FRAME_SIZE_WITHOUT_FCS	1514
-#define IXGB_MAX_JUMBO_FRAME_SIZE		0x3F00
-
-/* Phy Addresses */
-#define IXGB_OPTICAL_PHY_ADDR 0x0	/* Optical Module phy address */
-#define IXGB_XAUII_PHY_ADDR   0x1	/* Xauii transceiver phy address */
-#define IXGB_DIAG_PHY_ADDR    0x1F	/* Diagnostic Device phy address */
-
-/* This structure takes a 64k flash and maps it for identification commands */
-struct ixgb_flash_buffer {
-	u8 manufacturer_id;
-	u8 device_id;
-	u8 filler1[0x2AA8];
-	u8 cmd2;
-	u8 filler2[0x2AAA];
-	u8 cmd1;
-	u8 filler3[0xAAAA];
-};
-
-/* Flow control parameters */
-struct ixgb_fc {
-	u32 high_water;	/* Flow Control High-water          */
-	u32 low_water;	/* Flow Control Low-water           */
-	u16 pause_time;	/* Flow Control Pause timer         */
-	bool send_xon;		/* Flow control send XON            */
-	ixgb_fc_type type;	/* Type of flow control             */
-};
-
-/* The historical defaults for the flow control values are given below. */
-#define FC_DEFAULT_HI_THRESH        (0x8000)	/* 32KB */
-#define FC_DEFAULT_LO_THRESH        (0x4000)	/* 16KB */
-#define FC_DEFAULT_TX_TIMER         (0x100)	/* ~130 us */
-
-/* Phy definitions */
-#define IXGB_MAX_PHY_REG_ADDRESS    0xFFFF
-#define IXGB_MAX_PHY_ADDRESS        31
-#define IXGB_MAX_PHY_DEV_TYPE       31
-
-/* Bus parameters */
-struct ixgb_bus {
-	ixgb_bus_speed speed;
-	ixgb_bus_width width;
-	ixgb_bus_type type;
-};
-
-struct ixgb_hw {
-	u8 __iomem *hw_addr;/* Base Address of the hardware     */
-	void *back;		/* Pointer to OS-dependent struct   */
-	struct ixgb_fc fc;	/* Flow control parameters          */
-	struct ixgb_bus bus;	/* Bus parameters                   */
-	u32 phy_id;	/* Phy Identifier                   */
-	u32 phy_addr;	/* XGMII address of Phy             */
-	ixgb_mac_type mac_type;	/* Identifier for MAC controller    */
-	ixgb_phy_type phy_type;	/* Transceiver/phy identifier       */
-	u32 max_frame_size;	/* Maximum frame size supported     */
-	u32 mc_filter_type;	/* Multicast filter hash type       */
-	u32 num_mc_addrs;	/* Number of current Multicast addrs */
-	u8 curr_mac_addr[ETH_ALEN];	/* Individual address currently programmed in MAC */
-	u32 num_tx_desc;	/* Number of Transmit descriptors   */
-	u32 num_rx_desc;	/* Number of Receive descriptors    */
-	u32 rx_buffer_size;	/* Size of Receive buffer           */
-	bool link_up;		/* true if link is valid            */
-	bool adapter_stopped;	/* State of adapter                 */
-	u16 device_id;	/* device id from PCI configuration space */
-	u16 vendor_id;	/* vendor id from PCI configuration space */
-	u8 revision_id;	/* revision id from PCI configuration space */
-	u16 subsystem_vendor_id;	/* subsystem vendor id from PCI configuration space */
-	u16 subsystem_id;	/* subsystem id from PCI configuration space */
-	u32 bar0;		/* Base Address registers           */
-	u32 bar1;
-	u32 bar2;
-	u32 bar3;
-	u16 pci_cmd_word;	/* PCI command register id from PCI configuration space */
-	__le16 eeprom[IXGB_EEPROM_SIZE];	/* EEPROM contents read at init time  */
-	unsigned long io_base;	/* Our I/O mapped location */
-	u32 lastLFC;
-	u32 lastRFC;
-};
-
-/* Statistics reported by the hardware */
-struct ixgb_hw_stats {
-	u64 tprl;
-	u64 tprh;
-	u64 gprcl;
-	u64 gprch;
-	u64 bprcl;
-	u64 bprch;
-	u64 mprcl;
-	u64 mprch;
-	u64 uprcl;
-	u64 uprch;
-	u64 vprcl;
-	u64 vprch;
-	u64 jprcl;
-	u64 jprch;
-	u64 gorcl;
-	u64 gorch;
-	u64 torl;
-	u64 torh;
-	u64 rnbc;
-	u64 ruc;
-	u64 roc;
-	u64 rlec;
-	u64 crcerrs;
-	u64 icbc;
-	u64 ecbc;
-	u64 mpc;
-	u64 tptl;
-	u64 tpth;
-	u64 gptcl;
-	u64 gptch;
-	u64 bptcl;
-	u64 bptch;
-	u64 mptcl;
-	u64 mptch;
-	u64 uptcl;
-	u64 uptch;
-	u64 vptcl;
-	u64 vptch;
-	u64 jptcl;
-	u64 jptch;
-	u64 gotcl;
-	u64 gotch;
-	u64 totl;
-	u64 toth;
-	u64 dc;
-	u64 plt64c;
-	u64 tsctc;
-	u64 tsctfc;
-	u64 ibic;
-	u64 rfc;
-	u64 lfc;
-	u64 pfrc;
-	u64 pftc;
-	u64 mcfrc;
-	u64 mcftc;
-	u64 xonrxc;
-	u64 xontxc;
-	u64 xoffrxc;
-	u64 xofftxc;
-	u64 rjc;
-};
-
-/* Function Prototypes */
-bool ixgb_adapter_stop(struct ixgb_hw *hw);
-bool ixgb_init_hw(struct ixgb_hw *hw);
-bool ixgb_adapter_start(struct ixgb_hw *hw);
-void ixgb_check_for_link(struct ixgb_hw *hw);
-bool ixgb_check_for_bad_link(struct ixgb_hw *hw);
-
-void ixgb_rar_set(struct ixgb_hw *hw, const u8 *addr, u32 index);
-
-/* Filters (multicast, vlan, receive) */
-void ixgb_mc_addr_list_update(struct ixgb_hw *hw, u8 *mc_addr_list,
-			      u32 mc_addr_count, u32 pad);
-
-/* Vfta functions */
-void ixgb_write_vfta(struct ixgb_hw *hw, u32 offset, u32 value);
-
-/* Access functions to eeprom data */
-void ixgb_get_ee_mac_addr(struct ixgb_hw *hw, u8 *mac_addr);
-u32 ixgb_get_ee_pba_number(struct ixgb_hw *hw);
-u16 ixgb_get_ee_device_id(struct ixgb_hw *hw);
-bool ixgb_get_eeprom_data(struct ixgb_hw *hw);
-__le16 ixgb_get_eeprom_word(struct ixgb_hw *hw, u16 index);
-
-/* Everything else */
-void ixgb_led_on(struct ixgb_hw *hw);
-void ixgb_led_off(struct ixgb_hw *hw);
-void ixgb_write_pci_cfg(struct ixgb_hw *hw,
-			 u32 reg,
-			 u16 * value);
-
-
-#endif /* _IXGB_HW_H_ */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h b/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
deleted file mode 100644
index 9695b8215f01..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_ids.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#ifndef _IXGB_IDS_H_
-#define _IXGB_IDS_H_
-
-/**********************************************************************
-** The Device and Vendor IDs for 10 Gigabit MACs
-**********************************************************************/
-
-#define IXGB_DEVICE_ID_82597EX      0x1048
-#define IXGB_DEVICE_ID_82597EX_SR   0x1A48
-#define IXGB_DEVICE_ID_82597EX_LR   0x1B48
-#define IXGB_SUBDEVICE_ID_A11F      0xA11F
-#define IXGB_SUBDEVICE_ID_A01F      0xA01F
-
-#define IXGB_DEVICE_ID_82597EX_CX4   0x109E
-#define IXGB_SUBDEVICE_ID_A00C  0xA00C
-#define IXGB_SUBDEVICE_ID_A01C  0xA01C
-#define IXGB_SUBDEVICE_ID_7036  0x7036
-
-#endif /* #ifndef _IXGB_IDS_H_ */
-/* End of File */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
deleted file mode 100644
index b4d47e7a76c8..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ /dev/null
@@ -1,2285 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/prefetch.h>
-#include "ixgb.h"
-
-char ixgb_driver_name[] = "ixgb";
-static char ixgb_driver_string[] = "Intel(R) PRO/10GbE Network Driver";
-
-static const char ixgb_copyright[] = "Copyright (c) 1999-2008 Intel Corporation.";
-
-#define IXGB_CB_LENGTH 256
-static unsigned int copybreak __read_mostly = IXGB_CB_LENGTH;
-module_param(copybreak, uint, 0644);
-MODULE_PARM_DESC(copybreak,
-	"Maximum size of packet that is copied to a new buffer on receive");
-
-/* ixgb_pci_tbl - PCI Device ID Table
- *
- * Wildcard entries (PCI_ANY_ID) should come last
- * Last entry must be all 0s
- *
- * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
- *   Class, Class Mask, private data (not used) }
- */
-static const struct pci_device_id ixgb_pci_tbl[] = {
-	{PCI_VENDOR_ID_INTEL, IXGB_DEVICE_ID_82597EX,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{PCI_VENDOR_ID_INTEL, IXGB_DEVICE_ID_82597EX_CX4,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{PCI_VENDOR_ID_INTEL, IXGB_DEVICE_ID_82597EX_SR,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-	{PCI_VENDOR_ID_INTEL, IXGB_DEVICE_ID_82597EX_LR,
-	 PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0},
-
-	/* required last entry */
-	{0,}
-};
-
-MODULE_DEVICE_TABLE(pci, ixgb_pci_tbl);
-
-/* Local Function Prototypes */
-static int ixgb_init_module(void);
-static void ixgb_exit_module(void);
-static int ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
-static void ixgb_remove(struct pci_dev *pdev);
-static int ixgb_sw_init(struct ixgb_adapter *adapter);
-static int ixgb_open(struct net_device *netdev);
-static int ixgb_close(struct net_device *netdev);
-static void ixgb_configure_tx(struct ixgb_adapter *adapter);
-static void ixgb_configure_rx(struct ixgb_adapter *adapter);
-static void ixgb_setup_rctl(struct ixgb_adapter *adapter);
-static void ixgb_clean_tx_ring(struct ixgb_adapter *adapter);
-static void ixgb_clean_rx_ring(struct ixgb_adapter *adapter);
-static void ixgb_set_multi(struct net_device *netdev);
-static void ixgb_watchdog(struct timer_list *t);
-static netdev_tx_t ixgb_xmit_frame(struct sk_buff *skb,
-				   struct net_device *netdev);
-static int ixgb_change_mtu(struct net_device *netdev, int new_mtu);
-static int ixgb_set_mac(struct net_device *netdev, void *p);
-static irqreturn_t ixgb_intr(int irq, void *data);
-static bool ixgb_clean_tx_irq(struct ixgb_adapter *adapter);
-
-static int ixgb_clean(struct napi_struct *, int);
-static bool ixgb_clean_rx_irq(struct ixgb_adapter *, int *, int);
-static void ixgb_alloc_rx_buffers(struct ixgb_adapter *, int);
-
-static void ixgb_tx_timeout(struct net_device *dev, unsigned int txqueue);
-static void ixgb_tx_timeout_task(struct work_struct *work);
-
-static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter);
-static void ixgb_vlan_strip_disable(struct ixgb_adapter *adapter);
-static int ixgb_vlan_rx_add_vid(struct net_device *netdev,
-				__be16 proto, u16 vid);
-static int ixgb_vlan_rx_kill_vid(struct net_device *netdev,
-				 __be16 proto, u16 vid);
-static void ixgb_restore_vlan(struct ixgb_adapter *adapter);
-
-static pci_ers_result_t ixgb_io_error_detected (struct pci_dev *pdev,
-                             pci_channel_state_t state);
-static pci_ers_result_t ixgb_io_slot_reset (struct pci_dev *pdev);
-static void ixgb_io_resume (struct pci_dev *pdev);
-
-static const struct pci_error_handlers ixgb_err_handler = {
-	.error_detected = ixgb_io_error_detected,
-	.slot_reset = ixgb_io_slot_reset,
-	.resume = ixgb_io_resume,
-};
-
-static struct pci_driver ixgb_driver = {
-	.name     = ixgb_driver_name,
-	.id_table = ixgb_pci_tbl,
-	.probe    = ixgb_probe,
-	.remove   = ixgb_remove,
-	.err_handler = &ixgb_err_handler
-};
-
-MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
-MODULE_DESCRIPTION("Intel(R) PRO/10GbE Network Driver");
-MODULE_LICENSE("GPL v2");
-
-#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
-static int debug = -1;
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-
-/**
- * ixgb_init_module - Driver Registration Routine
- *
- * ixgb_init_module is the first routine called when the driver is
- * loaded. All it does is register with the PCI subsystem.
- **/
-
-static int __init
-ixgb_init_module(void)
-{
-	pr_info("%s\n", ixgb_driver_string);
-	pr_info("%s\n", ixgb_copyright);
-
-	return pci_register_driver(&ixgb_driver);
-}
-
-module_init(ixgb_init_module);
-
-/**
- * ixgb_exit_module - Driver Exit Cleanup Routine
- *
- * ixgb_exit_module is called just before the driver is removed
- * from memory.
- **/
-
-static void __exit
-ixgb_exit_module(void)
-{
-	pci_unregister_driver(&ixgb_driver);
-}
-
-module_exit(ixgb_exit_module);
-
-/**
- * ixgb_irq_disable - Mask off interrupt generation on the NIC
- * @adapter: board private structure
- **/
-
-static void
-ixgb_irq_disable(struct ixgb_adapter *adapter)
-{
-	IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
-	IXGB_WRITE_FLUSH(&adapter->hw);
-	synchronize_irq(adapter->pdev->irq);
-}
-
-/**
- * ixgb_irq_enable - Enable default interrupt generation settings
- * @adapter: board private structure
- **/
-
-static void
-ixgb_irq_enable(struct ixgb_adapter *adapter)
-{
-	u32 val = IXGB_INT_RXT0 | IXGB_INT_RXDMT0 |
-		  IXGB_INT_TXDW | IXGB_INT_LSC;
-	if (adapter->hw.subsystem_vendor_id == PCI_VENDOR_ID_SUN)
-		val |= IXGB_INT_GPI0;
-	IXGB_WRITE_REG(&adapter->hw, IMS, val);
-	IXGB_WRITE_FLUSH(&adapter->hw);
-}
-
-int
-ixgb_up(struct ixgb_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	int err, irq_flags = IRQF_SHARED;
-	int max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
-	struct ixgb_hw *hw = &adapter->hw;
-
-	/* hardware has been reset, we need to reload some things */
-
-	ixgb_rar_set(hw, netdev->dev_addr, 0);
-	ixgb_set_multi(netdev);
-
-	ixgb_restore_vlan(adapter);
-
-	ixgb_configure_tx(adapter);
-	ixgb_setup_rctl(adapter);
-	ixgb_configure_rx(adapter);
-	ixgb_alloc_rx_buffers(adapter, IXGB_DESC_UNUSED(&adapter->rx_ring));
-
-	/* disable interrupts and get the hardware into a known state */
-	IXGB_WRITE_REG(&adapter->hw, IMC, 0xffffffff);
-
-	/* only enable MSI if bus is in PCI-X mode */
-	if (IXGB_READ_REG(&adapter->hw, STATUS) & IXGB_STATUS_PCIX_MODE) {
-		err = pci_enable_msi(adapter->pdev);
-		if (!err) {
-			adapter->have_msi = true;
-			irq_flags = 0;
-		}
-		/* proceed to try to request regular interrupt */
-	}
-
-	err = request_irq(adapter->pdev->irq, ixgb_intr, irq_flags,
-	                  netdev->name, netdev);
-	if (err) {
-		if (adapter->have_msi)
-			pci_disable_msi(adapter->pdev);
-		netif_err(adapter, probe, adapter->netdev,
-			  "Unable to allocate interrupt Error: %d\n", err);
-		return err;
-	}
-
-	if ((hw->max_frame_size != max_frame) ||
-		(hw->max_frame_size !=
-		(IXGB_READ_REG(hw, MFS) >> IXGB_MFS_SHIFT))) {
-
-		hw->max_frame_size = max_frame;
-
-		IXGB_WRITE_REG(hw, MFS, hw->max_frame_size << IXGB_MFS_SHIFT);
-
-		if (hw->max_frame_size >
-		   IXGB_MAX_ENET_FRAME_SIZE_WITHOUT_FCS + ENET_FCS_LENGTH) {
-			u32 ctrl0 = IXGB_READ_REG(hw, CTRL0);
-
-			if (!(ctrl0 & IXGB_CTRL0_JFE)) {
-				ctrl0 |= IXGB_CTRL0_JFE;
-				IXGB_WRITE_REG(hw, CTRL0, ctrl0);
-			}
-		}
-	}
-
-	clear_bit(__IXGB_DOWN, &adapter->flags);
-
-	napi_enable(&adapter->napi);
-	ixgb_irq_enable(adapter);
-
-	netif_wake_queue(netdev);
-
-	mod_timer(&adapter->watchdog_timer, jiffies);
-
-	return 0;
-}
-
-void
-ixgb_down(struct ixgb_adapter *adapter, bool kill_watchdog)
-{
-	struct net_device *netdev = adapter->netdev;
-
-	/* prevent the interrupt handler from restarting watchdog */
-	set_bit(__IXGB_DOWN, &adapter->flags);
-
-	netif_carrier_off(netdev);
-
-	napi_disable(&adapter->napi);
-	/* waiting for NAPI to complete can re-enable interrupts */
-	ixgb_irq_disable(adapter);
-	free_irq(adapter->pdev->irq, netdev);
-
-	if (adapter->have_msi)
-		pci_disable_msi(adapter->pdev);
-
-	if (kill_watchdog)
-		del_timer_sync(&adapter->watchdog_timer);
-
-	adapter->link_speed = 0;
-	adapter->link_duplex = 0;
-	netif_stop_queue(netdev);
-
-	ixgb_reset(adapter);
-	ixgb_clean_tx_ring(adapter);
-	ixgb_clean_rx_ring(adapter);
-}
-
-void
-ixgb_reset(struct ixgb_adapter *adapter)
-{
-	struct ixgb_hw *hw = &adapter->hw;
-
-	ixgb_adapter_stop(hw);
-	if (!ixgb_init_hw(hw))
-		netif_err(adapter, probe, adapter->netdev, "ixgb_init_hw failed\n");
-
-	/* restore frame size information */
-	IXGB_WRITE_REG(hw, MFS, hw->max_frame_size << IXGB_MFS_SHIFT);
-	if (hw->max_frame_size >
-	    IXGB_MAX_ENET_FRAME_SIZE_WITHOUT_FCS + ENET_FCS_LENGTH) {
-		u32 ctrl0 = IXGB_READ_REG(hw, CTRL0);
-		if (!(ctrl0 & IXGB_CTRL0_JFE)) {
-			ctrl0 |= IXGB_CTRL0_JFE;
-			IXGB_WRITE_REG(hw, CTRL0, ctrl0);
-		}
-	}
-}
-
-static netdev_features_t
-ixgb_fix_features(struct net_device *netdev, netdev_features_t features)
-{
-	/*
-	 * Tx VLAN insertion does not work per HW design when Rx stripping is
-	 * disabled.
-	 */
-	if (!(features & NETIF_F_HW_VLAN_CTAG_RX))
-		features &= ~NETIF_F_HW_VLAN_CTAG_TX;
-
-	return features;
-}
-
-static int
-ixgb_set_features(struct net_device *netdev, netdev_features_t features)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	netdev_features_t changed = features ^ netdev->features;
-
-	if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_CTAG_RX)))
-		return 0;
-
-	adapter->rx_csum = !!(features & NETIF_F_RXCSUM);
-
-	if (netif_running(netdev)) {
-		ixgb_down(adapter, true);
-		ixgb_up(adapter);
-		ixgb_set_speed_duplex(netdev);
-	} else
-		ixgb_reset(adapter);
-
-	return 0;
-}
-
-
-static const struct net_device_ops ixgb_netdev_ops = {
-	.ndo_open 		= ixgb_open,
-	.ndo_stop		= ixgb_close,
-	.ndo_start_xmit		= ixgb_xmit_frame,
-	.ndo_set_rx_mode	= ixgb_set_multi,
-	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address	= ixgb_set_mac,
-	.ndo_change_mtu		= ixgb_change_mtu,
-	.ndo_tx_timeout		= ixgb_tx_timeout,
-	.ndo_vlan_rx_add_vid	= ixgb_vlan_rx_add_vid,
-	.ndo_vlan_rx_kill_vid	= ixgb_vlan_rx_kill_vid,
-	.ndo_fix_features       = ixgb_fix_features,
-	.ndo_set_features       = ixgb_set_features,
-};
-
-/**
- * ixgb_probe - Device Initialization Routine
- * @pdev: PCI device information struct
- * @ent: entry in ixgb_pci_tbl
- *
- * Returns 0 on success, negative on failure
- *
- * ixgb_probe initializes an adapter identified by a pci_dev structure.
- * The OS initialization, configuring of the adapter private structure,
- * and a hardware reset occur.
- **/
-
-static int
-ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
-{
-	struct net_device *netdev = NULL;
-	struct ixgb_adapter *adapter;
-	static int cards_found = 0;
-	u8 addr[ETH_ALEN];
-	int i;
-	int err;
-
-	err = pci_enable_device(pdev);
-	if (err)
-		return err;
-
-	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
-	if (err) {
-		pr_err("No usable DMA configuration, aborting\n");
-		goto err_dma_mask;
-	}
-
-	err = pci_request_regions(pdev, ixgb_driver_name);
-	if (err)
-		goto err_request_regions;
-
-	pci_set_master(pdev);
-
-	netdev = alloc_etherdev(sizeof(struct ixgb_adapter));
-	if (!netdev) {
-		err = -ENOMEM;
-		goto err_alloc_etherdev;
-	}
-
-	SET_NETDEV_DEV(netdev, &pdev->dev);
-
-	pci_set_drvdata(pdev, netdev);
-	adapter = netdev_priv(netdev);
-	adapter->netdev = netdev;
-	adapter->pdev = pdev;
-	adapter->hw.back = adapter;
-	adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
-
-	adapter->hw.hw_addr = pci_ioremap_bar(pdev, BAR_0);
-	if (!adapter->hw.hw_addr) {
-		err = -EIO;
-		goto err_ioremap;
-	}
-
-	for (i = BAR_1; i < PCI_STD_NUM_BARS; i++) {
-		if (pci_resource_len(pdev, i) == 0)
-			continue;
-		if (pci_resource_flags(pdev, i) & IORESOURCE_IO) {
-			adapter->hw.io_base = pci_resource_start(pdev, i);
-			break;
-		}
-	}
-
-	netdev->netdev_ops = &ixgb_netdev_ops;
-	ixgb_set_ethtool_ops(netdev);
-	netdev->watchdog_timeo = 5 * HZ;
-	netif_napi_add(netdev, &adapter->napi, ixgb_clean);
-
-	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
-
-	adapter->bd_number = cards_found;
-	adapter->link_speed = 0;
-	adapter->link_duplex = 0;
-
-	/* setup the private structure */
-
-	err = ixgb_sw_init(adapter);
-	if (err)
-		goto err_sw_init;
-
-	netdev->hw_features = NETIF_F_SG |
-			   NETIF_F_TSO |
-			   NETIF_F_HW_CSUM |
-			   NETIF_F_HW_VLAN_CTAG_TX |
-			   NETIF_F_HW_VLAN_CTAG_RX;
-	netdev->features = netdev->hw_features |
-			   NETIF_F_HW_VLAN_CTAG_FILTER;
-	netdev->hw_features |= NETIF_F_RXCSUM;
-
-	netdev->features |= NETIF_F_HIGHDMA;
-	netdev->vlan_features |= NETIF_F_HIGHDMA;
-
-	/* MTU range: 68 - 16114 */
-	netdev->min_mtu = ETH_MIN_MTU;
-	netdev->max_mtu = IXGB_MAX_JUMBO_FRAME_SIZE - ETH_HLEN;
-
-	/* make sure the EEPROM is good */
-
-	if (!ixgb_validate_eeprom_checksum(&adapter->hw)) {
-		netif_err(adapter, probe, adapter->netdev,
-			  "The EEPROM Checksum Is Not Valid\n");
-		err = -EIO;
-		goto err_eeprom;
-	}
-
-	ixgb_get_ee_mac_addr(&adapter->hw, addr);
-	eth_hw_addr_set(netdev, addr);
-
-	if (!is_valid_ether_addr(netdev->dev_addr)) {
-		netif_err(adapter, probe, adapter->netdev, "Invalid MAC Address\n");
-		err = -EIO;
-		goto err_eeprom;
-	}
-
-	adapter->part_num = ixgb_get_ee_pba_number(&adapter->hw);
-
-	timer_setup(&adapter->watchdog_timer, ixgb_watchdog, 0);
-
-	INIT_WORK(&adapter->tx_timeout_task, ixgb_tx_timeout_task);
-
-	strcpy(netdev->name, "eth%d");
-	err = register_netdev(netdev);
-	if (err)
-		goto err_register;
-
-	/* carrier off reporting is important to ethtool even BEFORE open */
-	netif_carrier_off(netdev);
-
-	netif_info(adapter, probe, adapter->netdev,
-		   "Intel(R) PRO/10GbE Network Connection\n");
-	ixgb_check_options(adapter);
-	/* reset the hardware with the new settings */
-
-	ixgb_reset(adapter);
-
-	cards_found++;
-	return 0;
-
-err_register:
-err_sw_init:
-err_eeprom:
-	iounmap(adapter->hw.hw_addr);
-err_ioremap:
-	free_netdev(netdev);
-err_alloc_etherdev:
-	pci_release_regions(pdev);
-err_request_regions:
-err_dma_mask:
-	pci_disable_device(pdev);
-	return err;
-}
-
-/**
- * ixgb_remove - Device Removal Routine
- * @pdev: PCI device information struct
- *
- * ixgb_remove is called by the PCI subsystem to alert the driver
- * that it should release a PCI device.  The could be caused by a
- * Hot-Plug event, or because the driver is going to be removed from
- * memory.
- **/
-
-static void
-ixgb_remove(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	cancel_work_sync(&adapter->tx_timeout_task);
-
-	unregister_netdev(netdev);
-
-	iounmap(adapter->hw.hw_addr);
-	pci_release_regions(pdev);
-
-	free_netdev(netdev);
-	pci_disable_device(pdev);
-}
-
-/**
- * ixgb_sw_init - Initialize general software structures (struct ixgb_adapter)
- * @adapter: board private structure to initialize
- *
- * ixgb_sw_init initializes the Adapter private data structure.
- * Fields are initialized based on PCI device information and
- * OS network device settings (MTU size).
- **/
-
-static int
-ixgb_sw_init(struct ixgb_adapter *adapter)
-{
-	struct ixgb_hw *hw = &adapter->hw;
-	struct net_device *netdev = adapter->netdev;
-	struct pci_dev *pdev = adapter->pdev;
-
-	/* PCI config space info */
-
-	hw->vendor_id = pdev->vendor;
-	hw->device_id = pdev->device;
-	hw->subsystem_vendor_id = pdev->subsystem_vendor;
-	hw->subsystem_id = pdev->subsystem_device;
-
-	hw->max_frame_size = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
-	adapter->rx_buffer_len = hw->max_frame_size + 8; /* + 8 for errata */
-
-	if ((hw->device_id == IXGB_DEVICE_ID_82597EX) ||
-	    (hw->device_id == IXGB_DEVICE_ID_82597EX_CX4) ||
-	    (hw->device_id == IXGB_DEVICE_ID_82597EX_LR) ||
-	    (hw->device_id == IXGB_DEVICE_ID_82597EX_SR))
-		hw->mac_type = ixgb_82597;
-	else {
-		/* should never have loaded on this device */
-		netif_err(adapter, probe, adapter->netdev, "unsupported device id\n");
-	}
-
-	/* enable flow control to be programmed */
-	hw->fc.send_xon = 1;
-
-	set_bit(__IXGB_DOWN, &adapter->flags);
-	return 0;
-}
-
-/**
- * ixgb_open - Called when a network interface is made active
- * @netdev: network interface device structure
- *
- * Returns 0 on success, negative value on failure
- *
- * The open entry point is called when a network interface is made
- * active by the system (IFF_UP).  At this point all resources needed
- * for transmit and receive operations are allocated, the interrupt
- * handler is registered with the OS, the watchdog timer is started,
- * and the stack is notified that the interface is ready.
- **/
-
-static int
-ixgb_open(struct net_device *netdev)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	int err;
-
-	/* allocate transmit descriptors */
-	err = ixgb_setup_tx_resources(adapter);
-	if (err)
-		goto err_setup_tx;
-
-	netif_carrier_off(netdev);
-
-	/* allocate receive descriptors */
-
-	err = ixgb_setup_rx_resources(adapter);
-	if (err)
-		goto err_setup_rx;
-
-	err = ixgb_up(adapter);
-	if (err)
-		goto err_up;
-
-	netif_start_queue(netdev);
-
-	return 0;
-
-err_up:
-	ixgb_free_rx_resources(adapter);
-err_setup_rx:
-	ixgb_free_tx_resources(adapter);
-err_setup_tx:
-	ixgb_reset(adapter);
-
-	return err;
-}
-
-/**
- * ixgb_close - Disables a network interface
- * @netdev: network interface device structure
- *
- * Returns 0, this is not allowed to fail
- *
- * The close entry point is called when an interface is de-activated
- * by the OS.  The hardware is still under the drivers control, but
- * needs to be disabled.  A global MAC reset is issued to stop the
- * hardware, and all transmit and receive resources are freed.
- **/
-
-static int
-ixgb_close(struct net_device *netdev)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	ixgb_down(adapter, true);
-
-	ixgb_free_tx_resources(adapter);
-	ixgb_free_rx_resources(adapter);
-
-	return 0;
-}
-
-/**
- * ixgb_setup_tx_resources - allocate Tx resources (Descriptors)
- * @adapter: board private structure
- *
- * Return 0 on success, negative on failure
- **/
-
-int
-ixgb_setup_tx_resources(struct ixgb_adapter *adapter)
-{
-	struct ixgb_desc_ring *txdr = &adapter->tx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-	int size;
-
-	size = sizeof(struct ixgb_buffer) * txdr->count;
-	txdr->buffer_info = vzalloc(size);
-	if (!txdr->buffer_info)
-		return -ENOMEM;
-
-	/* round up to nearest 4K */
-
-	txdr->size = txdr->count * sizeof(struct ixgb_tx_desc);
-	txdr->size = ALIGN(txdr->size, 4096);
-
-	txdr->desc = dma_alloc_coherent(&pdev->dev, txdr->size, &txdr->dma,
-					GFP_KERNEL);
-	if (!txdr->desc) {
-		vfree(txdr->buffer_info);
-		return -ENOMEM;
-	}
-
-	txdr->next_to_use = 0;
-	txdr->next_to_clean = 0;
-
-	return 0;
-}
-
-/**
- * ixgb_configure_tx - Configure 82597 Transmit Unit after Reset.
- * @adapter: board private structure
- *
- * Configure the Tx unit of the MAC after a reset.
- **/
-
-static void
-ixgb_configure_tx(struct ixgb_adapter *adapter)
-{
-	u64 tdba = adapter->tx_ring.dma;
-	u32 tdlen = adapter->tx_ring.count * sizeof(struct ixgb_tx_desc);
-	u32 tctl;
-	struct ixgb_hw *hw = &adapter->hw;
-
-	/* Setup the Base and Length of the Tx Descriptor Ring
-	 * tx_ring.dma can be either a 32 or 64 bit value
-	 */
-
-	IXGB_WRITE_REG(hw, TDBAL, (tdba & 0x00000000ffffffffULL));
-	IXGB_WRITE_REG(hw, TDBAH, (tdba >> 32));
-
-	IXGB_WRITE_REG(hw, TDLEN, tdlen);
-
-	/* Setup the HW Tx Head and Tail descriptor pointers */
-
-	IXGB_WRITE_REG(hw, TDH, 0);
-	IXGB_WRITE_REG(hw, TDT, 0);
-
-	/* don't set up txdctl, it induces performance problems if configured
-	 * incorrectly */
-	/* Set the Tx Interrupt Delay register */
-
-	IXGB_WRITE_REG(hw, TIDV, adapter->tx_int_delay);
-
-	/* Program the Transmit Control Register */
-
-	tctl = IXGB_TCTL_TCE | IXGB_TCTL_TXEN | IXGB_TCTL_TPDE;
-	IXGB_WRITE_REG(hw, TCTL, tctl);
-
-	/* Setup Transmit Descriptor Settings for this adapter */
-	adapter->tx_cmd_type =
-		IXGB_TX_DESC_TYPE |
-		(adapter->tx_int_delay_enable ? IXGB_TX_DESC_CMD_IDE : 0);
-}
-
-/**
- * ixgb_setup_rx_resources - allocate Rx resources (Descriptors)
- * @adapter: board private structure
- *
- * Returns 0 on success, negative on failure
- **/
-
-int
-ixgb_setup_rx_resources(struct ixgb_adapter *adapter)
-{
-	struct ixgb_desc_ring *rxdr = &adapter->rx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-	int size;
-
-	size = sizeof(struct ixgb_buffer) * rxdr->count;
-	rxdr->buffer_info = vzalloc(size);
-	if (!rxdr->buffer_info)
-		return -ENOMEM;
-
-	/* Round up to nearest 4K */
-
-	rxdr->size = rxdr->count * sizeof(struct ixgb_rx_desc);
-	rxdr->size = ALIGN(rxdr->size, 4096);
-
-	rxdr->desc = dma_alloc_coherent(&pdev->dev, rxdr->size, &rxdr->dma,
-					GFP_KERNEL);
-
-	if (!rxdr->desc) {
-		vfree(rxdr->buffer_info);
-		return -ENOMEM;
-	}
-
-	rxdr->next_to_clean = 0;
-	rxdr->next_to_use = 0;
-
-	return 0;
-}
-
-/**
- * ixgb_setup_rctl - configure the receive control register
- * @adapter: Board private structure
- **/
-
-static void
-ixgb_setup_rctl(struct ixgb_adapter *adapter)
-{
-	u32 rctl;
-
-	rctl = IXGB_READ_REG(&adapter->hw, RCTL);
-
-	rctl &= ~(3 << IXGB_RCTL_MO_SHIFT);
-
-	rctl |=
-		IXGB_RCTL_BAM | IXGB_RCTL_RDMTS_1_2 |
-		IXGB_RCTL_RXEN | IXGB_RCTL_CFF |
-		(adapter->hw.mc_filter_type << IXGB_RCTL_MO_SHIFT);
-
-	rctl |= IXGB_RCTL_SECRC;
-
-	if (adapter->rx_buffer_len <= IXGB_RXBUFFER_2048)
-		rctl |= IXGB_RCTL_BSIZE_2048;
-	else if (adapter->rx_buffer_len <= IXGB_RXBUFFER_4096)
-		rctl |= IXGB_RCTL_BSIZE_4096;
-	else if (adapter->rx_buffer_len <= IXGB_RXBUFFER_8192)
-		rctl |= IXGB_RCTL_BSIZE_8192;
-	else if (adapter->rx_buffer_len <= IXGB_RXBUFFER_16384)
-		rctl |= IXGB_RCTL_BSIZE_16384;
-
-	IXGB_WRITE_REG(&adapter->hw, RCTL, rctl);
-}
-
-/**
- * ixgb_configure_rx - Configure 82597 Receive Unit after Reset.
- * @adapter: board private structure
- *
- * Configure the Rx unit of the MAC after a reset.
- **/
-
-static void
-ixgb_configure_rx(struct ixgb_adapter *adapter)
-{
-	u64 rdba = adapter->rx_ring.dma;
-	u32 rdlen = adapter->rx_ring.count * sizeof(struct ixgb_rx_desc);
-	struct ixgb_hw *hw = &adapter->hw;
-	u32 rctl;
-	u32 rxcsum;
-
-	/* make sure receives are disabled while setting up the descriptors */
-
-	rctl = IXGB_READ_REG(hw, RCTL);
-	IXGB_WRITE_REG(hw, RCTL, rctl & ~IXGB_RCTL_RXEN);
-
-	/* set the Receive Delay Timer Register */
-
-	IXGB_WRITE_REG(hw, RDTR, adapter->rx_int_delay);
-
-	/* Setup the Base and Length of the Rx Descriptor Ring */
-
-	IXGB_WRITE_REG(hw, RDBAL, (rdba & 0x00000000ffffffffULL));
-	IXGB_WRITE_REG(hw, RDBAH, (rdba >> 32));
-
-	IXGB_WRITE_REG(hw, RDLEN, rdlen);
-
-	/* Setup the HW Rx Head and Tail Descriptor Pointers */
-	IXGB_WRITE_REG(hw, RDH, 0);
-	IXGB_WRITE_REG(hw, RDT, 0);
-
-	/* due to the hardware errata with RXDCTL, we are unable to use any of
-	 * the performance enhancing features of it without causing other
-	 * subtle bugs, some of the bugs could include receive length
-	 * corruption at high data rates (WTHRESH > 0) and/or receive
-	 * descriptor ring irregularites (particularly in hardware cache) */
-	IXGB_WRITE_REG(hw, RXDCTL, 0);
-
-	/* Enable Receive Checksum Offload for TCP and UDP */
-	if (adapter->rx_csum) {
-		rxcsum = IXGB_READ_REG(hw, RXCSUM);
-		rxcsum |= IXGB_RXCSUM_TUOFL;
-		IXGB_WRITE_REG(hw, RXCSUM, rxcsum);
-	}
-
-	/* Enable Receives */
-
-	IXGB_WRITE_REG(hw, RCTL, rctl);
-}
-
-/**
- * ixgb_free_tx_resources - Free Tx Resources
- * @adapter: board private structure
- *
- * Free all transmit software resources
- **/
-
-void
-ixgb_free_tx_resources(struct ixgb_adapter *adapter)
-{
-	struct pci_dev *pdev = adapter->pdev;
-
-	ixgb_clean_tx_ring(adapter);
-
-	vfree(adapter->tx_ring.buffer_info);
-	adapter->tx_ring.buffer_info = NULL;
-
-	dma_free_coherent(&pdev->dev, adapter->tx_ring.size,
-			  adapter->tx_ring.desc, adapter->tx_ring.dma);
-
-	adapter->tx_ring.desc = NULL;
-}
-
-static void
-ixgb_unmap_and_free_tx_resource(struct ixgb_adapter *adapter,
-                                struct ixgb_buffer *buffer_info)
-{
-	if (buffer_info->dma) {
-		if (buffer_info->mapped_as_page)
-			dma_unmap_page(&adapter->pdev->dev, buffer_info->dma,
-				       buffer_info->length, DMA_TO_DEVICE);
-		else
-			dma_unmap_single(&adapter->pdev->dev, buffer_info->dma,
-					 buffer_info->length, DMA_TO_DEVICE);
-		buffer_info->dma = 0;
-	}
-
-	if (buffer_info->skb) {
-		dev_kfree_skb_any(buffer_info->skb);
-		buffer_info->skb = NULL;
-	}
-	buffer_info->time_stamp = 0;
-	/* these fields must always be initialized in tx
-	 * buffer_info->length = 0;
-	 * buffer_info->next_to_watch = 0; */
-}
-
-/**
- * ixgb_clean_tx_ring - Free Tx Buffers
- * @adapter: board private structure
- **/
-
-static void
-ixgb_clean_tx_ring(struct ixgb_adapter *adapter)
-{
-	struct ixgb_desc_ring *tx_ring = &adapter->tx_ring;
-	struct ixgb_buffer *buffer_info;
-	unsigned long size;
-	unsigned int i;
-
-	/* Free all the Tx ring sk_buffs */
-
-	for (i = 0; i < tx_ring->count; i++) {
-		buffer_info = &tx_ring->buffer_info[i];
-		ixgb_unmap_and_free_tx_resource(adapter, buffer_info);
-	}
-
-	size = sizeof(struct ixgb_buffer) * tx_ring->count;
-	memset(tx_ring->buffer_info, 0, size);
-
-	/* Zero out the descriptor ring */
-
-	memset(tx_ring->desc, 0, tx_ring->size);
-
-	tx_ring->next_to_use = 0;
-	tx_ring->next_to_clean = 0;
-
-	IXGB_WRITE_REG(&adapter->hw, TDH, 0);
-	IXGB_WRITE_REG(&adapter->hw, TDT, 0);
-}
-
-/**
- * ixgb_free_rx_resources - Free Rx Resources
- * @adapter: board private structure
- *
- * Free all receive software resources
- **/
-
-void
-ixgb_free_rx_resources(struct ixgb_adapter *adapter)
-{
-	struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-
-	ixgb_clean_rx_ring(adapter);
-
-	vfree(rx_ring->buffer_info);
-	rx_ring->buffer_info = NULL;
-
-	dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
-			  rx_ring->dma);
-
-	rx_ring->desc = NULL;
-}
-
-/**
- * ixgb_clean_rx_ring - Free Rx Buffers
- * @adapter: board private structure
- **/
-
-static void
-ixgb_clean_rx_ring(struct ixgb_adapter *adapter)
-{
-	struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
-	struct ixgb_buffer *buffer_info;
-	struct pci_dev *pdev = adapter->pdev;
-	unsigned long size;
-	unsigned int i;
-
-	/* Free all the Rx ring sk_buffs */
-
-	for (i = 0; i < rx_ring->count; i++) {
-		buffer_info = &rx_ring->buffer_info[i];
-		if (buffer_info->dma) {
-			dma_unmap_single(&pdev->dev,
-					 buffer_info->dma,
-					 buffer_info->length,
-					 DMA_FROM_DEVICE);
-			buffer_info->dma = 0;
-			buffer_info->length = 0;
-		}
-
-		if (buffer_info->skb) {
-			dev_kfree_skb(buffer_info->skb);
-			buffer_info->skb = NULL;
-		}
-	}
-
-	size = sizeof(struct ixgb_buffer) * rx_ring->count;
-	memset(rx_ring->buffer_info, 0, size);
-
-	/* Zero out the descriptor ring */
-
-	memset(rx_ring->desc, 0, rx_ring->size);
-
-	rx_ring->next_to_clean = 0;
-	rx_ring->next_to_use = 0;
-
-	IXGB_WRITE_REG(&adapter->hw, RDH, 0);
-	IXGB_WRITE_REG(&adapter->hw, RDT, 0);
-}
-
-/**
- * ixgb_set_mac - Change the Ethernet Address of the NIC
- * @netdev: network interface device structure
- * @p: pointer to an address structure
- *
- * Returns 0 on success, negative on failure
- **/
-
-static int
-ixgb_set_mac(struct net_device *netdev, void *p)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct sockaddr *addr = p;
-
-	if (!is_valid_ether_addr(addr->sa_data))
-		return -EADDRNOTAVAIL;
-
-	eth_hw_addr_set(netdev, addr->sa_data);
-
-	ixgb_rar_set(&adapter->hw, addr->sa_data, 0);
-
-	return 0;
-}
-
-/**
- * ixgb_set_multi - Multicast and Promiscuous mode set
- * @netdev: network interface device structure
- *
- * The set_multi entry point is called whenever the multicast address
- * list or the network interface flags are updated.  This routine is
- * responsible for configuring the hardware for proper multicast,
- * promiscuous mode, and all-multi behavior.
- **/
-
-static void
-ixgb_set_multi(struct net_device *netdev)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-	struct netdev_hw_addr *ha;
-	u32 rctl;
-
-	/* Check for Promiscuous and All Multicast modes */
-
-	rctl = IXGB_READ_REG(hw, RCTL);
-
-	if (netdev->flags & IFF_PROMISC) {
-		rctl |= (IXGB_RCTL_UPE | IXGB_RCTL_MPE);
-		/* disable VLAN filtering */
-		rctl &= ~IXGB_RCTL_CFIEN;
-		rctl &= ~IXGB_RCTL_VFE;
-	} else {
-		if (netdev->flags & IFF_ALLMULTI) {
-			rctl |= IXGB_RCTL_MPE;
-			rctl &= ~IXGB_RCTL_UPE;
-		} else {
-			rctl &= ~(IXGB_RCTL_UPE | IXGB_RCTL_MPE);
-		}
-		/* enable VLAN filtering */
-		rctl |= IXGB_RCTL_VFE;
-		rctl &= ~IXGB_RCTL_CFIEN;
-	}
-
-	if (netdev_mc_count(netdev) > IXGB_MAX_NUM_MULTICAST_ADDRESSES) {
-		rctl |= IXGB_RCTL_MPE;
-		IXGB_WRITE_REG(hw, RCTL, rctl);
-	} else {
-		u8 *mta = kmalloc_array(ETH_ALEN,
-				        IXGB_MAX_NUM_MULTICAST_ADDRESSES,
-				        GFP_ATOMIC);
-		u8 *addr;
-		if (!mta)
-			goto alloc_failed;
-
-		IXGB_WRITE_REG(hw, RCTL, rctl);
-
-		addr = mta;
-		netdev_for_each_mc_addr(ha, netdev) {
-			memcpy(addr, ha->addr, ETH_ALEN);
-			addr += ETH_ALEN;
-		}
-
-		ixgb_mc_addr_list_update(hw, mta, netdev_mc_count(netdev), 0);
-		kfree(mta);
-	}
-
-alloc_failed:
-	if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
-		ixgb_vlan_strip_enable(adapter);
-	else
-		ixgb_vlan_strip_disable(adapter);
-
-}
-
-/**
- * ixgb_watchdog - Timer Call-back
- * @t: pointer to timer_list containing our private info pointer
- **/
-
-static void
-ixgb_watchdog(struct timer_list *t)
-{
-	struct ixgb_adapter *adapter = from_timer(adapter, t, watchdog_timer);
-	struct net_device *netdev = adapter->netdev;
-	struct ixgb_desc_ring *txdr = &adapter->tx_ring;
-
-	ixgb_check_for_link(&adapter->hw);
-
-	if (ixgb_check_for_bad_link(&adapter->hw)) {
-		/* force the reset path */
-		netif_stop_queue(netdev);
-	}
-
-	if (adapter->hw.link_up) {
-		if (!netif_carrier_ok(netdev)) {
-			netdev_info(netdev,
-				    "NIC Link is Up 10 Gbps Full Duplex, Flow Control: %s\n",
-				    (adapter->hw.fc.type == ixgb_fc_full) ?
-				    "RX/TX" :
-				    (adapter->hw.fc.type == ixgb_fc_rx_pause) ?
-				     "RX" :
-				    (adapter->hw.fc.type == ixgb_fc_tx_pause) ?
-				    "TX" : "None");
-			adapter->link_speed = 10000;
-			adapter->link_duplex = FULL_DUPLEX;
-			netif_carrier_on(netdev);
-		}
-	} else {
-		if (netif_carrier_ok(netdev)) {
-			adapter->link_speed = 0;
-			adapter->link_duplex = 0;
-			netdev_info(netdev, "NIC Link is Down\n");
-			netif_carrier_off(netdev);
-		}
-	}
-
-	ixgb_update_stats(adapter);
-
-	if (!netif_carrier_ok(netdev)) {
-		if (IXGB_DESC_UNUSED(txdr) + 1 < txdr->count) {
-			/* We've lost link, so the controller stops DMA,
-			 * but we've got queued Tx work that's never going
-			 * to get done, so reset controller to flush Tx.
-			 * (Do the reset outside of interrupt context). */
-			schedule_work(&adapter->tx_timeout_task);
-			/* return immediately since reset is imminent */
-			return;
-		}
-	}
-
-	/* Force detection of hung controller every watchdog period */
-	adapter->detect_tx_hung = true;
-
-	/* generate an interrupt to force clean up of any stragglers */
-	IXGB_WRITE_REG(&adapter->hw, ICS, IXGB_INT_TXDW);
-
-	/* Reset the timer */
-	mod_timer(&adapter->watchdog_timer, jiffies + 2 * HZ);
-}
-
-#define IXGB_TX_FLAGS_CSUM		0x00000001
-#define IXGB_TX_FLAGS_VLAN		0x00000002
-#define IXGB_TX_FLAGS_TSO		0x00000004
-
-static int
-ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb)
-{
-	struct ixgb_context_desc *context_desc;
-	unsigned int i;
-	u8 ipcss, ipcso, tucss, tucso, hdr_len;
-	u16 ipcse, tucse, mss;
-
-	if (likely(skb_is_gso(skb))) {
-		struct ixgb_buffer *buffer_info;
-		struct iphdr *iph;
-		int err;
-
-		err = skb_cow_head(skb, 0);
-		if (err < 0)
-			return err;
-
-		hdr_len = skb_tcp_all_headers(skb);
-		mss = skb_shinfo(skb)->gso_size;
-		iph = ip_hdr(skb);
-		iph->tot_len = 0;
-		iph->check = 0;
-		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
-							 iph->daddr, 0,
-							 IPPROTO_TCP, 0);
-		ipcss = skb_network_offset(skb);
-		ipcso = (void *)&(iph->check) - (void *)skb->data;
-		ipcse = skb_transport_offset(skb) - 1;
-		tucss = skb_transport_offset(skb);
-		tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
-		tucse = 0;
-
-		i = adapter->tx_ring.next_to_use;
-		context_desc = IXGB_CONTEXT_DESC(adapter->tx_ring, i);
-		buffer_info = &adapter->tx_ring.buffer_info[i];
-		WARN_ON(buffer_info->dma != 0);
-
-		context_desc->ipcss = ipcss;
-		context_desc->ipcso = ipcso;
-		context_desc->ipcse = cpu_to_le16(ipcse);
-		context_desc->tucss = tucss;
-		context_desc->tucso = tucso;
-		context_desc->tucse = cpu_to_le16(tucse);
-		context_desc->mss = cpu_to_le16(mss);
-		context_desc->hdr_len = hdr_len;
-		context_desc->status = 0;
-		context_desc->cmd_type_len = cpu_to_le32(
-						  IXGB_CONTEXT_DESC_TYPE
-						| IXGB_CONTEXT_DESC_CMD_TSE
-						| IXGB_CONTEXT_DESC_CMD_IP
-						| IXGB_CONTEXT_DESC_CMD_TCP
-						| IXGB_CONTEXT_DESC_CMD_IDE
-						| (skb->len - (hdr_len)));
-
-
-		if (++i == adapter->tx_ring.count) i = 0;
-		adapter->tx_ring.next_to_use = i;
-
-		return 1;
-	}
-
-	return 0;
-}
-
-static bool
-ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb)
-{
-	struct ixgb_context_desc *context_desc;
-	unsigned int i;
-	u8 css, cso;
-
-	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
-		struct ixgb_buffer *buffer_info;
-		css = skb_checksum_start_offset(skb);
-		cso = css + skb->csum_offset;
-
-		i = adapter->tx_ring.next_to_use;
-		context_desc = IXGB_CONTEXT_DESC(adapter->tx_ring, i);
-		buffer_info = &adapter->tx_ring.buffer_info[i];
-		WARN_ON(buffer_info->dma != 0);
-
-		context_desc->tucss = css;
-		context_desc->tucso = cso;
-		context_desc->tucse = 0;
-		/* zero out any previously existing data in one instruction */
-		*(u32 *)&(context_desc->ipcss) = 0;
-		context_desc->status = 0;
-		context_desc->hdr_len = 0;
-		context_desc->mss = 0;
-		context_desc->cmd_type_len =
-			cpu_to_le32(IXGB_CONTEXT_DESC_TYPE
-				    | IXGB_TX_DESC_CMD_IDE);
-
-		if (++i == adapter->tx_ring.count) i = 0;
-		adapter->tx_ring.next_to_use = i;
-
-		return true;
-	}
-
-	return false;
-}
-
-#define IXGB_MAX_TXD_PWR	14
-#define IXGB_MAX_DATA_PER_TXD	(1<<IXGB_MAX_TXD_PWR)
-
-static int
-ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb,
-	    unsigned int first)
-{
-	struct ixgb_desc_ring *tx_ring = &adapter->tx_ring;
-	struct pci_dev *pdev = adapter->pdev;
-	struct ixgb_buffer *buffer_info;
-	int len = skb_headlen(skb);
-	unsigned int offset = 0, size, count = 0, i;
-	unsigned int mss = skb_shinfo(skb)->gso_size;
-	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned int f;
-
-	i = tx_ring->next_to_use;
-
-	while (len) {
-		buffer_info = &tx_ring->buffer_info[i];
-		size = min(len, IXGB_MAX_DATA_PER_TXD);
-		/* Workaround for premature desc write-backs
-		 * in TSO mode.  Append 4-byte sentinel desc */
-		if (unlikely(mss && !nr_frags && size == len && size > 8))
-			size -= 4;
-
-		buffer_info->length = size;
-		WARN_ON(buffer_info->dma != 0);
-		buffer_info->time_stamp = jiffies;
-		buffer_info->mapped_as_page = false;
-		buffer_info->dma = dma_map_single(&pdev->dev,
-						  skb->data + offset,
-						  size, DMA_TO_DEVICE);
-		if (dma_mapping_error(&pdev->dev, buffer_info->dma))
-			goto dma_error;
-		buffer_info->next_to_watch = 0;
-
-		len -= size;
-		offset += size;
-		count++;
-		if (len) {
-			i++;
-			if (i == tx_ring->count)
-				i = 0;
-		}
-	}
-
-	for (f = 0; f < nr_frags; f++) {
-		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
-		len = skb_frag_size(frag);
-		offset = 0;
-
-		while (len) {
-			i++;
-			if (i == tx_ring->count)
-				i = 0;
-
-			buffer_info = &tx_ring->buffer_info[i];
-			size = min(len, IXGB_MAX_DATA_PER_TXD);
-
-			/* Workaround for premature desc write-backs
-			 * in TSO mode.  Append 4-byte sentinel desc */
-			if (unlikely(mss && (f == (nr_frags - 1))
-				     && size == len && size > 8))
-				size -= 4;
-
-			buffer_info->length = size;
-			buffer_info->time_stamp = jiffies;
-			buffer_info->mapped_as_page = true;
-			buffer_info->dma =
-				skb_frag_dma_map(&pdev->dev, frag, offset, size,
-						 DMA_TO_DEVICE);
-			if (dma_mapping_error(&pdev->dev, buffer_info->dma))
-				goto dma_error;
-			buffer_info->next_to_watch = 0;
-
-			len -= size;
-			offset += size;
-			count++;
-		}
-	}
-	tx_ring->buffer_info[i].skb = skb;
-	tx_ring->buffer_info[first].next_to_watch = i;
-
-	return count;
-
-dma_error:
-	dev_err(&pdev->dev, "TX DMA map failed\n");
-	buffer_info->dma = 0;
-	if (count)
-		count--;
-
-	while (count--) {
-		if (i==0)
-			i += tx_ring->count;
-		i--;
-		buffer_info = &tx_ring->buffer_info[i];
-		ixgb_unmap_and_free_tx_resource(adapter, buffer_info);
-	}
-
-	return 0;
-}
-
-static void
-ixgb_tx_queue(struct ixgb_adapter *adapter, int count, int vlan_id,int tx_flags)
-{
-	struct ixgb_desc_ring *tx_ring = &adapter->tx_ring;
-	struct ixgb_tx_desc *tx_desc = NULL;
-	struct ixgb_buffer *buffer_info;
-	u32 cmd_type_len = adapter->tx_cmd_type;
-	u8 status = 0;
-	u8 popts = 0;
-	unsigned int i;
-
-	if (tx_flags & IXGB_TX_FLAGS_TSO) {
-		cmd_type_len |= IXGB_TX_DESC_CMD_TSE;
-		popts |= (IXGB_TX_DESC_POPTS_IXSM | IXGB_TX_DESC_POPTS_TXSM);
-	}
-
-	if (tx_flags & IXGB_TX_FLAGS_CSUM)
-		popts |= IXGB_TX_DESC_POPTS_TXSM;
-
-	if (tx_flags & IXGB_TX_FLAGS_VLAN)
-		cmd_type_len |= IXGB_TX_DESC_CMD_VLE;
-
-	i = tx_ring->next_to_use;
-
-	while (count--) {
-		buffer_info = &tx_ring->buffer_info[i];
-		tx_desc = IXGB_TX_DESC(*tx_ring, i);
-		tx_desc->buff_addr = cpu_to_le64(buffer_info->dma);
-		tx_desc->cmd_type_len =
-			cpu_to_le32(cmd_type_len | buffer_info->length);
-		tx_desc->status = status;
-		tx_desc->popts = popts;
-		tx_desc->vlan = cpu_to_le16(vlan_id);
-
-		if (++i == tx_ring->count) i = 0;
-	}
-
-	tx_desc->cmd_type_len |=
-		cpu_to_le32(IXGB_TX_DESC_CMD_EOP | IXGB_TX_DESC_CMD_RS);
-
-	/* Force memory writes to complete before letting h/w
-	 * know there are new descriptors to fetch.  (Only
-	 * applicable for weak-ordered memory model archs,
-	 * such as IA-64). */
-	wmb();
-
-	tx_ring->next_to_use = i;
-	IXGB_WRITE_REG(&adapter->hw, TDT, i);
-}
-
-static int __ixgb_maybe_stop_tx(struct net_device *netdev, int size)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_desc_ring *tx_ring = &adapter->tx_ring;
-
-	netif_stop_queue(netdev);
-	/* Herbert's original patch had:
-	 *  smp_mb__after_netif_stop_queue();
-	 * but since that doesn't exist yet, just open code it. */
-	smp_mb();
-
-	/* We need to check again in a case another CPU has just
-	 * made room available. */
-	if (likely(IXGB_DESC_UNUSED(tx_ring) < size))
-		return -EBUSY;
-
-	/* A reprieve! */
-	netif_start_queue(netdev);
-	++adapter->restart_queue;
-	return 0;
-}
-
-static int ixgb_maybe_stop_tx(struct net_device *netdev,
-                              struct ixgb_desc_ring *tx_ring, int size)
-{
-	if (likely(IXGB_DESC_UNUSED(tx_ring) >= size))
-		return 0;
-	return __ixgb_maybe_stop_tx(netdev, size);
-}
-
-
-/* Tx Descriptors needed, worst case */
-#define TXD_USE_COUNT(S) (((S) >> IXGB_MAX_TXD_PWR) + \
-			 (((S) & (IXGB_MAX_DATA_PER_TXD - 1)) ? 1 : 0))
-#define DESC_NEEDED TXD_USE_COUNT(IXGB_MAX_DATA_PER_TXD) /* skb->date */ + \
-	MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE) + 1 /* for context */ \
-	+ 1 /* one more needed for sentinel TSO workaround */
-
-static netdev_tx_t
-ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	unsigned int first;
-	unsigned int tx_flags = 0;
-	int vlan_id = 0;
-	int count = 0;
-	int tso;
-
-	if (test_bit(__IXGB_DOWN, &adapter->flags)) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
-	if (skb->len <= 0) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
-	if (unlikely(ixgb_maybe_stop_tx(netdev, &adapter->tx_ring,
-                     DESC_NEEDED)))
-		return NETDEV_TX_BUSY;
-
-	if (skb_vlan_tag_present(skb)) {
-		tx_flags |= IXGB_TX_FLAGS_VLAN;
-		vlan_id = skb_vlan_tag_get(skb);
-	}
-
-	first = adapter->tx_ring.next_to_use;
-
-	tso = ixgb_tso(adapter, skb);
-	if (tso < 0) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
-	if (likely(tso))
-		tx_flags |= IXGB_TX_FLAGS_TSO;
-	else if (ixgb_tx_csum(adapter, skb))
-		tx_flags |= IXGB_TX_FLAGS_CSUM;
-
-	count = ixgb_tx_map(adapter, skb, first);
-
-	if (count) {
-		ixgb_tx_queue(adapter, count, vlan_id, tx_flags);
-		/* Make sure there is space in the ring for the next send. */
-		ixgb_maybe_stop_tx(netdev, &adapter->tx_ring, DESC_NEEDED);
-
-	} else {
-		dev_kfree_skb_any(skb);
-		adapter->tx_ring.buffer_info[first].time_stamp = 0;
-		adapter->tx_ring.next_to_use = first;
-	}
-
-	return NETDEV_TX_OK;
-}
-
-/**
- * ixgb_tx_timeout - Respond to a Tx Hang
- * @netdev: network interface device structure
- * @txqueue: queue hanging (unused)
- **/
-
-static void
-ixgb_tx_timeout(struct net_device *netdev, unsigned int __always_unused txqueue)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	/* Do the reset outside of interrupt context */
-	schedule_work(&adapter->tx_timeout_task);
-}
-
-static void
-ixgb_tx_timeout_task(struct work_struct *work)
-{
-	struct ixgb_adapter *adapter =
-		container_of(work, struct ixgb_adapter, tx_timeout_task);
-
-	adapter->tx_timeout_count++;
-	ixgb_down(adapter, true);
-	ixgb_up(adapter);
-}
-
-/**
- * ixgb_change_mtu - Change the Maximum Transfer Unit
- * @netdev: network interface device structure
- * @new_mtu: new value for maximum frame size
- *
- * Returns 0 on success, negative on failure
- **/
-
-static int
-ixgb_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	int max_frame = new_mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
-
-	if (netif_running(netdev))
-		ixgb_down(adapter, true);
-
-	adapter->rx_buffer_len = max_frame + 8; /* + 8 for errata */
-
-	netdev->mtu = new_mtu;
-
-	if (netif_running(netdev))
-		ixgb_up(adapter);
-
-	return 0;
-}
-
-/**
- * ixgb_update_stats - Update the board statistics counters.
- * @adapter: board private structure
- **/
-
-void
-ixgb_update_stats(struct ixgb_adapter *adapter)
-{
-	struct net_device *netdev = adapter->netdev;
-	struct pci_dev *pdev = adapter->pdev;
-
-	/* Prevent stats update while adapter is being reset */
-	if (pci_channel_offline(pdev))
-		return;
-
-	if ((netdev->flags & IFF_PROMISC) || (netdev->flags & IFF_ALLMULTI) ||
-	   (netdev_mc_count(netdev) > IXGB_MAX_NUM_MULTICAST_ADDRESSES)) {
-		u64 multi = IXGB_READ_REG(&adapter->hw, MPRCL);
-		u32 bcast_l = IXGB_READ_REG(&adapter->hw, BPRCL);
-		u32 bcast_h = IXGB_READ_REG(&adapter->hw, BPRCH);
-		u64 bcast = ((u64)bcast_h << 32) | bcast_l;
-
-		multi |= ((u64)IXGB_READ_REG(&adapter->hw, MPRCH) << 32);
-		/* fix up multicast stats by removing broadcasts */
-		if (multi >= bcast)
-			multi -= bcast;
-
-		adapter->stats.mprcl += (multi & 0xFFFFFFFF);
-		adapter->stats.mprch += (multi >> 32);
-		adapter->stats.bprcl += bcast_l;
-		adapter->stats.bprch += bcast_h;
-	} else {
-		adapter->stats.mprcl += IXGB_READ_REG(&adapter->hw, MPRCL);
-		adapter->stats.mprch += IXGB_READ_REG(&adapter->hw, MPRCH);
-		adapter->stats.bprcl += IXGB_READ_REG(&adapter->hw, BPRCL);
-		adapter->stats.bprch += IXGB_READ_REG(&adapter->hw, BPRCH);
-	}
-	adapter->stats.tprl += IXGB_READ_REG(&adapter->hw, TPRL);
-	adapter->stats.tprh += IXGB_READ_REG(&adapter->hw, TPRH);
-	adapter->stats.gprcl += IXGB_READ_REG(&adapter->hw, GPRCL);
-	adapter->stats.gprch += IXGB_READ_REG(&adapter->hw, GPRCH);
-	adapter->stats.uprcl += IXGB_READ_REG(&adapter->hw, UPRCL);
-	adapter->stats.uprch += IXGB_READ_REG(&adapter->hw, UPRCH);
-	adapter->stats.vprcl += IXGB_READ_REG(&adapter->hw, VPRCL);
-	adapter->stats.vprch += IXGB_READ_REG(&adapter->hw, VPRCH);
-	adapter->stats.jprcl += IXGB_READ_REG(&adapter->hw, JPRCL);
-	adapter->stats.jprch += IXGB_READ_REG(&adapter->hw, JPRCH);
-	adapter->stats.gorcl += IXGB_READ_REG(&adapter->hw, GORCL);
-	adapter->stats.gorch += IXGB_READ_REG(&adapter->hw, GORCH);
-	adapter->stats.torl += IXGB_READ_REG(&adapter->hw, TORL);
-	adapter->stats.torh += IXGB_READ_REG(&adapter->hw, TORH);
-	adapter->stats.rnbc += IXGB_READ_REG(&adapter->hw, RNBC);
-	adapter->stats.ruc += IXGB_READ_REG(&adapter->hw, RUC);
-	adapter->stats.roc += IXGB_READ_REG(&adapter->hw, ROC);
-	adapter->stats.rlec += IXGB_READ_REG(&adapter->hw, RLEC);
-	adapter->stats.crcerrs += IXGB_READ_REG(&adapter->hw, CRCERRS);
-	adapter->stats.icbc += IXGB_READ_REG(&adapter->hw, ICBC);
-	adapter->stats.ecbc += IXGB_READ_REG(&adapter->hw, ECBC);
-	adapter->stats.mpc += IXGB_READ_REG(&adapter->hw, MPC);
-	adapter->stats.tptl += IXGB_READ_REG(&adapter->hw, TPTL);
-	adapter->stats.tpth += IXGB_READ_REG(&adapter->hw, TPTH);
-	adapter->stats.gptcl += IXGB_READ_REG(&adapter->hw, GPTCL);
-	adapter->stats.gptch += IXGB_READ_REG(&adapter->hw, GPTCH);
-	adapter->stats.bptcl += IXGB_READ_REG(&adapter->hw, BPTCL);
-	adapter->stats.bptch += IXGB_READ_REG(&adapter->hw, BPTCH);
-	adapter->stats.mptcl += IXGB_READ_REG(&adapter->hw, MPTCL);
-	adapter->stats.mptch += IXGB_READ_REG(&adapter->hw, MPTCH);
-	adapter->stats.uptcl += IXGB_READ_REG(&adapter->hw, UPTCL);
-	adapter->stats.uptch += IXGB_READ_REG(&adapter->hw, UPTCH);
-	adapter->stats.vptcl += IXGB_READ_REG(&adapter->hw, VPTCL);
-	adapter->stats.vptch += IXGB_READ_REG(&adapter->hw, VPTCH);
-	adapter->stats.jptcl += IXGB_READ_REG(&adapter->hw, JPTCL);
-	adapter->stats.jptch += IXGB_READ_REG(&adapter->hw, JPTCH);
-	adapter->stats.gotcl += IXGB_READ_REG(&adapter->hw, GOTCL);
-	adapter->stats.gotch += IXGB_READ_REG(&adapter->hw, GOTCH);
-	adapter->stats.totl += IXGB_READ_REG(&adapter->hw, TOTL);
-	adapter->stats.toth += IXGB_READ_REG(&adapter->hw, TOTH);
-	adapter->stats.dc += IXGB_READ_REG(&adapter->hw, DC);
-	adapter->stats.plt64c += IXGB_READ_REG(&adapter->hw, PLT64C);
-	adapter->stats.tsctc += IXGB_READ_REG(&adapter->hw, TSCTC);
-	adapter->stats.tsctfc += IXGB_READ_REG(&adapter->hw, TSCTFC);
-	adapter->stats.ibic += IXGB_READ_REG(&adapter->hw, IBIC);
-	adapter->stats.rfc += IXGB_READ_REG(&adapter->hw, RFC);
-	adapter->stats.lfc += IXGB_READ_REG(&adapter->hw, LFC);
-	adapter->stats.pfrc += IXGB_READ_REG(&adapter->hw, PFRC);
-	adapter->stats.pftc += IXGB_READ_REG(&adapter->hw, PFTC);
-	adapter->stats.mcfrc += IXGB_READ_REG(&adapter->hw, MCFRC);
-	adapter->stats.mcftc += IXGB_READ_REG(&adapter->hw, MCFTC);
-	adapter->stats.xonrxc += IXGB_READ_REG(&adapter->hw, XONRXC);
-	adapter->stats.xontxc += IXGB_READ_REG(&adapter->hw, XONTXC);
-	adapter->stats.xoffrxc += IXGB_READ_REG(&adapter->hw, XOFFRXC);
-	adapter->stats.xofftxc += IXGB_READ_REG(&adapter->hw, XOFFTXC);
-	adapter->stats.rjc += IXGB_READ_REG(&adapter->hw, RJC);
-
-	/* Fill out the OS statistics structure */
-
-	netdev->stats.rx_packets = adapter->stats.gprcl;
-	netdev->stats.tx_packets = adapter->stats.gptcl;
-	netdev->stats.rx_bytes = adapter->stats.gorcl;
-	netdev->stats.tx_bytes = adapter->stats.gotcl;
-	netdev->stats.multicast = adapter->stats.mprcl;
-	netdev->stats.collisions = 0;
-
-	/* ignore RLEC as it reports errors for padded (<64bytes) frames
-	 * with a length in the type/len field */
-	netdev->stats.rx_errors =
-	    /* adapter->stats.rnbc + */ adapter->stats.crcerrs +
-	    adapter->stats.ruc +
-	    adapter->stats.roc /*+ adapter->stats.rlec */  +
-	    adapter->stats.icbc +
-	    adapter->stats.ecbc + adapter->stats.mpc;
-
-	/* see above
-	 * netdev->stats.rx_length_errors = adapter->stats.rlec;
-	 */
-
-	netdev->stats.rx_crc_errors = adapter->stats.crcerrs;
-	netdev->stats.rx_fifo_errors = adapter->stats.mpc;
-	netdev->stats.rx_missed_errors = adapter->stats.mpc;
-	netdev->stats.rx_over_errors = adapter->stats.mpc;
-
-	netdev->stats.tx_errors = 0;
-	netdev->stats.rx_frame_errors = 0;
-	netdev->stats.tx_aborted_errors = 0;
-	netdev->stats.tx_carrier_errors = 0;
-	netdev->stats.tx_fifo_errors = 0;
-	netdev->stats.tx_heartbeat_errors = 0;
-	netdev->stats.tx_window_errors = 0;
-}
-
-/**
- * ixgb_intr - Interrupt Handler
- * @irq: interrupt number
- * @data: pointer to a network interface device structure
- **/
-
-static irqreturn_t
-ixgb_intr(int irq, void *data)
-{
-	struct net_device *netdev = data;
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	struct ixgb_hw *hw = &adapter->hw;
-	u32 icr = IXGB_READ_REG(hw, ICR);
-
-	if (unlikely(!icr))
-		return IRQ_NONE;  /* Not our interrupt */
-
-	if (unlikely(icr & (IXGB_INT_RXSEQ | IXGB_INT_LSC)))
-		if (!test_bit(__IXGB_DOWN, &adapter->flags))
-			mod_timer(&adapter->watchdog_timer, jiffies);
-
-	if (napi_schedule_prep(&adapter->napi)) {
-
-		/* Disable interrupts and register for poll. The flush
-		  of the posted write is intentionally left out.
-		*/
-
-		IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
-		__napi_schedule(&adapter->napi);
-	}
-	return IRQ_HANDLED;
-}
-
-/**
- * ixgb_clean - NAPI Rx polling callback
- * @napi: napi struct pointer
- * @budget: max number of receives to clean
- **/
-
-static int
-ixgb_clean(struct napi_struct *napi, int budget)
-{
-	struct ixgb_adapter *adapter = container_of(napi, struct ixgb_adapter, napi);
-	int work_done = 0;
-
-	ixgb_clean_tx_irq(adapter);
-	ixgb_clean_rx_irq(adapter, &work_done, budget);
-
-	/* If budget not fully consumed, exit the polling mode */
-	if (work_done < budget) {
-		napi_complete_done(napi, work_done);
-		if (!test_bit(__IXGB_DOWN, &adapter->flags))
-			ixgb_irq_enable(adapter);
-	}
-
-	return work_done;
-}
-
-/**
- * ixgb_clean_tx_irq - Reclaim resources after transmit completes
- * @adapter: board private structure
- **/
-
-static bool
-ixgb_clean_tx_irq(struct ixgb_adapter *adapter)
-{
-	struct ixgb_desc_ring *tx_ring = &adapter->tx_ring;
-	struct net_device *netdev = adapter->netdev;
-	struct ixgb_tx_desc *tx_desc, *eop_desc;
-	struct ixgb_buffer *buffer_info;
-	unsigned int i, eop;
-	bool cleaned = false;
-
-	i = tx_ring->next_to_clean;
-	eop = tx_ring->buffer_info[i].next_to_watch;
-	eop_desc = IXGB_TX_DESC(*tx_ring, eop);
-
-	while (eop_desc->status & IXGB_TX_DESC_STATUS_DD) {
-
-		rmb(); /* read buffer_info after eop_desc */
-		for (cleaned = false; !cleaned; ) {
-			tx_desc = IXGB_TX_DESC(*tx_ring, i);
-			buffer_info = &tx_ring->buffer_info[i];
-
-			if (tx_desc->popts &
-			   (IXGB_TX_DESC_POPTS_TXSM |
-			    IXGB_TX_DESC_POPTS_IXSM))
-				adapter->hw_csum_tx_good++;
-
-			ixgb_unmap_and_free_tx_resource(adapter, buffer_info);
-
-			*(u32 *)&(tx_desc->status) = 0;
-
-			cleaned = (i == eop);
-			if (++i == tx_ring->count) i = 0;
-		}
-
-		eop = tx_ring->buffer_info[i].next_to_watch;
-		eop_desc = IXGB_TX_DESC(*tx_ring, eop);
-	}
-
-	tx_ring->next_to_clean = i;
-
-	if (unlikely(cleaned && netif_carrier_ok(netdev) &&
-		     IXGB_DESC_UNUSED(tx_ring) >= DESC_NEEDED)) {
-		/* Make sure that anybody stopping the queue after this
-		 * sees the new next_to_clean. */
-		smp_mb();
-
-		if (netif_queue_stopped(netdev) &&
-		    !(test_bit(__IXGB_DOWN, &adapter->flags))) {
-			netif_wake_queue(netdev);
-			++adapter->restart_queue;
-		}
-	}
-
-	if (adapter->detect_tx_hung) {
-		/* detect a transmit hang in hardware, this serializes the
-		 * check with the clearing of time_stamp and movement of i */
-		adapter->detect_tx_hung = false;
-		if (tx_ring->buffer_info[eop].time_stamp &&
-		   time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + HZ)
-		   && !(IXGB_READ_REG(&adapter->hw, STATUS) &
-		        IXGB_STATUS_TXOFF)) {
-			/* detected Tx unit hang */
-			netif_err(adapter, drv, adapter->netdev,
-				  "Detected Tx Unit Hang\n"
-				  "  TDH                  <%x>\n"
-				  "  TDT                  <%x>\n"
-				  "  next_to_use          <%x>\n"
-				  "  next_to_clean        <%x>\n"
-				  "buffer_info[next_to_clean]\n"
-				  "  time_stamp           <%lx>\n"
-				  "  next_to_watch        <%x>\n"
-				  "  jiffies              <%lx>\n"
-				  "  next_to_watch.status <%x>\n",
-				  IXGB_READ_REG(&adapter->hw, TDH),
-				  IXGB_READ_REG(&adapter->hw, TDT),
-				  tx_ring->next_to_use,
-				  tx_ring->next_to_clean,
-				  tx_ring->buffer_info[eop].time_stamp,
-				  eop,
-				  jiffies,
-				  eop_desc->status);
-			netif_stop_queue(netdev);
-		}
-	}
-
-	return cleaned;
-}
-
-/**
- * ixgb_rx_checksum - Receive Checksum Offload for 82597.
- * @adapter: board private structure
- * @rx_desc: receive descriptor
- * @skb: socket buffer with received data
- **/
-
-static void
-ixgb_rx_checksum(struct ixgb_adapter *adapter,
-                 struct ixgb_rx_desc *rx_desc,
-                 struct sk_buff *skb)
-{
-	/* Ignore Checksum bit is set OR
-	 * TCP Checksum has not been calculated
-	 */
-	if ((rx_desc->status & IXGB_RX_DESC_STATUS_IXSM) ||
-	   (!(rx_desc->status & IXGB_RX_DESC_STATUS_TCPCS))) {
-		skb_checksum_none_assert(skb);
-		return;
-	}
-
-	/* At this point we know the hardware did the TCP checksum */
-	/* now look at the TCP checksum error bit */
-	if (rx_desc->errors & IXGB_RX_DESC_ERRORS_TCPE) {
-		/* let the stack verify checksum errors */
-		skb_checksum_none_assert(skb);
-		adapter->hw_csum_rx_error++;
-	} else {
-		/* TCP checksum is good */
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
-		adapter->hw_csum_rx_good++;
-	}
-}
-
-/*
- * this should improve performance for small packets with large amounts
- * of reassembly being done in the stack
- */
-static void ixgb_check_copybreak(struct napi_struct *napi,
-				 struct ixgb_buffer *buffer_info,
-				 u32 length, struct sk_buff **skb)
-{
-	struct sk_buff *new_skb;
-
-	if (length > copybreak)
-		return;
-
-	new_skb = napi_alloc_skb(napi, length);
-	if (!new_skb)
-		return;
-
-	skb_copy_to_linear_data_offset(new_skb, -NET_IP_ALIGN,
-				       (*skb)->data - NET_IP_ALIGN,
-				       length + NET_IP_ALIGN);
-	/* save the skb in buffer_info as good */
-	buffer_info->skb = *skb;
-	*skb = new_skb;
-}
-
-/**
- * ixgb_clean_rx_irq - Send received data up the network stack,
- * @adapter: board private structure
- * @work_done: output pointer to amount of packets cleaned
- * @work_to_do: how much work we can complete
- **/
-
-static bool
-ixgb_clean_rx_irq(struct ixgb_adapter *adapter, int *work_done, int work_to_do)
-{
-	struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
-	struct net_device *netdev = adapter->netdev;
-	struct pci_dev *pdev = adapter->pdev;
-	struct ixgb_rx_desc *rx_desc, *next_rxd;
-	struct ixgb_buffer *buffer_info, *next_buffer, *next2_buffer;
-	u32 length;
-	unsigned int i, j;
-	int cleaned_count = 0;
-	bool cleaned = false;
-
-	i = rx_ring->next_to_clean;
-	rx_desc = IXGB_RX_DESC(*rx_ring, i);
-	buffer_info = &rx_ring->buffer_info[i];
-
-	while (rx_desc->status & IXGB_RX_DESC_STATUS_DD) {
-		struct sk_buff *skb;
-		u8 status;
-
-		if (*work_done >= work_to_do)
-			break;
-
-		(*work_done)++;
-		rmb();	/* read descriptor and rx_buffer_info after status DD */
-		status = rx_desc->status;
-		skb = buffer_info->skb;
-		buffer_info->skb = NULL;
-
-		prefetch(skb->data - NET_IP_ALIGN);
-
-		if (++i == rx_ring->count)
-			i = 0;
-		next_rxd = IXGB_RX_DESC(*rx_ring, i);
-		prefetch(next_rxd);
-
-		j = i + 1;
-		if (j == rx_ring->count)
-			j = 0;
-		next2_buffer = &rx_ring->buffer_info[j];
-		prefetch(next2_buffer);
-
-		next_buffer = &rx_ring->buffer_info[i];
-
-		cleaned = true;
-		cleaned_count++;
-
-		dma_unmap_single(&pdev->dev,
-				 buffer_info->dma,
-				 buffer_info->length,
-				 DMA_FROM_DEVICE);
-		buffer_info->dma = 0;
-
-		length = le16_to_cpu(rx_desc->length);
-		rx_desc->length = 0;
-
-		if (unlikely(!(status & IXGB_RX_DESC_STATUS_EOP))) {
-
-			/* All receives must fit into a single buffer */
-
-			pr_debug("Receive packet consumed multiple buffers length<%x>\n",
-				 length);
-
-			dev_kfree_skb_irq(skb);
-			goto rxdesc_done;
-		}
-
-		if (unlikely(rx_desc->errors &
-		    (IXGB_RX_DESC_ERRORS_CE | IXGB_RX_DESC_ERRORS_SE |
-		     IXGB_RX_DESC_ERRORS_P | IXGB_RX_DESC_ERRORS_RXE))) {
-			dev_kfree_skb_irq(skb);
-			goto rxdesc_done;
-		}
-
-		ixgb_check_copybreak(&adapter->napi, buffer_info, length, &skb);
-
-		/* Good Receive */
-		skb_put(skb, length);
-
-		/* Receive Checksum Offload */
-		ixgb_rx_checksum(adapter, rx_desc, skb);
-
-		skb->protocol = eth_type_trans(skb, netdev);
-		if (status & IXGB_RX_DESC_STATUS_VP)
-			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-				       le16_to_cpu(rx_desc->special));
-
-		netif_receive_skb(skb);
-
-rxdesc_done:
-		/* clean up descriptor, might be written over by hw */
-		rx_desc->status = 0;
-
-		/* return some buffers to hardware, one at a time is too slow */
-		if (unlikely(cleaned_count >= IXGB_RX_BUFFER_WRITE)) {
-			ixgb_alloc_rx_buffers(adapter, cleaned_count);
-			cleaned_count = 0;
-		}
-
-		/* use prefetched values */
-		rx_desc = next_rxd;
-		buffer_info = next_buffer;
-	}
-
-	rx_ring->next_to_clean = i;
-
-	cleaned_count = IXGB_DESC_UNUSED(rx_ring);
-	if (cleaned_count)
-		ixgb_alloc_rx_buffers(adapter, cleaned_count);
-
-	return cleaned;
-}
-
-/**
- * ixgb_alloc_rx_buffers - Replace used receive buffers
- * @adapter: address of board private structure
- * @cleaned_count: how many buffers to allocate
- **/
-
-static void
-ixgb_alloc_rx_buffers(struct ixgb_adapter *adapter, int cleaned_count)
-{
-	struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
-	struct net_device *netdev = adapter->netdev;
-	struct pci_dev *pdev = adapter->pdev;
-	struct ixgb_rx_desc *rx_desc;
-	struct ixgb_buffer *buffer_info;
-	struct sk_buff *skb;
-	unsigned int i;
-	long cleancount;
-
-	i = rx_ring->next_to_use;
-	buffer_info = &rx_ring->buffer_info[i];
-	cleancount = IXGB_DESC_UNUSED(rx_ring);
-
-
-	/* leave three descriptors unused */
-	while (--cleancount > 2 && cleaned_count--) {
-		/* recycle! its good for you */
-		skb = buffer_info->skb;
-		if (skb) {
-			skb_trim(skb, 0);
-			goto map_skb;
-		}
-
-		skb = netdev_alloc_skb_ip_align(netdev, adapter->rx_buffer_len);
-		if (unlikely(!skb)) {
-			/* Better luck next round */
-			adapter->alloc_rx_buff_failed++;
-			break;
-		}
-
-		buffer_info->skb = skb;
-		buffer_info->length = adapter->rx_buffer_len;
-map_skb:
-		buffer_info->dma = dma_map_single(&pdev->dev,
-		                                  skb->data,
-		                                  adapter->rx_buffer_len,
-						  DMA_FROM_DEVICE);
-		if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
-			adapter->alloc_rx_buff_failed++;
-			break;
-		}
-
-		rx_desc = IXGB_RX_DESC(*rx_ring, i);
-		rx_desc->buff_addr = cpu_to_le64(buffer_info->dma);
-		/* guarantee DD bit not set now before h/w gets descriptor
-		 * this is the rest of the workaround for h/w double
-		 * writeback. */
-		rx_desc->status = 0;
-
-
-		if (++i == rx_ring->count)
-			i = 0;
-		buffer_info = &rx_ring->buffer_info[i];
-	}
-
-	if (likely(rx_ring->next_to_use != i)) {
-		rx_ring->next_to_use = i;
-		if (unlikely(i-- == 0))
-			i = (rx_ring->count - 1);
-
-		/* Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs, such
-		 * as IA-64). */
-		wmb();
-		IXGB_WRITE_REG(&adapter->hw, RDT, i);
-	}
-}
-
-static void
-ixgb_vlan_strip_enable(struct ixgb_adapter *adapter)
-{
-	u32 ctrl;
-
-	/* enable VLAN tag insert/strip */
-	ctrl = IXGB_READ_REG(&adapter->hw, CTRL0);
-	ctrl |= IXGB_CTRL0_VME;
-	IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
-}
-
-static void
-ixgb_vlan_strip_disable(struct ixgb_adapter *adapter)
-{
-	u32 ctrl;
-
-	/* disable VLAN tag insert/strip */
-	ctrl = IXGB_READ_REG(&adapter->hw, CTRL0);
-	ctrl &= ~IXGB_CTRL0_VME;
-	IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
-}
-
-static int
-ixgb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u32 vfta, index;
-
-	/* add VID to filter table */
-
-	index = (vid >> 5) & 0x7F;
-	vfta = IXGB_READ_REG_ARRAY(&adapter->hw, VFTA, index);
-	vfta |= (1 << (vid & 0x1F));
-	ixgb_write_vfta(&adapter->hw, index, vfta);
-	set_bit(vid, adapter->active_vlans);
-
-	return 0;
-}
-
-static int
-ixgb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid)
-{
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u32 vfta, index;
-
-	/* remove VID from filter table */
-
-	index = (vid >> 5) & 0x7F;
-	vfta = IXGB_READ_REG_ARRAY(&adapter->hw, VFTA, index);
-	vfta &= ~(1 << (vid & 0x1F));
-	ixgb_write_vfta(&adapter->hw, index, vfta);
-	clear_bit(vid, adapter->active_vlans);
-
-	return 0;
-}
-
-static void
-ixgb_restore_vlan(struct ixgb_adapter *adapter)
-{
-	u16 vid;
-
-	for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
-		ixgb_vlan_rx_add_vid(adapter->netdev, htons(ETH_P_8021Q), vid);
-}
-
-/**
- * ixgb_io_error_detected - called when PCI error is detected
- * @pdev:    pointer to pci device with error
- * @state:   pci channel state after error
- *
- * This callback is called by the PCI subsystem whenever
- * a PCI bus error is detected.
- */
-static pci_ers_result_t ixgb_io_error_detected(struct pci_dev *pdev,
-                                               pci_channel_state_t state)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	netif_device_detach(netdev);
-
-	if (state == pci_channel_io_perm_failure)
-		return PCI_ERS_RESULT_DISCONNECT;
-
-	if (netif_running(netdev))
-		ixgb_down(adapter, true);
-
-	pci_disable_device(pdev);
-
-	/* Request a slot reset. */
-	return PCI_ERS_RESULT_NEED_RESET;
-}
-
-/**
- * ixgb_io_slot_reset - called after the pci bus has been reset.
- * @pdev: pointer to pci device with error
- *
- * This callback is called after the PCI bus has been reset.
- * Basically, this tries to restart the card from scratch.
- * This is a shortened version of the device probe/discovery code,
- * it resembles the first-half of the ixgb_probe() routine.
- */
-static pci_ers_result_t ixgb_io_slot_reset(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u8 addr[ETH_ALEN];
-
-	if (pci_enable_device(pdev)) {
-		netif_err(adapter, probe, adapter->netdev,
-			  "Cannot re-enable PCI device after reset\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-
-	/* Perform card reset only on one instance of the card */
-	if (0 != PCI_FUNC (pdev->devfn))
-		return PCI_ERS_RESULT_RECOVERED;
-
-	pci_set_master(pdev);
-
-	netif_carrier_off(netdev);
-	netif_stop_queue(netdev);
-	ixgb_reset(adapter);
-
-	/* Make sure the EEPROM is good */
-	if (!ixgb_validate_eeprom_checksum(&adapter->hw)) {
-		netif_err(adapter, probe, adapter->netdev,
-			  "After reset, the EEPROM checksum is not valid\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-	ixgb_get_ee_mac_addr(&adapter->hw, addr);
-	eth_hw_addr_set(netdev, addr);
-	memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len);
-
-	if (!is_valid_ether_addr(netdev->perm_addr)) {
-		netif_err(adapter, probe, adapter->netdev,
-			  "After reset, invalid MAC address\n");
-		return PCI_ERS_RESULT_DISCONNECT;
-	}
-
-	return PCI_ERS_RESULT_RECOVERED;
-}
-
-/**
- * ixgb_io_resume - called when its OK to resume normal operations
- * @pdev: pointer to pci device with error
- *
- * The error recovery driver tells us that its OK to resume
- * normal operation. Implementation resembles the second-half
- * of the ixgb_probe() routine.
- */
-static void ixgb_io_resume(struct pci_dev *pdev)
-{
-	struct net_device *netdev = pci_get_drvdata(pdev);
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-
-	pci_set_master(pdev);
-
-	if (netif_running(netdev)) {
-		if (ixgb_up(adapter)) {
-			pr_err("can't bring device back up after reset\n");
-			return;
-		}
-	}
-
-	netif_device_attach(netdev);
-	mod_timer(&adapter->watchdog_timer, jiffies);
-}
-
-/* ixgb_main.c */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h b/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
deleted file mode 100644
index 7bd54efa698d..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_osdep.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-/* glue for the OS independent part of ixgb
- * includes register access macros
- */
-
-#ifndef _IXGB_OSDEP_H_
-#define _IXGB_OSDEP_H_
-
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <asm/io.h>
-#include <linux/interrupt.h>
-#include <linux/sched.h>
-#include <linux/if_ether.h>
-
-#undef ASSERT
-#define ASSERT(x)	BUG_ON(!(x))
-
-#define ENTER() pr_debug("%s\n", __func__);
-
-#define IXGB_WRITE_REG(a, reg, value) ( \
-	writel((value), ((a)->hw_addr + IXGB_##reg)))
-
-#define IXGB_READ_REG(a, reg) ( \
-	readl((a)->hw_addr + IXGB_##reg))
-
-#define IXGB_WRITE_REG_ARRAY(a, reg, offset, value) ( \
-	writel((value), ((a)->hw_addr + IXGB_##reg + ((offset) << 2))))
-
-#define IXGB_READ_REG_ARRAY(a, reg, offset) ( \
-	readl((a)->hw_addr + IXGB_##reg + ((offset) << 2)))
-
-#define IXGB_WRITE_FLUSH(a) IXGB_READ_REG(a, STATUS)
-
-#define IXGB_MEMCPY memcpy
-
-#endif /* _IXGB_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_param.c b/drivers/net/ethernet/intel/ixgb/ixgb_param.c
deleted file mode 100644
index d40f96250691..000000000000
--- a/drivers/net/ethernet/intel/ixgb/ixgb_param.c
+++ /dev/null
@@ -1,442 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright(c) 1999 - 2008 Intel Corporation. */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include "ixgb.h"
-
-/* This is the only thing that needs to be changed to adjust the
- * maximum number of ports that the driver can manage.
- */
-
-#define IXGB_MAX_NIC 8
-
-#define OPTION_UNSET	-1
-#define OPTION_DISABLED 0
-#define OPTION_ENABLED  1
-
-/* All parameters are treated the same, as an integer array of values.
- * This macro just reduces the need to repeat the same declaration code
- * over and over (plus this helps to avoid typo bugs).
- */
-
-#define IXGB_PARAM_INIT { [0 ... IXGB_MAX_NIC] = OPTION_UNSET }
-#define IXGB_PARAM(X, desc)					\
-	static int X[IXGB_MAX_NIC+1]		\
-		= IXGB_PARAM_INIT;				\
-	static unsigned int num_##X = 0;			\
-	module_param_array_named(X, X, int, &num_##X, 0);	\
-	MODULE_PARM_DESC(X, desc);
-
-/* Transmit Descriptor Count
- *
- * Valid Range: 64-4096
- *
- * Default Value: 256
- */
-
-IXGB_PARAM(TxDescriptors, "Number of transmit descriptors");
-
-/* Receive Descriptor Count
- *
- * Valid Range: 64-4096
- *
- * Default Value: 1024
- */
-
-IXGB_PARAM(RxDescriptors, "Number of receive descriptors");
-
-/* User Specified Flow Control Override
- *
- * Valid Range: 0-3
- *  - 0 - No Flow Control
- *  - 1 - Rx only, respond to PAUSE frames but do not generate them
- *  - 2 - Tx only, generate PAUSE frames but ignore them on receive
- *  - 3 - Full Flow Control Support
- *
- * Default Value: 2 - Tx only (silicon bug avoidance)
- */
-
-IXGB_PARAM(FlowControl, "Flow Control setting");
-
-/* XsumRX - Receive Checksum Offload Enable/Disable
- *
- * Valid Range: 0, 1
- *  - 0 - disables all checksum offload
- *  - 1 - enables receive IP/TCP/UDP checksum offload
- *        on 82597 based NICs
- *
- * Default Value: 1
- */
-
-IXGB_PARAM(XsumRX, "Disable or enable Receive Checksum offload");
-
-/* Transmit Interrupt Delay in units of 0.8192 microseconds
- *
- * Valid Range: 0-65535
- *
- * Default Value: 32
- */
-
-IXGB_PARAM(TxIntDelay, "Transmit Interrupt Delay");
-
-/* Receive Interrupt Delay in units of 0.8192 microseconds
- *
- * Valid Range: 0-65535
- *
- * Default Value: 72
- */
-
-IXGB_PARAM(RxIntDelay, "Receive Interrupt Delay");
-
-/* Receive Flow control high threshold (when we send a pause frame)
- * (FCRTH)
- *
- * Valid Range: 1,536 - 262,136 (0x600 - 0x3FFF8, 8 byte granularity)
- *
- * Default Value: 196,608 (0x30000)
- */
-
-IXGB_PARAM(RxFCHighThresh, "Receive Flow Control High Threshold");
-
-/* Receive Flow control low threshold (when we send a resume frame)
- * (FCRTL)
- *
- * Valid Range: 64 - 262,136 (0x40 - 0x3FFF8, 8 byte granularity)
- *              must be less than high threshold by at least 8 bytes
- *
- * Default Value:  163,840 (0x28000)
- */
-
-IXGB_PARAM(RxFCLowThresh, "Receive Flow Control Low Threshold");
-
-/* Flow control request timeout (how long to pause the link partner's tx)
- * (PAP 15:0)
- *
- * Valid Range: 1 - 65535
- *
- * Default Value:  65535 (0xffff) (we'll send an xon if we recover)
- */
-
-IXGB_PARAM(FCReqTimeout, "Flow Control Request Timeout");
-
-/* Interrupt Delay Enable
- *
- * Valid Range: 0, 1
- *
- *  - 0 - disables transmit interrupt delay
- *  - 1 - enables transmmit interrupt delay
- *
- * Default Value: 1
- */
-
-IXGB_PARAM(IntDelayEnable, "Transmit Interrupt Delay Enable");
-
-
-#define DEFAULT_TIDV	   		     32
-#define MAX_TIDV			 0xFFFF
-#define MIN_TIDV			      0
-
-#define DEFAULT_RDTR		   	     72
-#define MAX_RDTR			 0xFFFF
-#define MIN_RDTR			      0
-
-#define DEFAULT_FCRTL	  		0x28000
-#define DEFAULT_FCRTH			0x30000
-#define MIN_FCRTL			      0
-#define MAX_FCRTL			0x3FFE8
-#define MIN_FCRTH			      8
-#define MAX_FCRTH			0x3FFF0
-
-#define MIN_FCPAUSE			      1
-#define MAX_FCPAUSE			 0xffff
-#define DEFAULT_FCPAUSE		  	 0xFFFF /* this may be too long */
-
-struct ixgb_option {
-	enum { enable_option, range_option, list_option } type;
-	const char *name;
-	const char *err;
-	int def;
-	union {
-		struct {	/* range_option info */
-			int min;
-			int max;
-		} r;
-		struct {	/* list_option info */
-			int nr;
-			const struct ixgb_opt_list {
-				int i;
-				const char *str;
-			} *p;
-		} l;
-	} arg;
-};
-
-static int
-ixgb_validate_option(unsigned int *value, const struct ixgb_option *opt)
-{
-	if (*value == OPTION_UNSET) {
-		*value = opt->def;
-		return 0;
-	}
-
-	switch (opt->type) {
-	case enable_option:
-		switch (*value) {
-		case OPTION_ENABLED:
-			pr_info("%s Enabled\n", opt->name);
-			return 0;
-		case OPTION_DISABLED:
-			pr_info("%s Disabled\n", opt->name);
-			return 0;
-		}
-		break;
-	case range_option:
-		if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) {
-			pr_info("%s set to %i\n", opt->name, *value);
-			return 0;
-		}
-		break;
-	case list_option: {
-		int i;
-		const struct ixgb_opt_list *ent;
-
-		for (i = 0; i < opt->arg.l.nr; i++) {
-			ent = &opt->arg.l.p[i];
-			if (*value == ent->i) {
-				if (ent->str[0] != '\0')
-					pr_info("%s\n", ent->str);
-				return 0;
-			}
-		}
-	}
-		break;
-	default:
-		BUG();
-	}
-
-	pr_info("Invalid %s specified (%i) %s\n", opt->name, *value, opt->err);
-	*value = opt->def;
-	return -1;
-}
-
-/**
- * ixgb_check_options - Range Checking for Command Line Parameters
- * @adapter: board private structure
- *
- * This routine checks all command line parameters for valid user
- * input.  If an invalid value is given, or if no user specified
- * value exists, a default value is used.  The final value is stored
- * in a variable in the adapter structure.
- **/
-
-void
-ixgb_check_options(struct ixgb_adapter *adapter)
-{
-	int bd = adapter->bd_number;
-	if (bd >= IXGB_MAX_NIC) {
-		pr_notice("Warning: no configuration for board #%i\n", bd);
-		pr_notice("Using defaults for all values\n");
-	}
-
-	{ /* Transmit Descriptor Count */
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Transmit Descriptors",
-			.err  = "using default of " __MODULE_STRING(DEFAULT_TXD),
-			.def  = DEFAULT_TXD,
-			.arg  = { .r = { .min = MIN_TXD,
-					 .max = MAX_TXD}}
-		};
-		struct ixgb_desc_ring *tx_ring = &adapter->tx_ring;
-
-		if (num_TxDescriptors > bd) {
-			tx_ring->count = TxDescriptors[bd];
-			ixgb_validate_option(&tx_ring->count, &opt);
-		} else {
-			tx_ring->count = opt.def;
-		}
-		tx_ring->count = ALIGN(tx_ring->count, IXGB_REQ_TX_DESCRIPTOR_MULTIPLE);
-	}
-	{ /* Receive Descriptor Count */
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Receive Descriptors",
-			.err  = "using default of " __MODULE_STRING(DEFAULT_RXD),
-			.def  = DEFAULT_RXD,
-			.arg  = { .r = { .min = MIN_RXD,
-					 .max = MAX_RXD}}
-		};
-		struct ixgb_desc_ring *rx_ring = &adapter->rx_ring;
-
-		if (num_RxDescriptors > bd) {
-			rx_ring->count = RxDescriptors[bd];
-			ixgb_validate_option(&rx_ring->count, &opt);
-		} else {
-			rx_ring->count = opt.def;
-		}
-		rx_ring->count = ALIGN(rx_ring->count, IXGB_REQ_RX_DESCRIPTOR_MULTIPLE);
-	}
-	{ /* Receive Checksum Offload Enable */
-		static const struct ixgb_option opt = {
-			.type = enable_option,
-			.name = "Receive Checksum Offload",
-			.err  = "defaulting to Enabled",
-			.def  = OPTION_ENABLED
-		};
-
-		if (num_XsumRX > bd) {
-			unsigned int rx_csum = XsumRX[bd];
-			ixgb_validate_option(&rx_csum, &opt);
-			adapter->rx_csum = rx_csum;
-		} else {
-			adapter->rx_csum = opt.def;
-		}
-	}
-	{ /* Flow Control */
-
-		static const struct ixgb_opt_list fc_list[] = {
-		       { ixgb_fc_none, "Flow Control Disabled" },
-		       { ixgb_fc_rx_pause, "Flow Control Receive Only" },
-		       { ixgb_fc_tx_pause, "Flow Control Transmit Only" },
-		       { ixgb_fc_full, "Flow Control Enabled" },
-		       { ixgb_fc_default, "Flow Control Hardware Default" }
-		};
-
-		static const struct ixgb_option opt = {
-			.type = list_option,
-			.name = "Flow Control",
-			.err  = "reading default settings from EEPROM",
-			.def  = ixgb_fc_tx_pause,
-			.arg  = { .l = { .nr = ARRAY_SIZE(fc_list),
-					 .p = fc_list }}
-		};
-
-		if (num_FlowControl > bd) {
-			unsigned int fc = FlowControl[bd];
-			ixgb_validate_option(&fc, &opt);
-			adapter->hw.fc.type = fc;
-		} else {
-			adapter->hw.fc.type = opt.def;
-		}
-	}
-	{ /* Receive Flow Control High Threshold */
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Rx Flow Control High Threshold",
-			.err  = "using default of " __MODULE_STRING(DEFAULT_FCRTH),
-			.def  = DEFAULT_FCRTH,
-			.arg  = { .r = { .min = MIN_FCRTH,
-					 .max = MAX_FCRTH}}
-		};
-
-		if (num_RxFCHighThresh > bd) {
-			adapter->hw.fc.high_water = RxFCHighThresh[bd];
-			ixgb_validate_option(&adapter->hw.fc.high_water, &opt);
-		} else {
-			adapter->hw.fc.high_water = opt.def;
-		}
-		if (!(adapter->hw.fc.type & ixgb_fc_tx_pause) )
-			pr_info("Ignoring RxFCHighThresh when no RxFC\n");
-	}
-	{ /* Receive Flow Control Low Threshold */
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Rx Flow Control Low Threshold",
-			.err  = "using default of " __MODULE_STRING(DEFAULT_FCRTL),
-			.def  = DEFAULT_FCRTL,
-			.arg  = { .r = { .min = MIN_FCRTL,
-					 .max = MAX_FCRTL}}
-		};
-
-		if (num_RxFCLowThresh > bd) {
-			adapter->hw.fc.low_water = RxFCLowThresh[bd];
-			ixgb_validate_option(&adapter->hw.fc.low_water, &opt);
-		} else {
-			adapter->hw.fc.low_water = opt.def;
-		}
-		if (!(adapter->hw.fc.type & ixgb_fc_tx_pause) )
-			pr_info("Ignoring RxFCLowThresh when no RxFC\n");
-	}
-	{ /* Flow Control Pause Time Request*/
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Flow Control Pause Time Request",
-			.err  = "using default of "__MODULE_STRING(DEFAULT_FCPAUSE),
-			.def  = DEFAULT_FCPAUSE,
-			.arg = { .r = { .min = MIN_FCPAUSE,
-					.max = MAX_FCPAUSE}}
-		};
-
-		if (num_FCReqTimeout > bd) {
-			unsigned int pause_time = FCReqTimeout[bd];
-			ixgb_validate_option(&pause_time, &opt);
-			adapter->hw.fc.pause_time = pause_time;
-		} else {
-			adapter->hw.fc.pause_time = opt.def;
-		}
-		if (!(adapter->hw.fc.type & ixgb_fc_tx_pause) )
-			pr_info("Ignoring FCReqTimeout when no RxFC\n");
-	}
-	/* high low and spacing check for rx flow control thresholds */
-	if (adapter->hw.fc.type & ixgb_fc_tx_pause) {
-		/* high must be greater than low */
-		if (adapter->hw.fc.high_water < (adapter->hw.fc.low_water + 8)) {
-			/* set defaults */
-			pr_info("RxFCHighThresh must be >= (RxFCLowThresh + 8), Using Defaults\n");
-			adapter->hw.fc.high_water = DEFAULT_FCRTH;
-			adapter->hw.fc.low_water  = DEFAULT_FCRTL;
-		}
-	}
-	{ /* Receive Interrupt Delay */
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Receive Interrupt Delay",
-			.err  = "using default of " __MODULE_STRING(DEFAULT_RDTR),
-			.def  = DEFAULT_RDTR,
-			.arg  = { .r = { .min = MIN_RDTR,
-					 .max = MAX_RDTR}}
-		};
-
-		if (num_RxIntDelay > bd) {
-			adapter->rx_int_delay = RxIntDelay[bd];
-			ixgb_validate_option(&adapter->rx_int_delay, &opt);
-		} else {
-			adapter->rx_int_delay = opt.def;
-		}
-	}
-	{ /* Transmit Interrupt Delay */
-		static const struct ixgb_option opt = {
-			.type = range_option,
-			.name = "Transmit Interrupt Delay",
-			.err  = "using default of " __MODULE_STRING(DEFAULT_TIDV),
-			.def  = DEFAULT_TIDV,
-			.arg  = { .r = { .min = MIN_TIDV,
-					 .max = MAX_TIDV}}
-		};
-
-		if (num_TxIntDelay > bd) {
-			adapter->tx_int_delay = TxIntDelay[bd];
-			ixgb_validate_option(&adapter->tx_int_delay, &opt);
-		} else {
-			adapter->tx_int_delay = opt.def;
-		}
-	}
-
-	{ /* Transmit Interrupt Delay Enable */
-		static const struct ixgb_option opt = {
-			.type = enable_option,
-			.name = "Tx Interrupt Delay Enable",
-			.err  = "defaulting to Enabled",
-			.def  = OPTION_ENABLED
-		};
-
-		if (num_IntDelayEnable > bd) {
-			unsigned int ide = IntDelayEnable[bd];
-			ixgb_validate_option(&ide, &opt);
-			adapter->tx_int_delay_enable = ide;
-		} else {
-			adapter->tx_int_delay_enable = opt.def;
-		}
-	}
-}
-- 
cgit v1.2.3


From e3ac1c270466e4fe930010ca688f71b98282e0e6 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Sun, 19 Mar 2023 12:56:40 +0000
Subject: dt-bindings: net: mediatek,net: add mt7981-eth binding

Introduce DT bindings for the MT7981 SoC to mediatek,net.yaml.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/mediatek,net.yaml      | 53 ++++++++++++++++++++--
 1 file changed, 48 insertions(+), 5 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml
index 7ef696204c5a..b7f6474dc5ab 100644
--- a/Documentation/devicetree/bindings/net/mediatek,net.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml
@@ -21,6 +21,7 @@ properties:
       - mediatek,mt7623-eth
       - mediatek,mt7622-eth
       - mediatek,mt7629-eth
+      - mediatek,mt7981-eth
       - mediatek,mt7986-eth
       - ralink,rt5350-eth
 
@@ -78,6 +79,11 @@ properties:
     description:
       List of phandles to wireless ethernet dispatch nodes.
 
+  mediatek,wed-pcie:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the mediatek wed-pcie controller.
+
   dma-coherent: true
 
   mdio-bus:
@@ -123,6 +129,8 @@ allOf:
 
         mediatek,wed: false
 
+        mediatek,wed-pcie: false
+
   - if:
       properties:
         compatible:
@@ -160,6 +168,8 @@ allOf:
           description:
             Phandle to the mediatek pcie-mirror controller.
 
+        mediatek,wed-pcie: false
+
   - if:
       properties:
         compatible:
@@ -206,6 +216,44 @@ allOf:
 
         mediatek,wed: false
 
+        mediatek,wed-pcie: false
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: mediatek,mt7981-eth
+    then:
+      properties:
+        interrupts:
+          minItems: 4
+
+        clocks:
+          minItems: 15
+          maxItems: 15
+
+        clock-names:
+          items:
+            - const: fe
+            - const: gp2
+            - const: gp1
+            - const: wocpu0
+            - const: sgmii_ck
+            - const: sgmii_tx250m
+            - const: sgmii_rx250m
+            - const: sgmii_cdr_ref
+            - const: sgmii_cdr_fb
+            - const: sgmii2_tx250m
+            - const: sgmii2_rx250m
+            - const: sgmii2_cdr_ref
+            - const: sgmii2_cdr_fb
+            - const: netsys0
+            - const: netsys1
+
+        mediatek,sgmiisys:
+          minItems: 2
+          maxItems: 2
+
   - if:
       properties:
         compatible:
@@ -242,11 +290,6 @@ allOf:
           minItems: 2
           maxItems: 2
 
-        mediatek,wed-pcie:
-          $ref: /schemas/types.yaml#/definitions/phandle
-          description:
-            Phandle to the mediatek wed-pcie controller.
-
 patternProperties:
   "^mac@[0-1]$":
     type: object
-- 
cgit v1.2.3


From d4f08a703565abf47baa5a77d05365cf4598d55c Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Sun, 19 Mar 2023 12:56:52 +0000
Subject: dt-bindings: arm: mediatek: sgmiisys: Convert to DT schema

Convert mediatek,sgmiisys bindings to DT schema format.
Add Matthias Brugger as maintainer, as no maintainers were listed in the
original documentation.
As this node is also referenced by the Ethernet controller and used
as an SGMII PCS, add this fact to the description.
Move the file to Documentation/devicetree/bindings/net/pcs/ which seems
more appropriate given that the great majority of registers are related
to SGMII PCS functionality and only one register represents clock bits.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../bindings/arm/mediatek/mediatek,sgmiisys.txt    | 27 ------------
 .../bindings/net/pcs/mediatek,sgmiisys.yaml        | 49 ++++++++++++++++++++++
 2 files changed, 49 insertions(+), 27 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
 create mode 100644 Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
deleted file mode 100644
index d2c24c277514..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,sgmiisys.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-MediaTek SGMIISYS controller
-============================
-
-The MediaTek SGMIISYS controller provides various clocks to the system.
-
-Required Properties:
-
-- compatible: Should be:
-	- "mediatek,mt7622-sgmiisys", "syscon"
-	- "mediatek,mt7629-sgmiisys", "syscon"
-	- "mediatek,mt7981-sgmiisys_0", "syscon"
-	- "mediatek,mt7981-sgmiisys_1", "syscon"
-	- "mediatek,mt7986-sgmiisys_0", "syscon"
-	- "mediatek,mt7986-sgmiisys_1", "syscon"
-- #clock-cells: Must be 1
-
-The SGMIISYS controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-
-Example:
-
-sgmiisys: sgmiisys@1b128000 {
-	compatible = "mediatek,mt7622-sgmiisys", "syscon";
-	reg = <0 0x1b128000 0 0x1000>;
-	#clock-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml b/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml
new file mode 100644
index 000000000000..7ce597011a32
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/pcs/mediatek,sgmiisys.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SGMIISYS Controller
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+description:
+  The MediaTek SGMIISYS controller provides a SGMII PCS and some clocks
+  to the ethernet subsystem to which it is attached.
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - mediatek,mt7622-sgmiisys
+          - mediatek,mt7629-sgmiisys
+          - mediatek,mt7986-sgmiisys_0
+          - mediatek,mt7986-sgmiisys_1
+      - const: syscon
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - '#clock-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    soc {
+      #address-cells = <2>;
+      #size-cells = <2>;
+      sgmiisys: syscon@1b128000 {
+        compatible = "mediatek,mt7622-sgmiisys", "syscon";
+        reg = <0 0x1b128000 0 0x1000>;
+        #clock-cells = <1>;
+      };
+    };
-- 
cgit v1.2.3


From 4f7eb19c4f44078100659f6ba073b0cc7191bc91 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Sun, 19 Mar 2023 12:57:04 +0000
Subject: dt-bindings: net: pcs: mediatek,sgmiisys: add MT7981 SoC

Add the MT7981 sgmiisys compatibles as well as the mediatek,pnswap boolean
property needed on many boards using the MediaTek MT7981 SoC.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml b/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml
index 7ce597011a32..66a95191bd77 100644
--- a/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml
+++ b/Documentation/devicetree/bindings/net/pcs/mediatek,sgmiisys.yaml
@@ -19,6 +19,8 @@ properties:
       - enum:
           - mediatek,mt7622-sgmiisys
           - mediatek,mt7629-sgmiisys
+          - mediatek,mt7981-sgmiisys_0
+          - mediatek,mt7981-sgmiisys_1
           - mediatek,mt7986-sgmiisys_0
           - mediatek,mt7986-sgmiisys_1
       - const: syscon
@@ -29,6 +31,10 @@ properties:
   '#clock-cells':
     const: 1
 
+  mediatek,pnswap:
+    description: Invert polarity of the SGMII data lanes
+    type: boolean
+
 required:
   - compatible
   - reg
-- 
cgit v1.2.3


From 3ec5ac3133b54d5bcaaa9ddc95e15377178ef66e Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Tue, 21 Mar 2023 18:33:56 +0100
Subject: dt-bindings: net: dsa: b53: add more 63xx SoCs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM6318, BCM6362 and BCM63268 are SoCs with a B53 MMAP switch.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
index 5bef4128d175..57e0ef93b134 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
@@ -57,8 +57,11 @@ properties:
       - items:
           - enum:
               - brcm,bcm3384-switch
+              - brcm,bcm6318-switch
               - brcm,bcm6328-switch
+              - brcm,bcm6362-switch
               - brcm,bcm6368-switch
+              - brcm,bcm63268-switch
           - const: brcm,bcm63xx-switch
 
 required:
-- 
cgit v1.2.3


From 3079bfdbda6cc776b4fba4556258966753a6d840 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 20 Mar 2023 18:37:54 -0500
Subject: dt-bindings: net: Drop unneeded quotes

Clean up bindings by dropping unneeded quotes. Once all these are fixed,
checking for this can be enabled in yamllint.

Acked-by: Marc Kleine-Budde <mkl@pengutronix.de> # for bindings/net/can
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Steen Hegelund <Steen.Hegelund@microchip.com>
Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp> # for bindings/net/toshiba,visconti-dwmac.yaml
Reviewed-by: Heiko Stuebner <heiko@sntech.de> #rockchip
Link: https://lore.kernel.org/r/20230320233758.2918972-1-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../devicetree/bindings/net/actions,owl-emac.yaml      |  2 +-
 .../bindings/net/allwinner,sun4i-a10-emac.yaml         |  2 +-
 .../bindings/net/allwinner,sun4i-a10-mdio.yaml         |  2 +-
 Documentation/devicetree/bindings/net/altr,tse.yaml    |  2 +-
 .../devicetree/bindings/net/amlogic,meson-dwmac.yaml   |  4 ++--
 .../devicetree/bindings/net/aspeed,ast2600-mdio.yaml   |  2 +-
 Documentation/devicetree/bindings/net/brcm,amac.yaml   |  2 +-
 .../devicetree/bindings/net/brcm,systemport.yaml       |  2 +-
 .../devicetree/bindings/net/broadcom-bluetooth.yaml    |  2 +-
 .../devicetree/bindings/net/can/xilinx,can.yaml        |  6 +++---
 .../devicetree/bindings/net/dsa/brcm,sf2.yaml          |  2 +-
 Documentation/devicetree/bindings/net/dsa/qca8k.yaml   |  2 +-
 .../devicetree/bindings/net/engleder,tsnep.yaml        |  2 +-
 .../devicetree/bindings/net/ethernet-phy.yaml          |  2 +-
 .../devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml    |  2 +-
 .../bindings/net/intel,ixp46x-ptp-timer.yaml           |  4 ++--
 .../devicetree/bindings/net/intel,ixp4xx-ethernet.yaml | 12 ++++++------
 .../devicetree/bindings/net/intel,ixp4xx-hss.yaml      | 18 +++++++++---------
 .../devicetree/bindings/net/marvell,mvusb.yaml         |  2 +-
 .../devicetree/bindings/net/marvell-bluetooth.yaml     |  4 ++--
 Documentation/devicetree/bindings/net/mdio-gpio.yaml   |  2 +-
 .../devicetree/bindings/net/mediatek,net.yaml          |  2 +-
 .../devicetree/bindings/net/mediatek,star-emac.yaml    |  2 +-
 .../bindings/net/microchip,lan966x-switch.yaml         |  2 +-
 .../bindings/net/microchip,sparx5-switch.yaml          |  4 ++--
 Documentation/devicetree/bindings/net/mscc,miim.yaml   |  2 +-
 .../devicetree/bindings/net/nfc/marvell,nci.yaml       |  2 +-
 .../devicetree/bindings/net/nfc/nxp,pn532.yaml         |  2 +-
 .../bindings/net/pse-pd/podl-pse-regulator.yaml        |  2 +-
 .../devicetree/bindings/net/qcom,ipq4019-mdio.yaml     |  2 +-
 .../devicetree/bindings/net/qcom,ipq8064-mdio.yaml     |  2 +-
 .../devicetree/bindings/net/rockchip,emac.yaml         |  2 +-
 .../devicetree/bindings/net/rockchip-dwmac.yaml        |  4 ++--
 Documentation/devicetree/bindings/net/sff,sfp.yaml     |  4 ++--
 Documentation/devicetree/bindings/net/snps,dwmac.yaml  |  2 +-
 Documentation/devicetree/bindings/net/stm32-dwmac.yaml |  8 ++++----
 .../devicetree/bindings/net/ti,cpsw-switch.yaml        | 10 +++++-----
 .../devicetree/bindings/net/ti,davinci-mdio.yaml       |  2 +-
 Documentation/devicetree/bindings/net/ti,dp83822.yaml  |  6 +++---
 Documentation/devicetree/bindings/net/ti,dp83867.yaml  |  6 +++---
 Documentation/devicetree/bindings/net/ti,dp83869.yaml  |  6 +++---
 .../bindings/net/toshiba,visconti-dwmac.yaml           |  4 ++--
 .../devicetree/bindings/net/vertexcom-mse102x.yaml     |  4 ++--
 43 files changed, 79 insertions(+), 79 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/actions,owl-emac.yaml b/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
index d30fada2ac39..5718ab4654b2 100644
--- a/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
+++ b/Documentation/devicetree/bindings/net/actions,owl-emac.yaml
@@ -16,7 +16,7 @@ description: |
   operation modes at 10/100 Mb/s data transfer rates.
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
index 987b91b9afe9..eb26623dab51 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-emac.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Allwinner A10 EMAC Ethernet Controller
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
index ede977cdfb8d..85f552b907f3 100644
--- a/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/allwinner,sun4i-a10-mdio.yaml
@@ -11,7 +11,7 @@ maintainers:
   - Maxime Ripard <mripard@kernel.org>
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 # Select every compatible, including the deprecated ones. This way, we
 # will be able to report a warning when we have that compatible, since
diff --git a/Documentation/devicetree/bindings/net/altr,tse.yaml b/Documentation/devicetree/bindings/net/altr,tse.yaml
index 8d1d94494349..9d02af468906 100644
--- a/Documentation/devicetree/bindings/net/altr,tse.yaml
+++ b/Documentation/devicetree/bindings/net/altr,tse.yaml
@@ -66,7 +66,7 @@ required:
   - tx-fifo-depth
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index ddd5a073c3a8..a2c51a84efa5 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -2,8 +2,8 @@
 # Copyright 2019 BayLibre, SAS
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/amlogic,meson-dwmac.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/amlogic,meson-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Amlogic Meson DWMAC Ethernet controller
 
diff --git a/Documentation/devicetree/bindings/net/aspeed,ast2600-mdio.yaml b/Documentation/devicetree/bindings/net/aspeed,ast2600-mdio.yaml
index f81eda8cb0a5..d6ef468495c5 100644
--- a/Documentation/devicetree/bindings/net/aspeed,ast2600-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/aspeed,ast2600-mdio.yaml
@@ -15,7 +15,7 @@ description: |+
   MAC.
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/brcm,amac.yaml b/Documentation/devicetree/bindings/net/brcm,amac.yaml
index ee2eac8f5710..210fb29c4e7b 100644
--- a/Documentation/devicetree/bindings/net/brcm,amac.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,amac.yaml
@@ -10,7 +10,7 @@ maintainers:
   - Florian Fainelli <f.fainelli@gmail.com>
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/net/brcm,systemport.yaml b/Documentation/devicetree/bindings/net/brcm,systemport.yaml
index 5fc9c9fafd85..b40006d44791 100644
--- a/Documentation/devicetree/bindings/net/brcm,systemport.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,systemport.yaml
@@ -66,7 +66,7 @@ required:
   - phy-mode
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 unevaluatedProperties: false
 
diff --git a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
index b964c7dcec15..cc70b00c6ce5 100644
--- a/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/broadcom-bluetooth.yaml
@@ -121,7 +121,7 @@ required:
   - compatible
 
 dependencies:
-  brcm,requires-autobaud-mode: [ 'shutdown-gpios' ]
+  brcm,requires-autobaud-mode: [ shutdown-gpios ]
 
 if:
   not:
diff --git a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
index 65af8183cb9c..897d2cbda45b 100644
--- a/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
+++ b/Documentation/devicetree/bindings/net/can/xilinx,can.yaml
@@ -35,15 +35,15 @@ properties:
     maxItems: 1
 
   tx-fifo-depth:
-    $ref: "/schemas/types.yaml#/definitions/uint32"
+    $ref: /schemas/types.yaml#/definitions/uint32
     description: CAN Tx fifo depth (Zynq, Axi CAN).
 
   rx-fifo-depth:
-    $ref: "/schemas/types.yaml#/definitions/uint32"
+    $ref: /schemas/types.yaml#/definitions/uint32
     description: CAN Rx fifo depth (Zynq, Axi CAN, CAN FD in sequential Rx mode)
 
   tx-mailbox-count:
-    $ref: "/schemas/types.yaml#/definitions/uint32"
+    $ref: /schemas/types.yaml#/definitions/uint32
     description: CAN Tx mailbox buffer count (CAN FD)
 
 required:
diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml
index eed16e216fb6..37bf33bd4670 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml
@@ -103,7 +103,7 @@ required:
   - "#size-cells"
 
 allOf:
-  - $ref: "dsa.yaml#"
+  - $ref: dsa.yaml#
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
index 389892592aac..fe9ebe285938 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
@@ -66,7 +66,7 @@ properties:
                  With the legacy mapping the reg corresponding to the internal
                  mdio is the switch reg with an offset of -1.
 
-$ref: "dsa.yaml#"
+$ref: dsa.yaml#
 
 patternProperties:
   "^(ethernet-)?ports$":
diff --git a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml
index 4116667133ce..82a5d7927ca4 100644
--- a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml
+++ b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml
@@ -62,7 +62,7 @@ properties:
 
   mdio:
     type: object
-    $ref: "mdio.yaml#"
+    $ref: mdio.yaml#
     description: optional node for embedded MDIO controller
 
 required:
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index 1327b81f15a2..ac04f8efa35c 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -83,7 +83,7 @@ properties:
       0: Disable 2.4 Vpp operating mode.
       1: Request 2.4 Vpp operating mode from link partner.
       Absence of this property will leave configuration to default values.
-    $ref: "/schemas/types.yaml#/definitions/uint32"
+    $ref: /schemas/types.yaml#/definitions/uint32
     enum: [0, 1]
 
   broken-turn-around:
diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
index 6e0763898d3a..a1b71b35319e 100644
--- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
@@ -14,7 +14,7 @@ description:
   located under the 'dpmacs' node for the fsl-mc bus DTS node.
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
index 8b9b3f915d92..f92730b1d2fa 100644
--- a/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
+++ b/Documentation/devicetree/bindings/net/intel,ixp46x-ptp-timer.yaml
@@ -2,8 +2,8 @@
 # Copyright 2018 Linaro Ltd.
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/intel,ixp46x-ptp-timer.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/intel,ixp46x-ptp-timer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Intel IXP46x PTP Timer (TSYNC)
 
diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
index 4e1b79818aff..4fdc5328826c 100644
--- a/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-ethernet.yaml
@@ -2,13 +2,13 @@
 # Copyright 2018 Linaro Ltd.
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/intel,ixp4xx-ethernet.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/intel,ixp4xx-ethernet.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Intel IXP4xx ethernet
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
@@ -28,7 +28,7 @@ properties:
     description: Ethernet MMIO address range
 
   queue-rx:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the RX queue node
@@ -36,7 +36,7 @@ properties:
     description: phandle to the RX queue on the NPE
 
   queue-txready:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the TX READY queue node
@@ -48,7 +48,7 @@ properties:
   phy-handle: true
 
   intel,npe-handle:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the NPE this ethernet instance is using
diff --git a/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml b/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml
index e6329febb60c..7a405e9b37b2 100644
--- a/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml
+++ b/Documentation/devicetree/bindings/net/intel,ixp4xx-hss.yaml
@@ -2,8 +2,8 @@
 # Copyright 2021 Linaro Ltd.
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/intel,ixp4xx-hss.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/intel,ixp4xx-hss.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Intel IXP4xx V.35 WAN High Speed Serial Link (HSS)
 
@@ -24,7 +24,7 @@ properties:
     description: The HSS instance
 
   intel,npe-handle:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       items:
         - description: phandle to the NPE this HSS instance is using
@@ -33,7 +33,7 @@ properties:
       and the instance to use in the second cell
 
   intel,queue-chl-rxtrig:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the RX trigger queue on the NPE
@@ -41,7 +41,7 @@ properties:
     description: phandle to the RX trigger queue on the NPE
 
   intel,queue-chl-txready:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the TX ready queue on the NPE
@@ -49,7 +49,7 @@ properties:
     description: phandle to the TX ready queue on the NPE
 
   intel,queue-pkt-rx:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the RX queue on the NPE
@@ -57,7 +57,7 @@ properties:
     description: phandle to the packet RX queue on the NPE
 
   intel,queue-pkt-tx:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     maxItems: 4
     items:
       items:
@@ -66,7 +66,7 @@ properties:
     description: phandle to the packet TX0, TX1, TX2 and TX3 queues on the NPE
 
   intel,queue-pkt-rxfree:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     maxItems: 4
     items:
       items:
@@ -76,7 +76,7 @@ properties:
       RXFREE3 queues on the NPE
 
   intel,queue-pkt-txdone:
-    $ref: '/schemas/types.yaml#/definitions/phandle-array'
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the TXDONE queue on the NPE
diff --git a/Documentation/devicetree/bindings/net/marvell,mvusb.yaml b/Documentation/devicetree/bindings/net/marvell,mvusb.yaml
index 8e288ab38fd7..3a3325168048 100644
--- a/Documentation/devicetree/bindings/net/marvell,mvusb.yaml
+++ b/Documentation/devicetree/bindings/net/marvell,mvusb.yaml
@@ -20,7 +20,7 @@ description: |+
   definition.
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml b/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
index 309ef21a1e37..6aa7a078faa2 100644
--- a/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/marvell-bluetooth.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/marvell-bluetooth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Marvell Bluetooth chips
 
diff --git a/Documentation/devicetree/bindings/net/mdio-gpio.yaml b/Documentation/devicetree/bindings/net/mdio-gpio.yaml
index 1d83b8dcce2c..dca1aec119e3 100644
--- a/Documentation/devicetree/bindings/net/mdio-gpio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio-gpio.yaml
@@ -12,7 +12,7 @@ maintainers:
   - Russell King <linux@armlinux.org.uk>
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml
index b7f6474dc5ab..acb2b2ac4fe1 100644
--- a/Documentation/devicetree/bindings/net/mediatek,net.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml
@@ -97,7 +97,7 @@ properties:
     const: 0
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
index 64c893c98d80..2e889f9a563e 100644
--- a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
@@ -15,7 +15,7 @@ description:
   modes with flow-control as well as CRC offloading and VLAN tags.
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
index dc116f14750e..306ef9ecf2b9 100644
--- a/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
+++ b/Documentation/devicetree/bindings/net/microchip,lan966x-switch.yaml
@@ -73,7 +73,7 @@ properties:
       "^port@[0-9a-f]+$":
         type: object
 
-        $ref: "/schemas/net/ethernet-controller.yaml#"
+        $ref: /schemas/net/ethernet-controller.yaml#
         unevaluatedProperties: false
 
         properties:
diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
index 57ffeb8fc876..fcafef8d5a33 100644
--- a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
+++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml
@@ -99,7 +99,7 @@ properties:
 
           microchip,bandwidth:
             description: Specifies bandwidth in Mbit/s allocated to the port.
-            $ref: "/schemas/types.yaml#/definitions/uint32"
+            $ref: /schemas/types.yaml#/definitions/uint32
             maximum: 25000
 
           microchip,sd-sgpio:
@@ -107,7 +107,7 @@ properties:
               Index of the ports Signal Detect SGPIO in the set of 384 SGPIOs
               This is optional, and only needed if the default used index is
               is not correct.
-            $ref: "/schemas/types.yaml#/definitions/uint32"
+            $ref: /schemas/types.yaml#/definitions/uint32
             minimum: 0
             maximum: 383
 
diff --git a/Documentation/devicetree/bindings/net/mscc,miim.yaml b/Documentation/devicetree/bindings/net/mscc,miim.yaml
index 2c451cfa4e0b..5b292e7c9e46 100644
--- a/Documentation/devicetree/bindings/net/mscc,miim.yaml
+++ b/Documentation/devicetree/bindings/net/mscc,miim.yaml
@@ -10,7 +10,7 @@ maintainers:
   - Alexandre Belloni <alexandre.belloni@bootlin.com>
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml b/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml
index 308485a8ee6c..8e9a95f24c80 100644
--- a/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/marvell,nci.yaml
@@ -28,7 +28,7 @@ properties:
     maxItems: 1
 
   reset-n-io:
-    $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     maxItems: 1
     description: |
       Output GPIO pin used to reset the chip (active low)
diff --git a/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml b/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml
index 0509e0166345..07c67c1e985f 100644
--- a/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml
+++ b/Documentation/devicetree/bindings/net/nfc/nxp,pn532.yaml
@@ -31,7 +31,7 @@ required:
   - compatible
 
 dependencies:
-  interrupts: [ 'reg' ]
+  interrupts: [ reg ]
 
 additionalProperties: false
 
diff --git a/Documentation/devicetree/bindings/net/pse-pd/podl-pse-regulator.yaml b/Documentation/devicetree/bindings/net/pse-pd/podl-pse-regulator.yaml
index c6b1c188abf7..94a527e6aa1b 100644
--- a/Documentation/devicetree/bindings/net/pse-pd/podl-pse-regulator.yaml
+++ b/Documentation/devicetree/bindings/net/pse-pd/podl-pse-regulator.yaml
@@ -13,7 +13,7 @@ description: Regulator based PoDL PSE controller. The device must be referenced
   by the PHY node to control power injection to the Ethernet cable.
 
 allOf:
-  - $ref: "pse-controller.yaml#"
+  - $ref: pse-controller.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
index 7631ecc8fd01..3407e909e8a7 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
@@ -51,7 +51,7 @@ required:
   - "#size-cells"
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
   - if:
       properties:
diff --git a/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml
index d7748dd33199..144001ff840c 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml
@@ -14,7 +14,7 @@ description:
   used to communicate with the gmac phy connected.
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/rockchip,emac.yaml b/Documentation/devicetree/bindings/net/rockchip,emac.yaml
index a6d4f14df442..364028b3bba4 100644
--- a/Documentation/devicetree/bindings/net/rockchip,emac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip,emac.yaml
@@ -61,7 +61,7 @@ required:
   - mdio
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 04936632fcbb..2a21bbe02892 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/rockchip-dwmac.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/rockchip-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Rockchip 10/100/1000 Ethernet driver(GMAC)
 
diff --git a/Documentation/devicetree/bindings/net/sff,sfp.yaml b/Documentation/devicetree/bindings/net/sff,sfp.yaml
index 231c4d75e4b1..973e478a399d 100644
--- a/Documentation/devicetree/bindings/net/sff,sfp.yaml
+++ b/Documentation/devicetree/bindings/net/sff,sfp.yaml
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/sff,sfp.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/sff,sfp.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Small Form Factor (SFF) Committee Small Form-factor Pluggable (SFP)
   Transceiver
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 16b7d2904696..74f2ddc12018 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -555,7 +555,7 @@ dependencies:
   snps,reset-delays-us: ["snps,reset-gpio"]
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
index 5c93167b3b41..fc8c96b08d7d 100644
--- a/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/stm32-dwmac.yaml
@@ -2,8 +2,8 @@
 # Copyright 2019 BayLibre, SAS
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/stm32-dwmac.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/stm32-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: STMicroelectronics STM32 / MCU DWMAC glue layer controller
 
@@ -26,7 +26,7 @@ select:
     - compatible
 
 allOf:
-  - $ref: "snps,dwmac.yaml#"
+  - $ref: snps,dwmac.yaml#
 
 properties:
   compatible:
@@ -73,7 +73,7 @@ properties:
         - ptp_ref
 
   st,syscon:
-    $ref: "/schemas/types.yaml#/definitions/phandle-array"
+    $ref: /schemas/types.yaml#/definitions/phandle-array
     items:
       - items:
           - description: phandle to the syscon node which encompases the glue register
diff --git a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
index e36c7817be69..b04ac4966608 100644
--- a/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ti,cpsw-switch.yaml
@@ -62,10 +62,10 @@ properties:
 
   interrupt-names:
     items:
-      - const: "rx_thresh"
-      - const: "rx"
-      - const: "tx"
-      - const: "misc"
+      - const: rx_thresh
+      - const: rx
+      - const: tx
+      - const: misc
 
   pinctrl-names: true
 
@@ -154,7 +154,7 @@ patternProperties:
     type: object
     description:
       CPSW MDIO bus.
-    $ref: "ti,davinci-mdio.yaml#"
+    $ref: ti,davinci-mdio.yaml#
 
 
 required:
diff --git a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
index a339202c5e8e..53604fab0b73 100644
--- a/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/ti,davinci-mdio.yaml
@@ -13,7 +13,7 @@ description:
   TI SoC Davinci/Keystone2 MDIO Controller
 
 allOf:
-  - $ref: "mdio.yaml#"
+  - $ref: mdio.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/net/ti,dp83822.yaml b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
index f2489a9c852f..db74474207ed 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83822.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83822.yaml
@@ -2,8 +2,8 @@
 # Copyright (C) 2020 Texas Instruments Incorporated
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/ti,dp83822.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/ti,dp83822.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: TI DP83822 ethernet PHY
 
@@ -21,7 +21,7 @@ description: |
     http://www.ti.com/lit/ds/symlink/dp83822i.pdf
 
 allOf:
-  - $ref: "ethernet-phy.yaml#"
+  - $ref: ethernet-phy.yaml#
 
 properties:
   reg:
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
index b8c0e4b5b494..4bc1f98fd9fe 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83867.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
@@ -2,13 +2,13 @@
 # Copyright (C) 2019 Texas Instruments Incorporated
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/ti,dp83867.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/ti,dp83867.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: TI DP83867 ethernet PHY
 
 allOf:
-  - $ref: "ethernet-controller.yaml#"
+  - $ref: ethernet-controller.yaml#
 
 maintainers:
   - Andrew Davis <afd@ti.com>
diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
index b04ff0014a59..fb6725df4668 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83869.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
@@ -2,13 +2,13 @@
 # Copyright (C) 2019 Texas Instruments Incorporated
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/ti,dp83869.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/ti,dp83869.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: TI DP83869 ethernet PHY
 
 allOf:
-  - $ref: "ethernet-phy.yaml#"
+  - $ref: ethernet-phy.yaml#
 
 maintainers:
   - Andrew Davis <afd@ti.com>
diff --git a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
index 0988ed8d1c12..474fa8bcf302 100644
--- a/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/toshiba,visconti-dwmac.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/toshiba,visconti-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: Toshiba Visconti DWMAC Ethernet controller
 
diff --git a/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml b/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml
index 6a71f694cb55..4c4ced8cfa4b 100644
--- a/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml
+++ b/Documentation/devicetree/bindings/net/vertexcom-mse102x.yaml
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/net/vertexcom-mse102x.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/net/vertexcom-mse102x.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: The Vertexcom MSE102x (SPI)
 
-- 
cgit v1.2.3


From 3eb8eea2a453463f5606ce3e46cf225f88671440 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 21 Mar 2023 22:38:48 -0700
Subject: docs: networking: document NAPI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add basic documentation about NAPI. We can stop linking to the ancient
doc on the LF wiki.

Link: https://lore.kernel.org/all/20230315223044.471002-1-kuba@kernel.org/
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Pavel Pisa <pisa@cmp.felk.cvut.cz> # for ctucanfd-driver.rst
Reviewed-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Stephen Hemminger <stephen@networkplumber.org>
Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20230322053848.198452-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../device_drivers/can/ctu/ctucanfd-driver.rst     |   3 +-
 .../device_drivers/ethernet/intel/e100.rst         |   3 +-
 .../device_drivers/ethernet/intel/i40e.rst         |   4 +-
 .../device_drivers/ethernet/intel/ice.rst          |   4 +-
 Documentation/networking/index.rst                 |   1 +
 Documentation/networking/napi.rst                  | 254 +++++++++++++++++++++
 include/linux/netdevice.h                          |  13 +-
 7 files changed, 269 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/networking/napi.rst

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/can/ctu/ctucanfd-driver.rst b/Documentation/networking/device_drivers/can/ctu/ctucanfd-driver.rst
index 1a4fc6607582..1661d13174d5 100644
--- a/Documentation/networking/device_drivers/can/ctu/ctucanfd-driver.rst
+++ b/Documentation/networking/device_drivers/can/ctu/ctucanfd-driver.rst
@@ -229,8 +229,7 @@ frames for a while. This has a potential to avoid the costly round of
 enabling interrupts, handling an incoming IRQ in ISR, re-enabling the
 softirq and switching context back to softirq.
 
-More detailed documentation of NAPI may be found on the pages of Linux
-Foundation `<https://wiki.linuxfoundation.org/networking/napi>`_.
+See :ref:`Documentation/networking/napi.rst <napi>` for more information.
 
 Integrating the core to Xilinx Zynq
 -----------------------------------
diff --git a/Documentation/networking/device_drivers/ethernet/intel/e100.rst b/Documentation/networking/device_drivers/ethernet/intel/e100.rst
index 3d4a9ba21946..371b7e5c3293 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e100.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e100.rst
@@ -151,8 +151,7 @@ NAPI
 
 NAPI (Rx polling mode) is supported in the e100 driver.
 
-See https://wiki.linuxfoundation.org/networking/napi for more
-information on NAPI.
+See :ref:`Documentation/networking/napi.rst <napi>` for more information.
 
 Multiple Interfaces on Same Ethernet Broadcast Network
 ------------------------------------------------------
diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index ac35bd472bdc..c495c4e16b3b 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -399,8 +399,8 @@ operate only in full duplex and only at their native speed.
 NAPI
 ----
 NAPI (Rx polling mode) is supported in the i40e driver.
-For more information on NAPI, see
-https://wiki.linuxfoundation.org/networking/napi
+
+See :ref:`Documentation/networking/napi.rst <napi>` for more information.
 
 Flow Control
 ------------
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index 5efea4dd1251..2b6dc7880d7b 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -817,10 +817,10 @@ NOTE:
 
 NAPI
 ----
+
 This driver supports NAPI (Rx polling mode).
-For more information on NAPI, see
-https://wiki.linuxfoundation.org/networking/napi
 
+See :ref:`Documentation/networking/napi.rst <napi>` for more information.
 
 MACVLAN
 -------
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 4ddcae33c336..24bb256d6d53 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -73,6 +73,7 @@ Contents:
    mpls-sysctl
    mptcp-sysctl
    multiqueue
+   napi
    netconsole
    netdev-features
    netdevices
diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst
new file mode 100644
index 000000000000..a7a047742e93
--- /dev/null
+++ b/Documentation/networking/napi.rst
@@ -0,0 +1,254 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+.. _napi:
+
+====
+NAPI
+====
+
+NAPI is the event handling mechanism used by the Linux networking stack.
+The name NAPI no longer stands for anything in particular [#]_.
+
+In basic operation the device notifies the host about new events
+via an interrupt.
+The host then schedules a NAPI instance to process the events.
+The device may also be polled for events via NAPI without receiving
+interrupts first (:ref:`busy polling<poll>`).
+
+NAPI processing usually happens in the software interrupt context,
+but there is an option to use :ref:`separate kernel threads<threaded>`
+for NAPI processing.
+
+All in all NAPI abstracts away from the drivers the context and configuration
+of event (packet Rx and Tx) processing.
+
+Driver API
+==========
+
+The two most important elements of NAPI are the struct napi_struct
+and the associated poll method. struct napi_struct holds the state
+of the NAPI instance while the method is the driver-specific event
+handler. The method will typically free Tx packets that have been
+transmitted and process newly received packets.
+
+.. _drv_ctrl:
+
+Control API
+-----------
+
+netif_napi_add() and netif_napi_del() add/remove a NAPI instance
+from the system. The instances are attached to the netdevice passed
+as argument (and will be deleted automatically when netdevice is
+unregistered). Instances are added in a disabled state.
+
+napi_enable() and napi_disable() manage the disabled state.
+A disabled NAPI can't be scheduled and its poll method is guaranteed
+to not be invoked. napi_disable() waits for ownership of the NAPI
+instance to be released.
+
+The control APIs are not idempotent. Control API calls are safe against
+concurrent use of datapath APIs but an incorrect sequence of control API
+calls may result in crashes, deadlocks, or race conditions. For example,
+calling napi_disable() multiple times in a row will deadlock.
+
+Datapath API
+------------
+
+napi_schedule() is the basic method of scheduling a NAPI poll.
+Drivers should call this function in their interrupt handler
+(see :ref:`drv_sched` for more info). A successful call to napi_schedule()
+will take ownership of the NAPI instance.
+
+Later, after NAPI is scheduled, the driver's poll method will be
+called to process the events/packets. The method takes a ``budget``
+argument - drivers can process completions for any number of Tx
+packets but should only process up to ``budget`` number of
+Rx packets. Rx processing is usually much more expensive.
+
+In other words, it is recommended to ignore the budget argument when
+performing TX buffer reclamation to ensure that the reclamation is not
+arbitrarily bounded; however, it is required to honor the budget argument
+for RX processing.
+
+.. warning::
+
+   The ``budget`` argument may be 0 if the core tries to process only Tx
+   completions and no Rx packets.
+
+The poll method returns the amount of work done. If the driver still
+has outstanding work to do (e.g. ``budget`` was exhausted)
+the poll method should return exactly ``budget``. In that case,
+the NAPI instance will be serviced/polled again (without the
+need to be scheduled).
+
+If event processing has been completed (all outstanding packets
+processed) the poll method should call napi_complete_done()
+before returning. napi_complete_done() releases the ownership
+of the instance.
+
+.. warning::
+
+   The case of finishing all events and using exactly ``budget``
+   must be handled carefully. There is no way to report this
+   (rare) condition to the stack, so the driver must either
+   not call napi_complete_done() and wait to be called again,
+   or return ``budget - 1``.
+
+   If the ``budget`` is 0 napi_complete_done() should never be called.
+
+Call sequence
+-------------
+
+Drivers should not make assumptions about the exact sequencing
+of calls. The poll method may be called without the driver scheduling
+the instance (unless the instance is disabled). Similarly,
+it's not guaranteed that the poll method will be called, even
+if napi_schedule() succeeded (e.g. if the instance gets disabled).
+
+As mentioned in the :ref:`drv_ctrl` section - napi_disable() and subsequent
+calls to the poll method only wait for the ownership of the instance
+to be released, not for the poll method to exit. This means that
+drivers should avoid accessing any data structures after calling
+napi_complete_done().
+
+.. _drv_sched:
+
+Scheduling and IRQ masking
+--------------------------
+
+Drivers should keep the interrupts masked after scheduling
+the NAPI instance - until NAPI polling finishes any further
+interrupts are unnecessary.
+
+Drivers which have to mask the interrupts explicitly (as opposed
+to IRQ being auto-masked by the device) should use the napi_schedule_prep()
+and __napi_schedule() calls:
+
+.. code-block:: c
+
+  if (napi_schedule_prep(&v->napi)) {
+      mydrv_mask_rxtx_irq(v->idx);
+      /* schedule after masking to avoid races */
+      __napi_schedule(&v->napi);
+  }
+
+IRQ should only be unmasked after a successful call to napi_complete_done():
+
+.. code-block:: c
+
+  if (budget && napi_complete_done(&v->napi, work_done)) {
+    mydrv_unmask_rxtx_irq(v->idx);
+    return min(work_done, budget - 1);
+  }
+
+napi_schedule_irqoff() is a variant of napi_schedule() which takes advantage
+of guarantees given by being invoked in IRQ context (no need to
+mask interrupts). Note that PREEMPT_RT forces all interrupts
+to be threaded so the interrupt may need to be marked ``IRQF_NO_THREAD``
+to avoid issues on real-time kernel configurations.
+
+Instance to queue mapping
+-------------------------
+
+Modern devices have multiple NAPI instances (struct napi_struct) per
+interface. There is no strong requirement on how the instances are
+mapped to queues and interrupts. NAPI is primarily a polling/processing
+abstraction without specific user-facing semantics. That said, most networking
+devices end up using NAPI in fairly similar ways.
+
+NAPI instances most often correspond 1:1:1 to interrupts and queue pairs
+(queue pair is a set of a single Rx and single Tx queue).
+
+In less common cases a NAPI instance may be used for multiple queues
+or Rx and Tx queues can be serviced by separate NAPI instances on a single
+core. Regardless of the queue assignment, however, there is usually still
+a 1:1 mapping between NAPI instances and interrupts.
+
+It's worth noting that the ethtool API uses a "channel" terminology where
+each channel can be either ``rx``, ``tx`` or ``combined``. It's not clear
+what constitutes a channel; the recommended interpretation is to understand
+a channel as an IRQ/NAPI which services queues of a given type. For example,
+a configuration of 1 ``rx``, 1 ``tx`` and 1 ``combined`` channel is expected
+to utilize 3 interrupts, 2 Rx and 2 Tx queues.
+
+User API
+========
+
+User interactions with NAPI depend on NAPI instance ID. The instance IDs
+are only visible to the user through the ``SO_INCOMING_NAPI_ID`` socket option.
+It's not currently possible to query IDs used by a given device.
+
+Software IRQ coalescing
+-----------------------
+
+NAPI does not perform any explicit event coalescing by default.
+In most scenarios batching happens due to IRQ coalescing which is done
+by the device. There are cases where software coalescing is helpful.
+
+NAPI can be configured to arm a repoll timer instead of unmasking
+the hardware interrupts as soon as all packets are processed.
+The ``gro_flush_timeout`` sysfs configuration of the netdevice
+is reused to control the delay of the timer, while
+``napi_defer_hard_irqs`` controls the number of consecutive empty polls
+before NAPI gives up and goes back to using hardware IRQs.
+
+.. _poll:
+
+Busy polling
+------------
+
+Busy polling allows a user process to check for incoming packets before
+the device interrupt fires. As is the case with any busy polling it trades
+off CPU cycles for lower latency (production uses of NAPI busy polling
+are not well known).
+
+Busy polling is enabled by either setting ``SO_BUSY_POLL`` on
+selected sockets or using the global ``net.core.busy_poll`` and
+``net.core.busy_read`` sysctls. An io_uring API for NAPI busy polling
+also exists.
+
+IRQ mitigation
+---------------
+
+While busy polling is supposed to be used by low latency applications,
+a similar mechanism can be used for IRQ mitigation.
+
+Very high request-per-second applications (especially routing/forwarding
+applications and especially applications using AF_XDP sockets) may not
+want to be interrupted until they finish processing a request or a batch
+of packets.
+
+Such applications can pledge to the kernel that they will perform a busy
+polling operation periodically, and the driver should keep the device IRQs
+permanently masked. This mode is enabled by using the ``SO_PREFER_BUSY_POLL``
+socket option. To avoid system misbehavior the pledge is revoked
+if ``gro_flush_timeout`` passes without any busy poll call.
+
+The NAPI budget for busy polling is lower than the default (which makes
+sense given the low latency intention of normal busy polling). This is
+not the case with IRQ mitigation, however, so the budget can be adjusted
+with the ``SO_BUSY_POLL_BUDGET`` socket option.
+
+.. _threaded:
+
+Threaded NAPI
+-------------
+
+Threaded NAPI is an operating mode that uses dedicated kernel
+threads rather than software IRQ context for NAPI processing.
+The configuration is per netdevice and will affect all
+NAPI instances of that device. Each NAPI instance will spawn a separate
+thread (called ``napi/${ifc-name}-${napi-id}``).
+
+It is recommended to pin each kernel thread to a single CPU, the same
+CPU as the CPU which services the interrupt. Note that the mapping
+between IRQs and NAPI instances may not be trivial (and is driver
+dependent). The NAPI instance IDs will be assigned in the opposite
+order to the process IDs of the kernel threads.
+
+Threaded NAPI is controlled by writing 0/1 to the ``threaded`` file in
+netdev's sysfs directory.
+
+.. rubric:: Footnotes
+
+.. [#] NAPI was originally referred to as New API in Linux 2.4.
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 674ee5daa7b1..18a5be6ddd0f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -509,15 +509,18 @@ static inline bool napi_reschedule(struct napi_struct *napi)
 	return false;
 }
 
-bool napi_complete_done(struct napi_struct *n, int work_done);
 /**
- *	napi_complete - NAPI processing complete
- *	@n: NAPI context
+ * napi_complete_done - NAPI processing complete
+ * @n: NAPI context
+ * @work_done: number of packets processed
  *
- * Mark NAPI processing as complete.
- * Consider using napi_complete_done() instead.
+ * Mark NAPI processing as complete. Should only be called if poll budget
+ * has not been completely consumed.
+ * Prefer over napi_complete().
  * Return false if device should avoid rearming interrupts.
  */
+bool napi_complete_done(struct napi_struct *n, int work_done);
+
 static inline bool napi_complete(struct napi_struct *n)
 {
 	return napi_complete_done(n, 0);
-- 
cgit v1.2.3


From e110ba65927151bf56d8012ddf39ce10f91cbdf9 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 22 Mar 2023 16:12:02 -0700
Subject: docs: netdev: add note about Changes Requested and revising commit
 messages

One of the most commonly asked questions is "I answered all questions
and don't need to make any code changes, why was the patch not applied".
Document our time honored tradition of asking people to repost with
improved commit messages, to record the answers to reviewer questions.

Take this opportunity to also recommend a change log format.

Link: https://lore.kernel.org/r/20230322231202.265835-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/process/maintainer-netdev.rst | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index 4a75686d35ab..e31d7a951073 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -109,6 +109,8 @@ Finally, the vX.Y gets released, and the whole cycle starts over.
 netdev patch review
 -------------------
 
+.. _patch_status:
+
 Patch status
 ~~~~~~~~~~~~
 
@@ -143,6 +145,33 @@ Asking the maintainer for status updates on your
 patch is a good way to ensure your patch is ignored or pushed to the
 bottom of the priority list.
 
+Changes requested
+~~~~~~~~~~~~~~~~~
+
+Patches :ref:`marked<patch_status>` as ``Changes Requested`` need
+to be revised. The new version should come with a change log,
+preferably including links to previous postings, for example::
+
+  [PATCH net-next v3] net: make cows go moo
+
+  Even users who don't drink milk appreciate hearing the cows go "moo".
+
+  The amount of mooing will depend on packet rate so should match
+  the diurnal cycle quite well.
+
+  Signed-off-by: Joe Defarmer <joe@barn.org>
+  ---
+  v3:
+    - add a note about time-of-day mooing fluctuation to the commit message
+  v2: https://lore.kernel.org/netdev/123themessageid@barn.org/
+    - fix missing argument in kernel doc for netif_is_bovine()
+    - fix memory leak in netdev_register_cow()
+  v1: https://lore.kernel.org/netdev/456getstheclicks@barn.org/
+
+The commit message should be revised to answer any questions reviewers
+had to ask in previous discussions. Occasionally the update of
+the commit message will be the only change in the new version.
+
 Partial resends
 ~~~~~~~~~~~~~~~
 
-- 
cgit v1.2.3


From b6b88111c0dbcef04cd0e0bafd646e4c09728302 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Tue, 21 Mar 2023 10:40:10 +0100
Subject: dt-bindings: net: wireless: add ath11k pcie bindings

Add devicetree bindings for Qualcomm ath11k PCIe devices such as WCN6855
for which the calibration data variant may need to be described.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://lore.kernel.org/r/20230321094011.9759-2-johan+linaro@kernel.org
---
 .../bindings/net/wireless/qcom,ath11k-pci.yaml     | 58 ++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
new file mode 100644
index 000000000000..817f02a8b481
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k-pci.yaml
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2023 Linaro Limited
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/wireless/qcom,ath11k-pci.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies ath11k wireless devices (PCIe)
+
+maintainers:
+  - Kalle Valo <kvalo@kernel.org>
+
+description: |
+  Qualcomm Technologies IEEE 802.11ax PCIe devices
+
+properties:
+  compatible:
+    enum:
+      - pci17cb,1103  # WCN6855
+
+  reg:
+    maxItems: 1
+
+  qcom,ath11k-calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: |
+      string to uniquely identify variant of the calibration data for designs
+      with colliding bus and device ids
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    pcie {
+        #address-cells = <3>;
+        #size-cells = <2>;
+
+        pcie@0 {
+            device_type = "pci";
+            reg = <0x0 0x0 0x0 0x0 0x0>;
+            #address-cells = <3>;
+            #size-cells = <2>;
+            ranges;
+
+            bus-range = <0x01 0xff>;
+
+            wifi@0 {
+                compatible = "pci17cb,1103";
+                reg = <0x10000 0x0 0x0 0x0 0x0>;
+
+                qcom,ath11k-calibration-variant = "LE_X13S";
+            };
+        };
+    };
-- 
cgit v1.2.3


From 6c831c4684124a544f73f7c9b83bc7b2eb0b23d3 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sat, 25 Mar 2023 16:31:46 -0500
Subject: bpf: Treat KF_RELEASE kfuncs as KF_TRUSTED_ARGS

KF_RELEASE kfuncs are not currently treated as having KF_TRUSTED_ARGS,
even though they have a superset of the requirements of KF_TRUSTED_ARGS.
Like KF_TRUSTED_ARGS, KF_RELEASE kfuncs require a 0-offset argument, and
don't allow NULL-able arguments. Unlike KF_TRUSTED_ARGS which require
_either_ an argument with ref_obj_id > 0, _or_ (ref->type &
BPF_REG_TRUSTED_MODIFIERS) (and no unsafe modifiers allowed), KF_RELEASE
only allows for ref_obj_id > 0.  Because KF_RELEASE today doesn't
automatically imply KF_TRUSTED_ARGS, some of these requirements are
enforced in different ways that can make the behavior of the verifier
feel unpredictable. For example, a KF_RELEASE kfunc with a NULL-able
argument will currently fail in the verifier with a message like, "arg#0
is ptr_or_null_ expected ptr_ or socket" rather than "Possibly NULL
pointer passed to trusted arg0". Our intention is the same, but the
semantics are different due to implementation details that kfunc authors
and BPF program writers should not need to care about.

Let's make the behavior of the verifier more consistent and intuitive by
having KF_RELEASE kfuncs imply the presence of KF_TRUSTED_ARGS. Our
eventual goal is to have all kfuncs assume KF_TRUSTED_ARGS by default
anyways, so this takes us a step in that direction.

Note that it does not make sense to assume KF_TRUSTED_ARGS for all
KF_ACQUIRE kfuncs. KF_ACQUIRE kfuncs can have looser semantics than
KF_RELEASE, with e.g. KF_RCU | KF_RET_NULL. We may want to have
KF_ACQUIRE imply KF_TRUSTED_ARGS _unless_ KF_RCU is specified, but that
can be left to another patch set, and there are no such subtleties to
address for KF_RELEASE.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230325213144.486885-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst                           |  7 ++++---
 kernel/bpf/cpumask.c                                   |  2 +-
 kernel/bpf/verifier.c                                  |  2 +-
 net/bpf/test_run.c                                     |  6 ++++++
 tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c |  4 ++--
 tools/testing/selftests/bpf/progs/task_kfunc_failure.c |  6 +++---
 tools/testing/selftests/bpf/verifier/calls.c           | 10 +++++++---
 tools/testing/selftests/bpf/verifier/ref_tracking.c    |  6 +++---
 8 files changed, 27 insertions(+), 16 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 69eccf6f98ef..bf1b85941452 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -179,9 +179,10 @@ both are orthogonal to each other.
 ---------------------
 
 The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
-passed in to it. There can be only one referenced pointer that can be passed in.
-All copies of the pointer being released are invalidated as a result of invoking
-kfunc with this flag.
+passed in to it. There can be only one referenced pointer that can be passed
+in. All copies of the pointer being released are invalidated as a result of
+invoking kfunc with this flag. KF_RELEASE kfuncs automatically receive the
+protection afforded by the KF_TRUSTED_ARGS flag described below.
 
 2.4.4 KF_KPTR_GET flag
 ----------------------
diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c
index e991af7dc13c..7efdf5d770ca 100644
--- a/kernel/bpf/cpumask.c
+++ b/kernel/bpf/cpumask.c
@@ -402,7 +402,7 @@ __diag_pop();
 
 BTF_SET8_START(cpumask_kfunc_btf_ids)
 BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
-BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
 BTF_ID_FLAGS(func, bpf_cpumask_first, KF_RCU)
 BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_RCU)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 64f06f6e16bf..20eb2015842f 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -9307,7 +9307,7 @@ static bool is_kfunc_release(struct bpf_kfunc_call_arg_meta *meta)
 
 static bool is_kfunc_trusted_args(struct bpf_kfunc_call_arg_meta *meta)
 {
-	return meta->kfunc_flags & KF_TRUSTED_ARGS;
+	return (meta->kfunc_flags & KF_TRUSTED_ARGS) || is_kfunc_release(meta);
 }
 
 static bool is_kfunc_sleepable(struct bpf_kfunc_call_arg_meta *meta)
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 27587f1c5f36..f1652f5fbd2e 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -606,6 +606,11 @@ bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
 	return &prog_test_struct;
 }
 
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+	WARN_ON_ONCE(1);
+}
+
 __bpf_kfunc struct prog_test_member *
 bpf_kfunc_call_memb_acquire(void)
 {
@@ -800,6 +805,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
 BTF_SET8_END(test_sk_check_kfunc_ids)
 
 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
index 807fb0ac41e9..48b2034cadb3 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -206,7 +206,7 @@ int BPF_PROG(cgrp_kfunc_get_unreleased, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
-__failure __msg("expects refcounted")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
 {
 	struct __cgrps_kfunc_map_value *v;
@@ -234,7 +234,7 @@ int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
 }
 
 SEC("tp_btf/cgroup_mkdir")
-__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
 {
 	struct __cgrps_kfunc_map_value local, *v;
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index 27994d6b2914..2c374a7ffece 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -206,7 +206,7 @@ int BPF_PROG(task_kfunc_get_unreleased, struct task_struct *task, u64 clone_flag
 }
 
 SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
 {
 	struct __tasks_kfunc_map_value *v;
@@ -234,7 +234,7 @@ int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
 }
 
 SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
 {
 	struct __tasks_kfunc_map_value local, *v;
@@ -277,7 +277,7 @@ int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_
 }
 
 SEC("tp_btf/task_newtask")
-__failure __msg("arg#0 is ptr_or_null_ expected ptr_ or socket")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
 int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
 {
 	struct task_struct *acquired;
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 5702fc9761ef..1bdf2b43e49e 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -109,7 +109,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
+	.errstr = "Possibly NULL pointer passed to trusted arg0",
 	.fixup_kfunc_btf_id = {
 		{ "bpf_kfunc_call_test_acquire", 3 },
 		{ "bpf_kfunc_call_test_release", 5 },
@@ -165,19 +165,23 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
-	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 16),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -4),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_kfunc_btf_id = {
 		{ "bpf_kfunc_call_test_acquire", 3 },
-		{ "bpf_kfunc_call_test_release", 9 },
+		{ "bpf_kfunc_call_test_offset", 9 },
+		{ "bpf_kfunc_call_test_release", 12 },
 	},
 	.result_unpriv = REJECT,
 	.result = REJECT,
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 9540164712b7..5a2e154dd1e0 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -142,7 +142,7 @@
 	.kfunc = "bpf",
 	.expected_attach_type = BPF_LSM_MAC,
 	.flags = BPF_F_SLEEPABLE,
-	.errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
+	.errstr = "Possibly NULL pointer passed to trusted arg0",
 	.fixup_kfunc_btf_id = {
 		{ "bpf_lookup_user_key", 2 },
 		{ "bpf_key_put", 4 },
@@ -163,7 +163,7 @@
 	.kfunc = "bpf",
 	.expected_attach_type = BPF_LSM_MAC,
 	.flags = BPF_F_SLEEPABLE,
-	.errstr = "arg#0 is ptr_or_null_ expected ptr_ or socket",
+	.errstr = "Possibly NULL pointer passed to trusted arg0",
 	.fixup_kfunc_btf_id = {
 		{ "bpf_lookup_system_key", 1 },
 		{ "bpf_key_put", 3 },
@@ -182,7 +182,7 @@
 	.kfunc = "bpf",
 	.expected_attach_type = BPF_LSM_MAC,
 	.flags = BPF_F_SLEEPABLE,
-	.errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+	.errstr = "Possibly NULL pointer passed to trusted arg0",
 	.fixup_kfunc_btf_id = {
 		{ "bpf_key_put", 1 },
 	},
-- 
cgit v1.2.3


From 8cfee110711ed60bfdd39af0107ddef01d6b72c3 Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler@microsoft.com>
Date: Sun, 26 Mar 2023 03:31:17 +0000
Subject: bpf, docs: Add extended call instructions

Add extended call instructions.  Uses the term "program-local" for
call by offset.  And there are instructions for calling helper functions
by "address" (the old way of using integer values), and for calling
helper functions by BTF ID (for kfuncs).

V1 -> V2: addressed comments from David Vernet

V2 -> V3: make descriptions in table consistent with updated names

V3 -> V4: addressed comments from Alexei

V4 -> V5: fixed alignment

Signed-off-by: Dave Thaler <dthaler@microsoft.com>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230326033117.1075-1-dthaler1968@googlemail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/instruction-set.rst | 59 ++++++++++++++++++++++-------------
 1 file changed, 37 insertions(+), 22 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index b44640589055..b77280eb926f 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -243,27 +243,29 @@ Jump instructions
 otherwise identical operations.
 The 'code' field encodes the operation as below:
 
-========  =====  =========================  ============
-code      value  description                notes
-========  =====  =========================  ============
-BPF_JA    0x00   PC += off                  BPF_JMP only
-BPF_JEQ   0x10   PC += off if dst == src
-BPF_JGT   0x20   PC += off if dst > src     unsigned
-BPF_JGE   0x30   PC += off if dst >= src    unsigned
-BPF_JSET  0x40   PC += off if dst & src
-BPF_JNE   0x50   PC += off if dst != src
-BPF_JSGT  0x60   PC += off if dst > src     signed
-BPF_JSGE  0x70   PC += off if dst >= src    signed
-BPF_CALL  0x80   function call              see `Helper functions`_
-BPF_EXIT  0x90   function / program return  BPF_JMP only
-BPF_JLT   0xa0   PC += off if dst < src     unsigned
-BPF_JLE   0xb0   PC += off if dst <= src    unsigned
-BPF_JSLT  0xc0   PC += off if dst < src     signed
-BPF_JSLE  0xd0   PC += off if dst <= src    signed
-========  =====  =========================  ============
+========  =====  ===  ===========================================  =========================================
+code      value  src  description                                  notes
+========  =====  ===  ===========================================  =========================================
+BPF_JA    0x0    0x0  PC += offset                                 BPF_JMP only
+BPF_JEQ   0x1    any  PC += offset if dst == src
+BPF_JGT   0x2    any  PC += offset if dst > src                    unsigned
+BPF_JGE   0x3    any  PC += offset if dst >= src                   unsigned
+BPF_JSET  0x4    any  PC += offset if dst & src
+BPF_JNE   0x5    any  PC += offset if dst != src
+BPF_JSGT  0x6    any  PC += offset if dst > src                    signed
+BPF_JSGE  0x7    any  PC += offset if dst >= src                   signed
+BPF_CALL  0x8    0x0  call helper function by address              see `Helper functions`_
+BPF_CALL  0x8    0x1  call PC += offset                            see `Program-local functions`_
+BPF_CALL  0x8    0x2  call helper function by BTF ID               see `Helper functions`_
+BPF_EXIT  0x9    0x0  return                                       BPF_JMP only
+BPF_JLT   0xa    any  PC += offset if dst < src                    unsigned
+BPF_JLE   0xb    any  PC += offset if dst <= src                   unsigned
+BPF_JSLT  0xc    any  PC += offset if dst < src                    signed
+BPF_JSLE  0xd    any  PC += offset if dst <= src                   signed
+========  =====  ===  ===========================================  =========================================
 
 The eBPF program needs to store the return value into register R0 before doing a
-BPF_EXIT.
+``BPF_EXIT``.
 
 Example:
 
@@ -277,9 +279,22 @@ Helper functions
 ~~~~~~~~~~~~~~~~
 
 Helper functions are a concept whereby BPF programs can call into a
-set of function calls exposed by the runtime.  Each helper
-function is identified by an integer used in a ``BPF_CALL`` instruction.
-The available helper functions may differ for each program type.
+set of function calls exposed by the underlying platform.
+
+Historically, each helper function was identified by an address
+encoded in the imm field.  The available helper functions may differ
+for each program type, but address values are unique across all program types.
+
+Platforms that support the BPF Type Format (BTF) support identifying
+a helper function by a BTF ID encoded in the imm field, where the BTF ID
+identifies the helper name and type.
+
+Program-local functions
+~~~~~~~~~~~~~~~~~~~~~~~
+Program-local functions are functions exposed by the same BPF program as the
+caller, and are referenced by offset from the call instruction, similar to
+``BPF_JA``.  A ``BPF_EXIT`` within the program-local function will return to
+the caller.
 
 Load and store instructions
 ===========================
-- 
cgit v1.2.3


From a20869b3a7859135cd4e7800d8960966ad34a5b7 Mon Sep 17 00:00:00 2001
From: Álvaro Fernández Rojas <noltari@gmail.com>
Date: Fri, 24 Mar 2023 09:41:37 +0100
Subject: dt-bindings: net: dsa: b53: add BCM53134 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BCM53134 are B53 switches connected by MDIO.

Signed-off-by: Álvaro Fernández Rojas <noltari@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Krzysztof Kozlowski <krzk@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
index 57e0ef93b134..4c78c546343f 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,b53.yaml
@@ -19,6 +19,7 @@ properties:
       - const: brcm,bcm53115
       - const: brcm,bcm53125
       - const: brcm,bcm53128
+      - const: brcm,bcm53134
       - const: brcm,bcm5365
       - const: brcm,bcm5395
       - const: brcm,bcm5389
-- 
cgit v1.2.3


From 233eb4e786b57ea686b51c13a04cc2839fd682fc Mon Sep 17 00:00:00 2001
From: Shay Agroskin <shayagr@amazon.com>
Date: Thu, 23 Mar 2023 18:36:05 +0200
Subject: ethtool: Add support for configuring tx_push_buf_len

This attribute, which is part of ethtool's ring param configuration,
allows the user to specify the maximum number of bytes of the packet's
payload that can be written directly to the device.

Example usage:
    # ethtool -G [interface] tx-push-buf-len [number of bytes]

Co-developed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml     |  8 +++++
 Documentation/networking/ethtool-netlink.rst | 47 ++++++++++++++++++----------
 include/linux/ethtool.h                      | 14 ++++++---
 include/uapi/linux/ethtool_netlink.h         |  2 ++
 net/ethtool/netlink.h                        |  2 +-
 net/ethtool/rings.c                          | 34 ++++++++++++++++++--
 6 files changed, 84 insertions(+), 23 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 4727c067e2ba..6d8ae3d9a680 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -165,6 +165,12 @@ attribute-sets:
       -
         name: rx-push
         type: u8
+      -
+        name: tx-push-buf-len
+        type: u32
+      -
+        name: tx-push-buf-len-max
+        type: u32
 
   -
     name: mm-stat
@@ -311,6 +317,8 @@ operations:
             - cqe-size
             - tx-push
             - rx-push
+            - tx-push-buf-len
+            - tx-push-buf-len-max
       dump: *ring-get-op
     -
       name: rings-set
diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index e1bc6186d7ea..cd0973d4ba01 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -860,22 +860,24 @@ Request contents:
 
 Kernel response contents:
 
-  ====================================  ======  ===========================
-  ``ETHTOOL_A_RINGS_HEADER``            nested  reply header
-  ``ETHTOOL_A_RINGS_RX_MAX``            u32     max size of RX ring
-  ``ETHTOOL_A_RINGS_RX_MINI_MAX``       u32     max size of RX mini ring
-  ``ETHTOOL_A_RINGS_RX_JUMBO_MAX``      u32     max size of RX jumbo ring
-  ``ETHTOOL_A_RINGS_TX_MAX``            u32     max size of TX ring
-  ``ETHTOOL_A_RINGS_RX``                u32     size of RX ring
-  ``ETHTOOL_A_RINGS_RX_MINI``           u32     size of RX mini ring
-  ``ETHTOOL_A_RINGS_RX_JUMBO``          u32     size of RX jumbo ring
-  ``ETHTOOL_A_RINGS_TX``                u32     size of TX ring
-  ``ETHTOOL_A_RINGS_RX_BUF_LEN``        u32     size of buffers on the ring
-  ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT``    u8      TCP header / data split
-  ``ETHTOOL_A_RINGS_CQE_SIZE``          u32     Size of TX/RX CQE
-  ``ETHTOOL_A_RINGS_TX_PUSH``           u8      flag of TX Push mode
-  ``ETHTOOL_A_RINGS_RX_PUSH``           u8      flag of RX Push mode
-  ====================================  ======  ===========================
+  =======================================   ======  ===========================
+  ``ETHTOOL_A_RINGS_HEADER``                nested  reply header
+  ``ETHTOOL_A_RINGS_RX_MAX``                u32     max size of RX ring
+  ``ETHTOOL_A_RINGS_RX_MINI_MAX``           u32     max size of RX mini ring
+  ``ETHTOOL_A_RINGS_RX_JUMBO_MAX``          u32     max size of RX jumbo ring
+  ``ETHTOOL_A_RINGS_TX_MAX``                u32     max size of TX ring
+  ``ETHTOOL_A_RINGS_RX``                    u32     size of RX ring
+  ``ETHTOOL_A_RINGS_RX_MINI``               u32     size of RX mini ring
+  ``ETHTOOL_A_RINGS_RX_JUMBO``              u32     size of RX jumbo ring
+  ``ETHTOOL_A_RINGS_TX``                    u32     size of TX ring
+  ``ETHTOOL_A_RINGS_RX_BUF_LEN``            u32     size of buffers on the ring
+  ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT``        u8      TCP header / data split
+  ``ETHTOOL_A_RINGS_CQE_SIZE``              u32     Size of TX/RX CQE
+  ``ETHTOOL_A_RINGS_TX_PUSH``               u8      flag of TX Push mode
+  ``ETHTOOL_A_RINGS_RX_PUSH``               u8      flag of RX Push mode
+  ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN``       u32     size of TX push buffer
+  ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX``   u32     max size of TX push buffer
+  =======================================   ======  ===========================
 
 ``ETHTOOL_A_RINGS_TCP_DATA_SPLIT`` indicates whether the device is usable with
 page-flipping TCP zero-copy receive (``getsockopt(TCP_ZEROCOPY_RECEIVE)``).
@@ -891,6 +893,18 @@ through MMIO writes, thus reducing the latency. However, enabling this feature
 may increase the CPU cost. Drivers may enforce additional per-packet
 eligibility checks (e.g. on packet size).
 
+``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN`` specifies the maximum number of bytes of a
+transmitted packet a driver can push directly to the underlying device
+('push' mode). Pushing some of the payload bytes to the device has the
+advantages of reducing latency for small packets by avoiding DMA mapping (same
+as ``ETHTOOL_A_RINGS_TX_PUSH`` parameter) as well as allowing the underlying
+device to process packet headers ahead of fetching its payload.
+This can help the device to make fast actions based on the packet's headers.
+This is similar to the "tx-copybreak" parameter, which copies the packet to a
+preallocated DMA memory area instead of mapping new memory. However, the
+tx-push-buf-len parameter copies the packet directly to the device to allow
+the device to take faster actions on the packet.
+
 RINGS_SET
 =========
 
@@ -908,6 +922,7 @@ Request contents:
   ``ETHTOOL_A_RINGS_CQE_SIZE``          u32     Size of TX/RX CQE
   ``ETHTOOL_A_RINGS_TX_PUSH``           u8      flag of TX Push mode
   ``ETHTOOL_A_RINGS_RX_PUSH``           u8      flag of RX Push mode
+  ``ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN``   u32     size of TX push buffer
   ====================================  ======  ===========================
 
 Kernel checks that requested ring sizes do not exceed limits reported by
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 2792185dda22..798d35890118 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -75,6 +75,8 @@ enum {
  * @tx_push: The flag of tx push mode
  * @rx_push: The flag of rx push mode
  * @cqe_size: Size of TX/RX completion queue event
+ * @tx_push_buf_len: Size of TX push buffer
+ * @tx_push_buf_max_len: Maximum allowed size of TX push buffer
  */
 struct kernel_ethtool_ringparam {
 	u32	rx_buf_len;
@@ -82,6 +84,8 @@ struct kernel_ethtool_ringparam {
 	u8	tx_push;
 	u8	rx_push;
 	u32	cqe_size;
+	u32	tx_push_buf_len;
+	u32	tx_push_buf_max_len;
 };
 
 /**
@@ -90,12 +94,14 @@ struct kernel_ethtool_ringparam {
  * @ETHTOOL_RING_USE_CQE_SIZE: capture for setting cqe_size
  * @ETHTOOL_RING_USE_TX_PUSH: capture for setting tx_push
  * @ETHTOOL_RING_USE_RX_PUSH: capture for setting rx_push
+ * @ETHTOOL_RING_USE_TX_PUSH_BUF_LEN: capture for setting tx_push_buf_len
  */
 enum ethtool_supported_ring_param {
-	ETHTOOL_RING_USE_RX_BUF_LEN = BIT(0),
-	ETHTOOL_RING_USE_CQE_SIZE   = BIT(1),
-	ETHTOOL_RING_USE_TX_PUSH    = BIT(2),
-	ETHTOOL_RING_USE_RX_PUSH    = BIT(3),
+	ETHTOOL_RING_USE_RX_BUF_LEN		= BIT(0),
+	ETHTOOL_RING_USE_CQE_SIZE		= BIT(1),
+	ETHTOOL_RING_USE_TX_PUSH		= BIT(2),
+	ETHTOOL_RING_USE_RX_PUSH		= BIT(3),
+	ETHTOOL_RING_USE_TX_PUSH_BUF_LEN	= BIT(4),
 };
 
 #define __ETH_RSS_HASH_BIT(bit)	((u32)1 << (bit))
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index d39ce21381c5..1ebf8d455f07 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -357,6 +357,8 @@ enum {
 	ETHTOOL_A_RINGS_CQE_SIZE,			/* u32 */
 	ETHTOOL_A_RINGS_TX_PUSH,			/* u8 */
 	ETHTOOL_A_RINGS_RX_PUSH,			/* u8 */
+	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,		/* u32 */
+	ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,		/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_RINGS_CNT,
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index f7b189ed96b2..79424b34b553 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -413,7 +413,7 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
 extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
 extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
 extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_PUSH + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX + 1];
 extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
 extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
 extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index f358cd57d094..1c4972526142 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -11,6 +11,7 @@ struct rings_reply_data {
 	struct ethnl_reply_data		base;
 	struct ethtool_ringparam	ringparam;
 	struct kernel_ethtool_ringparam	kernel_ringparam;
+	u32				supported_ring_params;
 };
 
 #define RINGS_REPDATA(__reply_base) \
@@ -32,6 +33,8 @@ static int rings_prepare_data(const struct ethnl_req_info *req_base,
 
 	if (!dev->ethtool_ops->get_ringparam)
 		return -EOPNOTSUPP;
+
+	data->supported_ring_params = dev->ethtool_ops->supported_ring_params;
 	ret = ethnl_ops_begin(dev);
 	if (ret < 0)
 		return ret;
@@ -57,7 +60,9 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
 	       nla_total_size(sizeof(u8))  +	/* _RINGS_TCP_DATA_SPLIT */
 	       nla_total_size(sizeof(u32)  +	/* _RINGS_CQE_SIZE */
 	       nla_total_size(sizeof(u8))  +	/* _RINGS_TX_PUSH */
-	       nla_total_size(sizeof(u8)));	/* _RINGS_RX_PUSH */
+	       nla_total_size(sizeof(u8))) +	/* _RINGS_RX_PUSH */
+	       nla_total_size(sizeof(u32)) +	/* _RINGS_TX_PUSH_BUF_LEN */
+	       nla_total_size(sizeof(u32));	/* _RINGS_TX_PUSH_BUF_LEN_MAX */
 }
 
 static int rings_fill_reply(struct sk_buff *skb,
@@ -67,6 +72,7 @@ static int rings_fill_reply(struct sk_buff *skb,
 	const struct rings_reply_data *data = RINGS_REPDATA(reply_base);
 	const struct kernel_ethtool_ringparam *kr = &data->kernel_ringparam;
 	const struct ethtool_ringparam *ringparam = &data->ringparam;
+	u32 supported_ring_params = data->supported_ring_params;
 
 	WARN_ON(kr->tcp_data_split > ETHTOOL_TCP_DATA_SPLIT_ENABLED);
 
@@ -98,7 +104,12 @@ static int rings_fill_reply(struct sk_buff *skb,
 	    (kr->cqe_size &&
 	     (nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) ||
 	    nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) ||
-	    nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push))
+	    nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push) ||
+	    ((supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN) &&
+	     (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN_MAX,
+			  kr->tx_push_buf_max_len) ||
+	      nla_put_u32(skb, ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN,
+			  kr->tx_push_buf_len))))
 		return -EMSGSIZE;
 
 	return 0;
@@ -117,6 +128,7 @@ const struct nla_policy ethnl_rings_set_policy[] = {
 	[ETHTOOL_A_RINGS_CQE_SIZE]		= NLA_POLICY_MIN(NLA_U32, 1),
 	[ETHTOOL_A_RINGS_TX_PUSH]		= NLA_POLICY_MAX(NLA_U8, 1),
 	[ETHTOOL_A_RINGS_RX_PUSH]		= NLA_POLICY_MAX(NLA_U8, 1),
+	[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN]	= { .type = NLA_U32 },
 };
 
 static int
@@ -158,6 +170,14 @@ ethnl_set_rings_validate(struct ethnl_req_info *req_info,
 		return -EOPNOTSUPP;
 	}
 
+	if (tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN] &&
+	    !(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH_BUF_LEN)) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN],
+				    "setting tx push buf len is not supported");
+		return -EOPNOTSUPP;
+	}
+
 	return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP;
 }
 
@@ -189,6 +209,8 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
 			tb[ETHTOOL_A_RINGS_TX_PUSH], &mod);
 	ethnl_update_u8(&kernel_ringparam.rx_push,
 			tb[ETHTOOL_A_RINGS_RX_PUSH], &mod);
+	ethnl_update_u32(&kernel_ringparam.tx_push_buf_len,
+			 tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN], &mod);
 	if (!mod)
 		return 0;
 
@@ -209,6 +231,14 @@ ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
 		return -EINVAL;
 	}
 
+	if (kernel_ringparam.tx_push_buf_len > kernel_ringparam.tx_push_buf_max_len) {
+		NL_SET_ERR_MSG_ATTR_FMT(info->extack, tb[ETHTOOL_A_RINGS_TX_PUSH_BUF_LEN],
+					"Requested TX push buffer exceeds the maximum of %u",
+					kernel_ringparam.tx_push_buf_max_len);
+
+		return -EINVAL;
+	}
+
 	ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
 					      &kernel_ringparam, info->extack);
 	return ret < 0 ? ret : 1;
-- 
cgit v1.2.3


From b341be6de98caf746100b2740ed2db68cd083261 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Tue, 28 Mar 2023 09:33:24 +0200
Subject: dt-bindings: arm: stm32: add compatible for syscon gcan node

Since commit ad440432d1f9 ("dt-bindings: mfd: Ensure 'syscon' has a
more specific compatible") it is required to provide at least two compatible
strings for a syscon node.
This patch documents the new compatible for stm32f4 SoC to support
global/shared CAN registers access for bxCAN controllers.

Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/all/20230328073328.3949796-2-dario.binacchi@amarulasolutions.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
index b2b156cc160a..ad8e51aa01b0 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
@@ -20,6 +20,7 @@ properties:
               - st,stm32-syscfg
               - st,stm32-power-config
               - st,stm32-tamp
+              - st,stm32f4-gcan
           - const: syscon
       - items:
           - const: st,stm32-tamp
@@ -42,6 +43,7 @@ if:
       contains:
         enum:
           - st,stm32mp157-syscfg
+          - st,stm32f4-gcan
 then:
   required:
     - clocks
-- 
cgit v1.2.3


From e43250c0ac8123e4560ac39777cd94ab6e75ee23 Mon Sep 17 00:00:00 2001
From: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Date: Tue, 28 Mar 2023 09:33:25 +0200
Subject: dt-bindings: net: can: add STM32 bxcan DT bindings

Add documentation of device tree bindings for the STM32 basic extended
CAN (bxcan) controller.

Signed-off-by: Dario Binacchi <dario.binacchi@amarulasolutions.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/all/20230328073328.3949796-3-dario.binacchi@amarulasolutions.com
[mkl: drop unneeded quotes]
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .../bindings/net/can/st,stm32-bxcan.yaml           | 85 ++++++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml b/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
new file mode 100644
index 000000000000..769fa5c27b76
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/can/st,stm32-bxcan.yaml
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/can/st,stm32-bxcan.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: STMicroelectronics bxCAN controller
+
+description: STMicroelectronics bxCAN controller for CAN bus
+
+maintainers:
+  - Dario Binacchi <dario.binacchi@amarulasolutions.com>
+
+allOf:
+  - $ref: can-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - st,stm32f4-bxcan
+
+  st,can-primary:
+    description:
+      Primary and secondary mode of the bxCAN peripheral is only relevant
+      if the chip has two CAN peripherals. In that case they share some
+      of the required logic.
+      To avoid misunderstandings, it should be noted that ST documentation
+      uses the terms master/slave instead of primary/secondary.
+    type: boolean
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: transmit interrupt
+      - description: FIFO 0 receive interrupt
+      - description: FIFO 1 receive interrupt
+      - description: status change error interrupt
+
+  interrupt-names:
+    items:
+      - const: tx
+      - const: rx0
+      - const: rx1
+      - const: sce
+
+  resets:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  st,gcan:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description:
+      The phandle to the gcan node, which provides access to the 512-byte
+      SRAM shared by the two bxCAN cells (CAN1 primary and CAN2
+      secondary) in dual CAN peripheral configuration.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - resets
+  - clocks
+  - st,gcan
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/stm32fx-clock.h>
+    #include <dt-bindings/mfd/stm32f4-rcc.h>
+
+    can1: can@40006400 {
+        compatible = "st,stm32f4-bxcan";
+        reg = <0x40006400 0x200>;
+        interrupts = <19>, <20>, <21>, <22>;
+        interrupt-names = "tx", "rx0", "rx1", "sce";
+        resets = <&rcc STM32F4_APB1_RESET(CAN1)>;
+        clocks = <&rcc 0 STM32F4_APB1_CLOCK(CAN1)>;
+        st,can-primary;
+        st,gcan = <&gcan>;
+    };
-- 
cgit v1.2.3


From 3905f8d64ccc2c640d8c1179f4452f2bf8f1df56 Mon Sep 17 00:00:00 2001
From: Dragos Tatulea <dtatulea@nvidia.com>
Date: Wed, 22 Feb 2023 11:27:38 +0200
Subject: net/mlx5e: RX, Remove unnecessary recycle parameter and page_cache
 stats

The recycle parameter used during page release is no longer
necessary: the page pool can detect when the page cannot be
recycled to the cache or ring without any outside hint.

The page pool will also take care of cleaning up after itself
once all the inflight pages have been released. So no need to
explicitly release pages to the system.

Remove the internal page_cache stats as the mlx5e_page_cache
struct no longer exists.

Delete the documentation entries along with the stats.

Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/counters.rst            | 26 ------------
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c   |  7 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 47 ++++++++++------------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 20 ---------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 10 -----
 5 files changed, 25 insertions(+), 85 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
index 4cd8e869762b..6b2d1fe74ecf 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/counters.rst
@@ -346,32 +346,6 @@ the software port.
      - The number of receive packets with CQE compression on ring i [#accel]_.
      - Acceleration
 
-   * - `rx[i]_cache_reuse`
-     - The number of events of successful reuse of a page from a driver's
-       internal page cache.
-     - Acceleration
-
-   * - `rx[i]_cache_full`
-     - The number of events of full internal page cache where driver can't put a
-       page back to the cache for recycling (page will be freed).
-     - Acceleration
-
-   * - `rx[i]_cache_empty`
-     - The number of events where cache was empty - no page to give. Driver
-       shall allocate new page.
-     - Acceleration
-
-   * - `rx[i]_cache_busy`
-     - The number of events where cache head was busy and cannot be recycled.
-       Driver allocated new page.
-     - Acceleration
-
-   * - `rx[i]_cache_waive`
-     - The number of cache evacuation. This can occur due to page move to
-       another NUMA node or page was pfmemalloc-ed and should be freed as soon
-       as possible.
-     - Acceleration
-
    * - `rx[i]_arfs_err`
      - Number of flow rules that failed to be added to the flow table.
      - Error
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index ca6ac9772d22..15d15d0e5ef9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -505,7 +505,6 @@ mlx5e_xmit_xdp_frame(struct mlx5e_xdpsq *sq, struct mlx5e_xmit_data *xdptxd,
 static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 				  struct mlx5e_xdp_wqe_info *wi,
 				  u32 *xsk_frames,
-				  bool recycle,
 				  struct xdp_frame_bulk *bq)
 {
 	struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
@@ -524,7 +523,7 @@ static void mlx5e_free_xdpsq_desc(struct mlx5e_xdpsq *sq,
 		case MLX5E_XDP_XMIT_MODE_PAGE:
 			/* XDP_TX from the regular RQ */
 			page_pool_put_defragged_page(xdpi.page.rq->page_pool,
-						     xdpi.page.page, -1, recycle);
+						     xdpi.page.page, -1, true);
 			break;
 		case MLX5E_XDP_XMIT_MODE_XSK:
 			/* AF_XDP send */
@@ -578,7 +577,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 			sqcc += wi->num_wqebbs;
 
-			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true, &bq);
+			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
 		} while (!last_wqe);
 
 		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
@@ -625,7 +624,7 @@ void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq)
 
 		sq->cc += wi->num_wqebbs;
 
-		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, false, &bq);
+		mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, &bq);
 	}
 
 	xdp_flush_frame_bulk(&bq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index df5dbef9e5ec..1049805571c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -293,14 +293,13 @@ static int mlx5e_page_alloc_fragmented(struct mlx5e_rq *rq,
 }
 
 static void mlx5e_page_release_fragmented(struct mlx5e_rq *rq,
-					  struct mlx5e_frag_page *frag_page,
-					  bool recycle)
+					  struct mlx5e_frag_page *frag_page)
 {
 	u16 drain_count = MLX5E_PAGECNT_BIAS_MAX - frag_page->frags;
 	struct page *page = frag_page->page;
 
 	if (page_pool_defrag_page(page, drain_count) == 0)
-		page_pool_put_defragged_page(rq->page_pool, page, -1, recycle);
+		page_pool_put_defragged_page(rq->page_pool, page, -1, true);
 }
 
 static inline int mlx5e_get_rx_frag(struct mlx5e_rq *rq,
@@ -330,11 +329,10 @@ static bool mlx5e_frag_can_release(struct mlx5e_wqe_frag_info *frag)
 }
 
 static inline void mlx5e_put_rx_frag(struct mlx5e_rq *rq,
-				     struct mlx5e_wqe_frag_info *frag,
-				     bool recycle)
+				     struct mlx5e_wqe_frag_info *frag)
 {
 	if (mlx5e_frag_can_release(frag))
-		mlx5e_page_release_fragmented(rq, frag->frag_page, recycle);
+		mlx5e_page_release_fragmented(rq, frag->frag_page);
 }
 
 static inline struct mlx5e_wqe_frag_info *get_frag(struct mlx5e_rq *rq, u16 ix)
@@ -368,19 +366,18 @@ static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe_cyc *wqe,
 
 free_frags:
 	while (--i >= 0)
-		mlx5e_put_rx_frag(rq, --frag, true);
+		mlx5e_put_rx_frag(rq, --frag);
 
 	return err;
 }
 
 static inline void mlx5e_free_rx_wqe(struct mlx5e_rq *rq,
-				     struct mlx5e_wqe_frag_info *wi,
-				     bool recycle)
+				     struct mlx5e_wqe_frag_info *wi)
 {
 	int i;
 
 	for (i = 0; i < rq->wqe.info.num_frags; i++, wi++)
-		mlx5e_put_rx_frag(rq, wi, recycle);
+		mlx5e_put_rx_frag(rq, wi);
 }
 
 static void mlx5e_xsk_free_rx_wqe(struct mlx5e_wqe_frag_info *wi)
@@ -396,7 +393,7 @@ static void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix)
 	if (rq->xsk_pool)
 		mlx5e_xsk_free_rx_wqe(wi);
 	else
-		mlx5e_free_rx_wqe(rq, wi, false);
+		mlx5e_free_rx_wqe(rq, wi);
 }
 
 static void mlx5e_xsk_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
@@ -427,7 +424,7 @@ static void mlx5e_free_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk)
 		struct mlx5e_wqe_frag_info *wi;
 
 		wi = get_frag(rq, j);
-		mlx5e_free_rx_wqe(rq, wi, true);
+		mlx5e_free_rx_wqe(rq, wi);
 	}
 }
 
@@ -502,7 +499,7 @@ mlx5e_copy_skb_header(struct mlx5e_rq *rq, struct sk_buff *skb,
 }
 
 static void
-mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle)
+mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi)
 {
 	bool no_xdp_xmit;
 	int i;
@@ -516,9 +513,9 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle
 	if (rq->xsk_pool) {
 		struct xdp_buff **xsk_buffs = wi->alloc_units.xsk_buffs;
 
-		/* The `recycle` parameter is ignored, and the page is always
-		 * put into the Reuse Ring, because there is no way to return
-		 * the page to the userspace when the interface goes down.
+		/* The page is always put into the Reuse Ring, because there
+		 * is no way to return the page to userspace when the interface
+		 * goes down.
 		 */
 		for (i = 0; i < rq->mpwqe.pages_per_wqe; i++)
 			if (no_xdp_xmit || !test_bit(i, wi->skip_release_bitmap))
@@ -529,7 +526,7 @@ mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi, bool recycle
 				struct mlx5e_frag_page *frag_page;
 
 				frag_page = &wi->alloc_units.frag_pages[i];
-				mlx5e_page_release_fragmented(rq, frag_page, recycle);
+				mlx5e_page_release_fragmented(rq, frag_page);
 			}
 		}
 	}
@@ -663,7 +660,7 @@ err_unmap:
 		dma_info = &shampo->info[--index];
 		if (!(i & (MLX5E_SHAMPO_WQ_HEADER_PER_PAGE - 1))) {
 			dma_info->addr = ALIGN_DOWN(dma_info->addr, PAGE_SIZE);
-			mlx5e_page_release_fragmented(rq, dma_info->frag_page, true);
+			mlx5e_page_release_fragmented(rq, dma_info->frag_page);
 		}
 	}
 	rq->stats->buff_alloc_err++;
@@ -781,7 +778,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 err_unmap:
 	while (--i >= 0) {
 		frag_page--;
-		mlx5e_page_release_fragmented(rq, frag_page, true);
+		mlx5e_page_release_fragmented(rq, frag_page);
 	}
 
 err:
@@ -815,7 +812,7 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
 		hd_info->addr = ALIGN_DOWN(hd_info->addr, PAGE_SIZE);
 		if (hd_info->frag_page && hd_info->frag_page != deleted_page) {
 			deleted_page = hd_info->frag_page;
-			mlx5e_page_release_fragmented(rq, hd_info->frag_page, false);
+			mlx5e_page_release_fragmented(rq, hd_info->frag_page);
 		}
 
 		hd_info->frag_page = NULL;
@@ -833,8 +830,8 @@ void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq, u16 len, u16 start, bool close
 static void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
-	/* Don't recycle, this function is called on rq/netdev close */
-	mlx5e_free_rx_mpwqe(rq, wi, false);
+	/* This function is called on rq/netdev close. */
+	mlx5e_free_rx_mpwqe(rq, wi);
 }
 
 INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
@@ -1058,7 +1055,7 @@ INDIRECT_CALLABLE_SCOPE bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq)
 		struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, head);
 
 		/* Deferred free for better page pool cache usage. */
-		mlx5e_free_rx_mpwqe(rq, wi, true);
+		mlx5e_free_rx_mpwqe(rq, wi);
 
 		alloc_err = rq->xsk_pool ? mlx5e_xsk_alloc_rx_mpwqe(rq, head) :
 					   mlx5e_alloc_rx_mpwqe(rq, head);
@@ -1739,7 +1736,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
 			int i;
 
 			for (i = wi - head_wi; i < rq->wqe.info.num_frags; i++)
-				mlx5e_put_rx_frag(rq, &head_wi[i], true);
+				mlx5e_put_rx_frag(rq, &head_wi[i]);
 		}
 		return NULL; /* page/packet was consumed by XDP */
 	}
@@ -2158,7 +2155,7 @@ mlx5e_free_rx_shampo_hd_entry(struct mlx5e_rq *rq, u16 header_index)
 		struct mlx5e_dma_info *dma_info = &shampo->info[header_index];
 
 		dma_info->addr = ALIGN_DOWN(addr, PAGE_SIZE);
-		mlx5e_page_release_fragmented(rq, dma_info->frag_page, true);
+		mlx5e_page_release_fragmented(rq, dma_info->frag_page);
 	}
 	bitmap_clear(shampo->bitmap, header_index, 1);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 4478223c1720..f1d9596905c6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -179,11 +179,6 @@ static const struct counter_desc sw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) },
-	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_congst_umr) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_arfs_err) },
 	{ MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_recover) },
@@ -358,11 +353,6 @@ static void mlx5e_stats_grp_sw_update_stats_rq_stats(struct mlx5e_sw_stats *s,
 	s->rx_buff_alloc_err          += rq_stats->buff_alloc_err;
 	s->rx_cqe_compress_blks       += rq_stats->cqe_compress_blks;
 	s->rx_cqe_compress_pkts       += rq_stats->cqe_compress_pkts;
-	s->rx_cache_reuse             += rq_stats->cache_reuse;
-	s->rx_cache_full              += rq_stats->cache_full;
-	s->rx_cache_empty             += rq_stats->cache_empty;
-	s->rx_cache_busy              += rq_stats->cache_busy;
-	s->rx_cache_waive             += rq_stats->cache_waive;
 	s->rx_congst_umr              += rq_stats->congst_umr;
 	s->rx_arfs_err                += rq_stats->arfs_err;
 	s->rx_recover                 += rq_stats->recover;
@@ -1978,11 +1968,6 @@ static const struct counter_desc rq_stats_desc[] = {
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, recover) },
@@ -2163,11 +2148,6 @@ static const struct counter_desc ptp_rq_stats_desc[] = {
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_reuse) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_full) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_empty) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_busy) },
-	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, cache_waive) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, congst_umr) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, arfs_err) },
 	{ MLX5E_DECLARE_PTP_RQ_STAT(struct mlx5e_rq_stats, recover) },
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index b77100b60b50..1ff8a06027dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -193,11 +193,6 @@ struct mlx5e_sw_stats {
 	u64 rx_buff_alloc_err;
 	u64 rx_cqe_compress_blks;
 	u64 rx_cqe_compress_pkts;
-	u64 rx_cache_reuse;
-	u64 rx_cache_full;
-	u64 rx_cache_empty;
-	u64 rx_cache_busy;
-	u64 rx_cache_waive;
 	u64 rx_congst_umr;
 	u64 rx_arfs_err;
 	u64 rx_recover;
@@ -362,11 +357,6 @@ struct mlx5e_rq_stats {
 	u64 buff_alloc_err;
 	u64 cqe_compress_blks;
 	u64 cqe_compress_pkts;
-	u64 cache_reuse;
-	u64 cache_full;
-	u64 cache_empty;
-	u64 cache_busy;
-	u64 cache_waive;
 	u64 congst_umr;
 	u64 arfs_err;
 	u64 recover;
-- 
cgit v1.2.3


From e70f94c6c75c2e3274df474b4c0bf6153e0533c3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Mar 2023 10:26:46 -0700
Subject: docs: netdev: clarify the need to send reverts as patches

We don't state explicitly that reverts need to be submitted
as a patch. It occasionally comes up.

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20230327172646.2622943-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/process/maintainer-netdev.rst | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/process/maintainer-netdev.rst b/Documentation/process/maintainer-netdev.rst
index e31d7a951073..f73ac9e175a8 100644
--- a/Documentation/process/maintainer-netdev.rst
+++ b/Documentation/process/maintainer-netdev.rst
@@ -184,11 +184,18 @@ Handling misapplied patches
 
 Occasionally a patch series gets applied before receiving critical feedback,
 or the wrong version of a series gets applied.
-There is no revert possible, once it is pushed out, it stays like that.
+
+Making the patch disappear once it is pushed out is not possible; the commit
+history in netdev trees is immutable.
 Please send incremental versions on top of what has been merged in order to fix
 the patches the way they would look like if your latest patch series was to be
 merged.
 
+In cases where a full revert is needed, the revert has to be submitted
+as a patch to the list with a commit message explaining the technical
+problems with the reverted commit. Reverts should be used as a last resort,
+when the original change is completely wrong; incremental fixes are preferred.
+
 Stable tree
 ~~~~~~~~~~~
 
-- 
cgit v1.2.3


From 2607191395bd4db544db05452625cd7e98bc0848 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Mon, 27 Mar 2023 09:31:34 +0100
Subject: tools: ynl: Add struct attr decoding to ynl

Add support for decoding attributes that contain C structs.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/genetlink-legacy.yaml |  5 +++++
 tools/net/ynl/lib/nlspec.py                 |  2 ++
 tools/net/ynl/lib/ynl.py                    | 15 ++++++++++++++-
 3 files changed, 21 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index 5dc6f1c07a97..d50c78b9f42d 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -218,6 +218,11 @@ properties:
                     description: Max length for a string or a binary attribute.
                     $ref: '#/$defs/len-or-define'
               sub-type: *attr-type
+              # Start genetlink-legacy
+              struct:
+                description: Name of the struct type used for the attribute.
+                type: string
+              # End genetlink-legacy
 
       # Make sure name-prefix does not appear in subsets (subsets inherit naming)
       dependencies:
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index 6cc9b7646ae8..d1e5f60af580 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -152,6 +152,7 @@ class SpecAttr(SpecElement):
         value         numerical ID when serialized
         attr_set      Attribute Set containing this attr
         is_multi      bool, attr may repeat multiple times
+        struct_name   string, name of struct definition
         sub_type      string, name of sub type
     """
     def __init__(self, family, attr_set, yaml, value):
@@ -160,6 +161,7 @@ class SpecAttr(SpecElement):
         self.value = value
         self.attr_set = attr_set
         self.is_multi = yaml.get('multi-attr', False)
+        self.struct_name = yaml.get('struct')
         self.sub_type = yaml.get('sub-type')
 
 
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index eada229402fa..63af3bd9787d 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -102,6 +102,17 @@ class NlAttr:
         format, _ = self.type_formats[type]
         return list({ x[0] for x in struct.iter_unpack(format, self.raw) })
 
+    def as_struct(self, members):
+        value = dict()
+        offset = 0
+        for m in members:
+            # TODO: handle non-scalar members
+            format, size = self.type_formats[m.type]
+            decoded = struct.unpack_from(format, self.raw, offset)
+            offset += size
+            value[m.name] = decoded[0]
+        return value
+
     def __repr__(self):
         return f"[type:{self.type} len:{self._len}] {self.raw}"
 
@@ -377,7 +388,9 @@ class YnlFamily(SpecFamily):
         rsp[attr_spec['name']] = value
 
     def _decode_binary(self, attr, attr_spec):
-        if attr_spec.sub_type:
+        if attr_spec.struct_name:
+            decoded = attr.as_struct(self.consts[attr_spec.struct_name])
+        elif attr_spec.sub_type:
             decoded = attr.as_c_array(attr_spec.sub_type)
         else:
             decoded = attr.as_bin()
-- 
cgit v1.2.3


From f036d936ca57e8bc1f39b92cadfbac27095dc4e7 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Mon, 27 Mar 2023 09:31:35 +0100
Subject: tools: ynl: Add fixed-header support to ynl

Add support for netlink families that add an optional fixed header structure
after the genetlink header and before any attributes. The fixed-header can be
specified on a per op basis, or once for all operations, which serves as a
default value that can be overridden.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/genetlink-legacy.yaml | 11 +++++++++++
 tools/net/ynl/lib/nlspec.py                 | 21 +++++++++++++--------
 tools/net/ynl/lib/ynl.py                    | 24 ++++++++++++++++++++----
 3 files changed, 44 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml
index d50c78b9f42d..b33541a51d6b 100644
--- a/Documentation/netlink/genetlink-legacy.yaml
+++ b/Documentation/netlink/genetlink-legacy.yaml
@@ -261,6 +261,14 @@ properties:
       async-enum:
         description: Name for the enum type with notifications/events.
         type: string
+      # Start genetlink-legacy
+      fixed-header: &fixed-header
+        description: |
+          Name of the structure defining the optional fixed-length protocol
+          header. This header is placed in a message after the netlink and
+          genetlink headers and before any attributes.
+        type: string
+      # End genetlink-legacy
       list:
         description: List of commands
         type: array
@@ -293,6 +301,9 @@ properties:
               type: array
               items:
                 enum: [ strict, dump ]
+            # Start genetlink-legacy
+            fixed-header: *fixed-header
+            # End genetlink-legacy
             do: &subop-type
               description: Main command handler.
               type: object
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py
index d1e5f60af580..06a906d74f0e 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/lib/nlspec.py
@@ -263,16 +263,17 @@ class SpecOperation(SpecElement):
     Information about a single Netlink operation.
 
     Attributes:
-        value       numerical ID when serialized, None if req/rsp values differ
+        value           numerical ID when serialized, None if req/rsp values differ
 
-        req_value   numerical ID when serialized, user -> kernel
-        rsp_value   numerical ID when serialized, user <- kernel
-        is_call     bool, whether the operation is a call
-        is_async    bool, whether the operation is a notification
-        is_resv     bool, whether the operation does not exist (it's just a reserved ID)
-        attr_set    attribute set name
+        req_value       numerical ID when serialized, user -> kernel
+        rsp_value       numerical ID when serialized, user <- kernel
+        is_call         bool, whether the operation is a call
+        is_async        bool, whether the operation is a notification
+        is_resv         bool, whether the operation does not exist (it's just a reserved ID)
+        attr_set        attribute set name
+        fixed_header    string, optional name of fixed header struct
 
-        yaml        raw spec as loaded from the spec file
+        yaml            raw spec as loaded from the spec file
     """
     def __init__(self, family, yaml, req_value, rsp_value):
         super().__init__(family, yaml)
@@ -284,6 +285,7 @@ class SpecOperation(SpecElement):
         self.is_call = 'do' in yaml or 'dump' in yaml
         self.is_async = 'notify' in yaml or 'event' in yaml
         self.is_resv = not self.is_async and not self.is_call
+        self.fixed_header = self.yaml.get('fixed-header', family.fixed_header)
 
         # Added by resolve:
         self.attr_set = None
@@ -324,6 +326,7 @@ class SpecFamily(SpecElement):
         msgs_by_value  dict of all messages (indexed by name)
         ops        dict of all valid requests / responses
         consts     dict of all constants/enums
+        fixed_header  string, optional name of family default fixed header struct
     """
     def __init__(self, spec_path, schema_path=None):
         with open(spec_path, "r") as stream:
@@ -397,6 +400,7 @@ class SpecFamily(SpecElement):
         self._resolution_list.append(elem)
 
     def _dictify_ops_unified(self):
+        self.fixed_header = self.yaml['operations'].get('fixed-header')
         val = 1
         for elem in self.yaml['operations']['list']:
             if 'value' in elem:
@@ -408,6 +412,7 @@ class SpecFamily(SpecElement):
             self.msgs[op.name] = op
 
     def _dictify_ops_directional(self):
+        self.fixed_header = self.yaml['operations'].get('fixed-header')
         req_val = rsp_val = 1
         for elem in self.yaml['operations']['list']:
             if 'notify' in elem:
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 63af3bd9787d..ec40918152e1 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -278,14 +278,22 @@ def _genl_load_families():
 
 
 class GenlMsg:
-    def __init__(self, nl_msg):
+    def __init__(self, nl_msg, fixed_header_members=[]):
         self.nl = nl_msg
 
         self.hdr = nl_msg.raw[0:4]
-        self.raw = nl_msg.raw[4:]
+        offset = 4
 
         self.genl_cmd, self.genl_version, _ = struct.unpack("BBH", self.hdr)
 
+        self.fixed_header_attrs = dict()
+        for m in fixed_header_members:
+            format, size = NlAttr.type_formats[m.type]
+            decoded = struct.unpack_from(format, nl_msg.raw, offset)
+            offset += size
+            self.fixed_header_attrs[m.name] = decoded[0]
+
+        self.raw = nl_msg.raw[offset:]
         self.raw_attrs = NlAttrs(self.raw)
 
     def __repr__(self):
@@ -509,6 +517,13 @@ class YnlFamily(SpecFamily):
 
         req_seq = random.randint(1024, 65535)
         msg = _genl_msg(self.family.family_id, nl_flags, op.req_value, 1, req_seq)
+        fixed_header_members = []
+        if op.fixed_header:
+            fixed_header_members = self.consts[op.fixed_header].members
+            for m in fixed_header_members:
+                value = vals.pop(m.name)
+                format, _ = NlAttr.type_formats[m.type]
+                msg += struct.pack(format, value)
         for name, value in vals.items():
             msg += self._add_attr(op.attr_set.name, name, value)
         msg = _genl_msg_finalize(msg)
@@ -535,7 +550,7 @@ class YnlFamily(SpecFamily):
                     done = True
                     break
 
-                gm = GenlMsg(nl_msg)
+                gm = GenlMsg(nl_msg, fixed_header_members)
                 # Check if this is a reply to our request
                 if nl_msg.nl_seq != req_seq or gm.genl_cmd != op.rsp_value:
                     if gm.genl_cmd in self.async_msg_ids:
@@ -545,7 +560,8 @@ class YnlFamily(SpecFamily):
                         print('Unexpected message: ' + repr(gm))
                         continue
 
-                rsp.append(self._decode(gm.raw_attrs, op.attr_set.name))
+                rsp.append(self._decode(gm.raw_attrs, op.attr_set.name)
+                           | gm.fixed_header_attrs)
 
         if not rsp:
             return None
-- 
cgit v1.2.3


From 643ef4a676e3a1ff1312e9fea6ae777c9a92fcd3 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Mon, 27 Mar 2023 09:31:36 +0100
Subject: netlink: specs: add partial specification for openvswitch

The openvswitch family has a fixed header, uses struct attrs and has array
values. This partial spec demonstrates these features in the YNL CLI. These
specs are sufficient to create, delete and dump datapaths and to dump vports:

$ ./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/ovs_datapath.yaml \
    --do dp-new --json '{ "dp-ifindex": 0, "name": "demo", "upcall-pid": 0}'
None

$ ./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/ovs_datapath.yaml \
    --dump dp-get --json '{ "dp-ifindex": 0 }'
[{'dp-ifindex': 3,
  'masks-cache-size': 256,
  'megaflow-stats': {'cache-hits': 0,
                     'mask-hit': 0,
                     'masks': 0,
                     'pad1': 0,
                     'padding': 0},
  'name': 'test',
  'stats': {'flows': 0, 'hit': 0, 'lost': 0, 'missed': 0},
  'user-features': {'dispatch-upcall-per-cpu',
                    'tc-recirc-sharing',
                    'unaligned'}},
 {'dp-ifindex': 48,
  'masks-cache-size': 256,
  'megaflow-stats': {'cache-hits': 0,
                     'mask-hit': 0,
                     'masks': 0,
                     'pad1': 0,
                     'padding': 0},
  'name': 'demo',
  'stats': {'flows': 0, 'hit': 0, 'lost': 0, 'missed': 0},
  'user-features': set()}]

$ ./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/ovs_datapath.yaml \
    --do dp-del --json '{ "dp-ifindex": 0, "name": "demo"}'
None

$ ./tools/net/ynl/cli.py \
    --spec Documentation/netlink/specs/ovs_vport.yaml \
    --dump vport-get --json '{ "dp-ifindex": 3 }'
[{'dp-ifindex': 3,
  'ifindex': 3,
  'name': 'test',
  'port-no': 0,
  'stats': {'rx-bytes': 0,
            'rx-dropped': 0,
            'rx-errors': 0,
            'rx-packets': 0,
            'tx-bytes': 0,
            'tx-dropped': 0,
            'tx-errors': 0,
            'tx-packets': 0},
  'type': 'internal',
  'upcall-pid': [0],
  'upcall-stats': {'fail': 0, 'success': 0}}]

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ovs_datapath.yaml | 153 ++++++++++++++++++++++++++
 Documentation/netlink/specs/ovs_vport.yaml    | 139 +++++++++++++++++++++++
 2 files changed, 292 insertions(+)
 create mode 100644 Documentation/netlink/specs/ovs_datapath.yaml
 create mode 100644 Documentation/netlink/specs/ovs_vport.yaml

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ovs_datapath.yaml b/Documentation/netlink/specs/ovs_datapath.yaml
new file mode 100644
index 000000000000..6d71db8c4416
--- /dev/null
+++ b/Documentation/netlink/specs/ovs_datapath.yaml
@@ -0,0 +1,153 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: ovs_datapath
+version: 2
+protocol: genetlink-legacy
+
+doc:
+  OVS datapath configuration over generic netlink.
+
+definitions:
+  -
+    name: ovs-header
+    type: struct
+    members:
+      -
+        name: dp-ifindex
+        type: u32
+  -
+    name: user-features
+    type: flags
+    entries:
+      -
+        name: unaligned
+        doc: Allow last Netlink attribute to be unaligned
+      -
+        name: vport-pids
+        doc: Allow datapath to associate multiple Netlink PIDs to each vport
+      -
+        name: tc-recirc-sharing
+        doc: Allow tc offload recirc sharing
+      -
+        name: dispatch-upcall-per-cpu
+        doc: Allow per-cpu dispatch of upcalls
+  -
+    name: datapath-stats
+    type: struct
+    members:
+      -
+        name: hit
+        type: u64
+      -
+        name: missed
+        type: u64
+      -
+        name: lost
+        type: u64
+      -
+        name: flows
+        type: u64
+  -
+    name: megaflow-stats
+    type: struct
+    members:
+      -
+        name: mask-hit
+        type: u64
+      -
+        name: masks
+        type: u32
+      -
+        name: padding
+        type: u32
+      -
+        name: cache-hits
+        type: u64
+      -
+        name: pad1
+        type: u64
+
+attribute-sets:
+  -
+    name: datapath
+    attributes:
+      -
+        name: name
+        type: string
+      -
+        name: upcall-pid
+        doc: upcall pid
+        type: u32
+      -
+        name: stats
+        type: binary
+        struct: datapath-stats
+      -
+        name: megaflow-stats
+        type: binary
+        struct: megaflow-stats
+      -
+        name: user-features
+        type: u32
+        enum: user-features
+        enum-as-flags: true
+      -
+        name: pad
+        type: unused
+      -
+        name: masks-cache-size
+        type: u32
+      -
+        name: per-cpu-pids
+        type: binary
+        sub-type: u32
+
+operations:
+  fixed-header: ovs-header
+  list:
+    -
+      name: dp-get
+      doc: Get / dump OVS data path configuration and state
+      value: 3
+      attribute-set: datapath
+      do: &dp-get-op
+        request:
+          attributes:
+            - name
+        reply:
+          attributes:
+            - name
+            - upcall-pid
+            - stats
+            - megaflow-stats
+            - user-features
+            - masks-cache-size
+            - per-cpu-pids
+      dump: *dp-get-op
+    -
+      name: dp-new
+      doc: Create new OVS data path
+      value: 1
+      attribute-set: datapath
+      do:
+        request:
+          attributes:
+            - dp-ifindex
+            - name
+            - upcall-pid
+            - user-features
+    -
+      name: dp-del
+      doc: Delete existing OVS data path
+      value: 2
+      attribute-set: datapath
+      do:
+        request:
+          attributes:
+            - dp-ifindex
+            - name
+
+mcast-groups:
+  list:
+    -
+      name: ovs_datapath
diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml
new file mode 100644
index 000000000000..8e55622ddf11
--- /dev/null
+++ b/Documentation/netlink/specs/ovs_vport.yaml
@@ -0,0 +1,139 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+
+name: ovs_vport
+version: 2
+protocol: genetlink-legacy
+
+doc:
+  OVS vport configuration over generic netlink.
+
+definitions:
+  -
+    name: ovs-header
+    type: struct
+    members:
+      -
+        name: dp-ifindex
+        type: u32
+  -
+    name: vport-type
+    type: enum
+    entries: [ unspec, netdev, internal, gre, vxlan, geneve ]
+  -
+    name: vport-stats
+    type: struct
+    members:
+      -
+        name: rx-packets
+        type: u64
+      -
+        name: tx-packets
+        type: u64
+      -
+        name: rx-bytes
+        type: u64
+      -
+        name: tx-bytes
+        type: u64
+      -
+        name: rx-errors
+        type: u64
+      -
+        name: tx-errors
+        type: u64
+      -
+        name: rx-dropped
+        type: u64
+      -
+        name: tx-dropped
+        type: u64
+
+attribute-sets:
+  -
+    name: vport-options
+    attributes:
+      -
+        name: dst-port
+        type: u32
+      -
+        name: extension
+        type: u32
+  -
+    name: upcall-stats
+    attributes:
+      -
+        name: success
+        type: u64
+        value: 0
+      -
+        name: fail
+        type: u64
+  -
+    name: vport
+    attributes:
+      -
+        name: port-no
+        type: u32
+      -
+        name: type
+        type: u32
+        enum: vport-type
+      -
+        name: name
+        type: string
+      -
+        name: options
+        type: nest
+        nested-attributes: vport-options
+      -
+        name: upcall-pid
+        type: binary
+        sub-type: u32
+      -
+        name: stats
+        type: binary
+        struct: vport-stats
+      -
+        name: pad
+        type: unused
+      -
+        name: ifindex
+        type: u32
+      -
+        name: netnsid
+        type: u32
+      -
+        name: upcall-stats
+        type: nest
+        nested-attributes: upcall-stats
+
+operations:
+  list:
+    -
+      name: vport-get
+      doc: Get / dump OVS vport configuration and state
+      value: 3
+      attribute-set: vport
+      fixed-header: ovs-header
+      do: &vport-get-op
+        request:
+          attributes:
+            - dp-ifindex
+            - name
+        reply: &dev-all
+          attributes:
+            - dp-ifindex
+            - port-no
+            - type
+            - name
+            - upcall-pid
+            - stats
+            - ifindex
+            - netnsid
+            - upcall-stats
+      dump: *vport-get-op
+
+mcast-groups:
+  list:
+    -
+      name: ovs_vport
-- 
cgit v1.2.3


From 88e288968412ec1ca3d3b2d96956baa543fdfe82 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Mon, 27 Mar 2023 09:31:37 +0100
Subject: docs: netlink: document struct support for genetlink-legacy

Describe the genetlink-legacy support for using struct definitions
for fixed headers and for binary attributes.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../userspace-api/netlink/genetlink-legacy.rst     | 74 +++++++++++++++++++++-
 1 file changed, 71 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/userspace-api/netlink/genetlink-legacy.rst b/Documentation/userspace-api/netlink/genetlink-legacy.rst
index 3bf0bcdf21d8..b8fdcf7f6615 100644
--- a/Documentation/userspace-api/netlink/genetlink-legacy.rst
+++ b/Documentation/userspace-api/netlink/genetlink-legacy.rst
@@ -162,9 +162,77 @@ Other quirks (todo)
 Structures
 ----------
 
-Legacy families can define C structures both to be used as the contents
-of an attribute and as a fixed message header. The plan is to define
-the structs in ``definitions`` and link the appropriate attrs.
+Legacy families can define C structures both to be used as the contents of
+an attribute and as a fixed message header. Structures are defined in
+``definitions`` and referenced in operations or attributes. Note that
+structures defined in YAML are implicitly packed according to C
+conventions. For example, the following struct is 4 bytes, not 6 bytes:
+
+.. code-block:: c
+
+  struct {
+          u8 a;
+          u16 b;
+          u8 c;
+  }
+
+Any padding must be explicitly added and C-like languages should infer the
+need for explicit padding from whether the members are naturally aligned.
+
+Here is the struct definition from above, declared in YAML:
+
+.. code-block:: yaml
+
+  definitions:
+    -
+      name: message-header
+      type: struct
+      members:
+        -
+          name: a
+          type: u8
+        -
+          name: b
+          type: u16
+        -
+          name: c
+          type: u8
+
+Fixed Headers
+~~~~~~~~~~~~~
+
+Fixed message headers can be added to operations using ``fixed-header``.
+The default ``fixed-header`` can be set in ``operations`` and it can be set
+or overridden for each operation.
+
+.. code-block:: yaml
+
+  operations:
+    fixed-header: message-header
+    list:
+      -
+        name: get
+        fixed-header: custom-header
+        attribute-set: message-attrs
+
+Attributes
+~~~~~~~~~~
+
+A ``binary`` attribute can be interpreted as a C structure using a
+``struct`` property with the name of the structure definition. The
+``struct`` property implies ``sub-type: struct`` so it is not necessary to
+specify a sub-type.
+
+.. code-block:: yaml
+
+  attribute-sets:
+    -
+      name: stats-attrs
+      attributes:
+        -
+          name: stats
+          type: binary
+          struct: vport-stats
 
 Multi-message DO
 ----------------
-- 
cgit v1.2.3


From 04eac39361d3506c82ccf932cb288e84d5746969 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Mon, 27 Mar 2023 09:31:38 +0100
Subject: docs: netlink: document the sub-type attribute property

Add a definition for sub-type to the protocol spec doc and a description of
its usage for C arrays in genetlink-legacy.

Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/userspace-api/netlink/genetlink-legacy.rst | 14 ++++++++++++++
 Documentation/userspace-api/netlink/specs.rst            | 10 ++++++++++
 2 files changed, 24 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/userspace-api/netlink/genetlink-legacy.rst b/Documentation/userspace-api/netlink/genetlink-legacy.rst
index b8fdcf7f6615..802875a37a27 100644
--- a/Documentation/userspace-api/netlink/genetlink-legacy.rst
+++ b/Documentation/userspace-api/netlink/genetlink-legacy.rst
@@ -234,6 +234,20 @@ specify a sub-type.
           type: binary
           struct: vport-stats
 
+C Arrays
+--------
+
+Legacy families also use ``binary`` attributes to encapsulate C arrays. The
+``sub-type`` is used to identify the type of scalar to extract.
+
+.. code-block:: yaml
+
+  attributes:
+    -
+      name: ports
+      type: binary
+      sub-type: u32
+
 Multi-message DO
 ----------------
 
diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst
index a22442ba1d30..2e4acde890b7 100644
--- a/Documentation/userspace-api/netlink/specs.rst
+++ b/Documentation/userspace-api/netlink/specs.rst
@@ -254,6 +254,16 @@ rather than depend on what is specified in the spec file.
 The validation policy in the kernel is formed by combining the type
 definition (``type`` and ``nested-attributes``) and the ``checks``.
 
+sub-type
+~~~~~~~~
+
+Legacy families have special ways of expressing arrays. ``sub-type`` can be
+used to define the type of array members in case array members are not
+fully defined as attributes (in a bona fide attribute space). For instance
+a C array of u32 values can be specified with ``type: binary`` and
+``sub-type: u32``. Binary types and legacy array formats are described in
+more detail in :doc:`genetlink-legacy`.
+
 operations
 ----------
 
-- 
cgit v1.2.3


From 8ba732befd6ff44cd8ecc809bd6df850e246f31a Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Wed, 29 Mar 2023 10:22:55 -0700
Subject: Documentation/eth/intel: Update address for driver support

Update the email address for support to use Intel Wired LAN, the mailing
list used for kernel development.

Suggested-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
 Documentation/networking/device_drivers/ethernet/intel/e100.rst    | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/e1000.rst   | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/e1000e.rst  | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/fm10k.rst   | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/i40e.rst    | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/iavf.rst    | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/ice.rst     | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/igb.rst     | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/igbvf.rst   | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst   | 2 +-
 Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst | 2 +-
 11 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/intel/e100.rst b/Documentation/networking/device_drivers/ethernet/intel/e100.rst
index 371b7e5c3293..4f613949782c 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e100.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e100.rst
@@ -184,4 +184,4 @@ or the Intel Wired Networking project hosted by Sourceforge at:
 http://sourceforge.net/projects/e1000
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/e1000.rst b/Documentation/networking/device_drivers/ethernet/intel/e1000.rst
index 4aaae0f7d6ba..7b15b8c72be0 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e1000.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e1000.rst
@@ -460,4 +460,4 @@ or the Intel Wired Networking project hosted by Sourceforge at:
 
 If an issue is identified with the released source code on the supported
 kernel with a supported adapter, email the specific information related
-to the issue to e1000-devel@lists.sf.net
+to the issue to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst b/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst
index f49cd370e7bf..7a9cbfa9e0f3 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst
@@ -380,4 +380,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst b/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst
index 9258ef6f515c..9d9c2ec2152e 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst
@@ -139,4 +139,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index c495c4e16b3b..5b13fe0fec82 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -768,4 +768,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
index 151af0a8da9c..079847666125 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
@@ -328,4 +328,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on the supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index 2b6dc7880d7b..246bf6455f64 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -1031,7 +1031,7 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
 
 
 Trademarks
diff --git a/Documentation/networking/device_drivers/ethernet/intel/igb.rst b/Documentation/networking/device_drivers/ethernet/intel/igb.rst
index d46289e182cf..ee149bdb42b9 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/igb.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/igb.rst
@@ -210,4 +210,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst b/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst
index 40fa210c5e14..78ceb3cdbfdb 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst
@@ -62,4 +62,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
index 0a233b17c664..8d4f7ede2ff8 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
@@ -554,4 +554,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst
index 76bbde736f21..9201c74e4c36 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst
@@ -64,4 +64,4 @@ https://sourceforge.net/projects/e1000
 
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
-to e1000-devel@lists.sf.net.
+to intel-wired-lan@lists.osuosl.org.
-- 
cgit v1.2.3


From 79d872c62b16e6c640e125fd4613c6beadac5210 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Wed, 29 Mar 2023 10:22:56 -0700
Subject: Documentation/eth/intel: Remove references to SourceForge

The out-of-tree driver is hosted on SourceForge; as this does not apply
to the kernel driver, remove references to it. Also make some minor
formatting changes around this section.

Suggested-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
 Documentation/networking/device_drivers/ethernet/intel/e100.rst    | 2 --
 Documentation/networking/device_drivers/ethernet/intel/e1000.rst   | 7 +------
 Documentation/networking/device_drivers/ethernet/intel/e1000e.rst  | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/fm10k.rst   | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/i40e.rst    | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/iavf.rst    | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/ice.rst     | 3 ---
 Documentation/networking/device_drivers/ethernet/intel/igb.rst     | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/igbvf.rst   | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst   | 5 -----
 Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst | 5 -----
 11 files changed, 1 insertion(+), 51 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/intel/e100.rst b/Documentation/networking/device_drivers/ethernet/intel/e100.rst
index 4f613949782c..5dee1b53e977 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e100.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e100.rst
@@ -180,8 +180,6 @@ Support
 For general information, go to the Intel support website at:
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-http://sourceforge.net/projects/e1000
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/e1000.rst b/Documentation/networking/device_drivers/ethernet/intel/e1000.rst
index 7b15b8c72be0..52a7fb9ce8d9 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e1000.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e1000.rst
@@ -451,12 +451,7 @@ Support
 =======
 
 For general information, go to the Intel support website at:
-
-    http://support.intel.com
-
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-    http://sourceforge.net/projects/e1000
+http://support.intel.com
 
 If an issue is identified with the released source code on the supported
 kernel with a supported adapter, email the specific information related
diff --git a/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst b/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst
index 7a9cbfa9e0f3..d8f810afdd49 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/e1000e.rst
@@ -371,13 +371,8 @@ NOTE: Wake on LAN is only supported on port A for the following devices:
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst b/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst
index 9d9c2ec2152e..396a2c8c3db1 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/fm10k.rst
@@ -130,13 +130,8 @@ the Intel Ethernet Controller XL710.
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
index 5b13fe0fec82..4fbaa1a2d674 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -759,13 +759,8 @@ enabled when setting up DCB on your switch.
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
index 079847666125..eb926c3bd4cd 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
@@ -319,13 +319,8 @@ This is caused by the way the Linux kernel reports this stressed condition.
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://support.intel.com
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on the supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
index 246bf6455f64..69695e5511f4 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst
@@ -1026,9 +1026,6 @@ Support
 For general information, go to the Intel support website at:
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/igb.rst b/Documentation/networking/device_drivers/ethernet/intel/igb.rst
index ee149bdb42b9..fbd590b6a0d6 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/igb.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/igb.rst
@@ -201,13 +201,8 @@ NOTE: This feature is exclusive to i210 models.
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst b/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst
index 78ceb3cdbfdb..11a9017f3069 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/igbvf.rst
@@ -53,13 +53,8 @@ https://www.kernel.org/pub/software/network/ethtool/
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
index 8d4f7ede2ff8..1e5f16993f69 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbe.rst
@@ -545,13 +545,8 @@ on the Intel Ethernet Controller XL710.
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
diff --git a/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst b/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst
index 9201c74e4c36..08dc0d368a48 100644
--- a/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/ixgbevf.rst
@@ -55,13 +55,8 @@ VLANs: There is a limit of a total of 64 shared VLANs to 1 or more VFs.
 Support
 =======
 For general information, go to the Intel support website at:
-
 https://www.intel.com/support/
 
-or the Intel Wired Networking project hosted by Sourceforge at:
-
-https://sourceforge.net/projects/e1000
-
 If an issue is identified with the released source code on a supported kernel
 with a supported adapter, email the specific information related to the issue
 to intel-wired-lan@lists.osuosl.org.
-- 
cgit v1.2.3


From a353318ebf24100bcc7254d293cd9f041f4075dd Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Wed, 29 Mar 2023 15:16:53 -0700
Subject: tools: ynl: populate most of the ethtool spec

Things that are not implemented:
- cable tests
- bitmasks in the requests don't work (needs multi-attr support in ynl.py)
- stats-get seems to return nonsense (not passing a bitmask properly?)
- notifications are not tested

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/ethtool.yaml | 1480 +++++++++++++++++++++++++++---
 1 file changed, 1367 insertions(+), 113 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml
index 6d8ae3d9a680..129f413ea349 100644
--- a/Documentation/netlink/specs/ethtool.yaml
+++ b/Documentation/netlink/specs/ethtool.yaml
@@ -6,6 +6,12 @@ protocol: genetlink-legacy
 
 doc: Partial family for Ethtool Netlink.
 
+definitions:
+  -
+    name: udp-tunnel-type
+    type: enum
+    entries: [ vxlan, geneve, vxlan-gpe ]
+
 attribute-sets:
   -
     name: header
@@ -38,6 +44,7 @@ attribute-sets:
       -
         name: bit
         type: nest
+        multi-attr: true
         nested-attributes: bitset-bit
   -
     name: bitset
@@ -53,6 +60,22 @@ attribute-sets:
         type: nest
         nested-attributes: bitset-bits
 
+  -
+    name: u64-array
+    attributes:
+      -
+        name: u64
+        type: nest
+        multi-attr: true
+        nested-attributes: u64
+  -
+    name: s32-array
+    attributes:
+      -
+        name: s32
+        type: nest
+        multi-attr: true
+        nested-attributes: s32
   -
     name: string
     attributes:
@@ -234,118 +257,1351 @@ attribute-sets:
         name: stats
         type: nest
         nested-attributes: mm-stat
+  -
+    name: linkinfo
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: port
+        type: u8
+      -
+        name: phyaddr
+        type: u8
+      -
+        name: tp-mdix
+        type: u8
+      -
+        name: tp-mdix-ctrl
+        type: u8
+      -
+        name: transceiver
+        type: u8
+  -
+    name: linkmodes
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: autoneg
+        type: u8
+      -
+        name: ours
+        type: nest
+        nested-attributes: bitset
+      -
+        name: peer
+        type: nest
+        nested-attributes: bitset
+      -
+        name: speed
+        type: u32
+      -
+        name: duplex
+        type: u8
+      -
+        name: master-slave-cfg
+        type: u8
+      -
+        name: master-slave-state
+        type: u8
+      -
+        name: master-slave-lanes
+        type: u32
+      -
+        name: rate-matching
+        type: u8
+  -
+    name: linkstate
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: link
+        type: u8
+      -
+        name: sqi
+        type: u32
+      -
+        name: sqi-max
+        type: u32
+      -
+        name: ext-state
+        type: u8
+      -
+        name: ext-substate
+        type: u8
+      -
+        name: down-cnt
+        type: u32
+  -
+    name: debug
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: msgmask
+        type: nest
+        nested-attributes: bitset
+  -
+    name: wol
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: modes
+        type: nest
+        nested-attributes: bitset
+      -
+        name: sopass
+        type: binary
+  -
+    name: features
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: hw
+        type: nest
+        nested-attributes: bitset
+      -
+        name: wanted
+        type: nest
+        nested-attributes: bitset
+      -
+        name: active
+        type: nest
+        nested-attributes: bitset
+      -
+        name: nochange
+        type: nest
+        nested-attributes: bitset
+  -
+    name: channels
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: rx-max
+        type: u32
+      -
+        name: tx-max
+        type: u32
+      -
+        name: other-max
+        type: u32
+      -
+        name: combined-max
+        type: u32
+      -
+        name: rx-count
+        type: u32
+      -
+        name: tx-count
+        type: u32
+      -
+        name: other-count
+        type: u32
+      -
+        name: combined-count
+        type: u32
 
-operations:
-  enum-model: directional
-  list:
-    -
-      name: strset-get
-      doc: Get string set from the kernel.
-
-      attribute-set: strset
-
-      do: &strset-get-op
-        request:
-          attributes:
-            - header
-            - stringsets
-            - counts-only
-        reply:
-          attributes:
-            - header
-            - stringsets
-      dump: *strset-get-op
-
-    # TODO: fill in the requests in between
-
-    -
-      name: privflags-get
-      doc: Get device private flags.
-
-      attribute-set: privflags
-
-      do: &privflag-get-op
-        request:
-          value: 13
-          attributes:
-            - header
-        reply:
-          value: 14
-          attributes:
-            - header
-            - flags
-      dump: *privflag-get-op
-    -
-      name: privflags-set
-      doc: Set device private flags.
-
-      attribute-set: privflags
-
-      do:
-        request:
-          attributes:
-            - header
-            - flags
-    -
-      name: privflags-ntf
-      doc: Notification for change in device private flags.
-      notify: privflags-get
-
-    -
-      name: rings-get
-      doc: Get ring params.
-
-      attribute-set: rings
-
-      do: &ring-get-op
-        request:
-          attributes:
-            - header
-        reply:
-          attributes:
-            - header
-            - rx-max
-            - rx-mini-max
-            - rx-jumbo-max
-            - tx-max
-            - rx
-            - rx-mini
-            - rx-jumbo
-            - tx
-            - rx-buf-len
-            - tcp-data-split
-            - cqe-size
-            - tx-push
-            - rx-push
-            - tx-push-buf-len
-            - tx-push-buf-len-max
-      dump: *ring-get-op
-    -
-      name: rings-set
-      doc: Set ring params.
-
-      attribute-set: rings
-
-      do:
-        request:
-          attributes:
-            - header
-            - rx
-            - rx-mini
-            - rx-jumbo
-            - tx
-            - rx-buf-len
-            - tcp-data-split
-            - cqe-size
-            - tx-push
-            - rx-push
-    -
-      name: rings-ntf
-      doc: Notification for change in ring params.
-      notify: rings-get
-
-    # TODO: fill in the requests in between
-
+  -
+    name: coalesce
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: rx-usecs
+        type: u32
+      -
+        name: rx-max-frames
+        type: u32
+      -
+        name: rx-usecs-irq
+        type: u32
+      -
+        name: rx-max-frames-irq
+        type: u32
+      -
+        name: tx-usecs
+        type: u32
+      -
+        name: tx-max-frames
+        type: u32
+      -
+        name: tx-usecs-irq
+        type: u32
+      -
+        name: tx-max-frames-irq
+        type: u32
+      -
+        name: stats-block-usecs
+        type: u32
+      -
+        name: use-adaptive-rx
+        type: u8
+      -
+        name: use-adaptive-tx
+        type: u8
+      -
+        name: pkt-rate-low
+        type: u32
+      -
+        name: rx-usecs-low
+        type: u32
+      -
+        name: rx-max-frames-low
+        type: u32
+      -
+        name: tx-usecs-low
+        type: u32
+      -
+        name: tx-max-frames-low
+        type: u32
+      -
+        name: pkt-rate-high
+        type: u32
+      -
+        name: rx-usecs-high
+        type: u32
+      -
+        name: rx-max-frames-high
+        type: u32
+      -
+        name: tx-usecs-high
+        type: u32
+      -
+        name: tx-max-frames-high
+        type: u32
+      -
+        name: rate-sample-interval
+        type: u32
+      -
+        name: use-cqe-mode-tx
+        type: u8
+      -
+        name: use-cqe-mode-rx
+        type: u8
+      -
+        name: tx-aggr-max-bytes
+        type: u32
+      -
+        name: tx-aggr-max-frames
+        type: u32
+      -
+        name: tx-aggr-time-usecs
+        type: u32
+  -
+    name: pause-stat
+    attributes:
+      -
+        name: pad
+        type: u32
+      -
+        name: tx-frames
+        type: u64
+      -
+        name: rx-frames
+        type: u64
+  -
+    name: pause
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: autoneg
+        type: u8
+      -
+        name: rx
+        type: u8
+      -
+        name: tx
+        type: u8
+      -
+        name: stats
+        type: nest
+        nested-attributes: pause-stat
+      -
+        name: stats-src
+        type: u32
+  -
+    name: eee
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: modes-ours
+        type: nest
+        nested-attributes: bitset
+      -
+        name: modes-peer
+        type: nest
+        nested-attributes: bitset
+      -
+        name: active
+        type: u8
+      -
+        name: enabled
+        type: u8
+      -
+        name: tx-lpi-enabled
+        type: u8
+      -
+        name: tx-lpi-timer
+        type: u32
+  -
+    name: tsinfo
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: timestamping
+        type: nest
+        nested-attributes: bitset
+      -
+        name: tx-types
+        type: nest
+        nested-attributes: bitset
+      -
+        name: rx-filters
+        type: nest
+        nested-attributes: bitset
+      -
+        name: phc-index
+        type: u32
+  -
+    name: cable-test-nft-nest-result
+    attributes:
+      -
+        name: pair
+        type: u8
+      -
+        name: code
+        type: u8
+  -
+    name: cable-test-nft-nest-fault-length
+    attributes:
+      -
+        name: pair
+        type: u8
+      -
+        name: cm
+        type: u32
+  -
+    name: cable-test-nft-nest
+    attributes:
+      -
+        name: result
+        type: nest
+        nested-attributes: cable-test-nft-nest-result
+      -
+        name: fault-length
+        type: nest
+        nested-attributes: cable-test-nft-nest-fault-length
+  -
+    name: cable-test
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: status
+        type: u8
+      -
+        name: nest
+        type: nest
+        nested-attributes: cable-test-nft-nest
+  -
+    name: cable-test-tdr-cfg
+    attributes:
+      -
+        name: first
+        type: u32
+      -
+        name: last
+        type: u32
+      -
+        name: step
+        type: u32
+      -
+        name: pair
+        type: u8
+  -
+    name: cable-test-tdr
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: cfg
+        type: nest
+        nested-attributes: cable-test-tdr-cfg
+  -
+    name: tunnel-info-udp-entry
+    attributes:
+      -
+        name: port
+        type: u16
+        byte-order: big-endian
+      -
+        name: type
+        type: u32
+        enum: udp-tunnel-type
+  -
+    name: tunnel-info-udp-table
+    attributes:
+      -
+        name: size
+        type: u32
+      -
+        name: types
+        type: nest
+        nested-attributes: bitset
+      -
+        name: udp-ports
+        type: nest
+        nested-attributes: tunnel-info-udp-entry
+  -
+    name: tunnel-info
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: udp-ports
+        type: nest
+        nested-attributes: tunnel-info-udp-table
+  -
+    name: fec-stat
+    attributes:
+      -
+        name: pad
+        type: u8
+      -
+        name: corrected
+        type: nest
+        nested-attributes: u64-array
+      -
+        name: uncorr
+        type: nest
+        nested-attributes: u64-array
+      -
+        name: corr-bits
+        type: nest
+        nested-attributes: u64-array
+  -
+    name: fec
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: modes
+        type: nest
+        nested-attributes: bitset
+      -
+        name: auto
+        type: u8
+      -
+        name: active
+        type: u32
+      -
+        name: stats
+        type: nest
+        nested-attributes: fec-stat
+  -
+    name: module-eeprom
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: offset
+        type: u32
+      -
+        name: length
+        type: u32
+      -
+        name: page
+        type: u8
+      -
+        name: bank
+        type: u8
+      -
+        name: i2c-address
+        type: u8
+      -
+        name: data
+        type: binary
+  -
+    name: stats-grp
+    attributes:
+      -
+        name: pad
+        type: u32
+      -
+        name: id
+        type: u32
+      -
+        name: ss-id
+        type: u32
+      -
+        name: stat
+        type: nest
+        nested-attributes: u64
+      -
+        name: hist-rx
+        type: nest
+        nested-attributes: u64
+      -
+        name: hist-tx
+        type: nest
+        nested-attributes: u64
+      -
+        name: hist-bkt-low
+        type: u32
+      -
+        name: hist-bkt-hi
+        type: u32
+      -
+        name: hist-bkt-val
+        type: u64
+  -
+    name: stats
+    attributes:
+      -
+        name: pad
+        type: u32
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: groups
+        type: nest
+        nested-attributes: bitset
+      -
+        name: grp
+        type: nest
+        nested-attributes: stats-grp
+      -
+        name: src
+        type: u32
+  -
+    name: phc-vclocks
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: num
+        type: u32
+      -
+        name: index
+        type: nest
+        nested-attributes: s32-array
+  -
+    name: module
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: power-mode-policy
+        type: u8
+      -
+        name: power-mode
+        type: u8
+  -
+    name: pse
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: admin-state
+        type: u32
+      -
+        name: admin-control
+        type: u32
+      -
+        name: pw-d-status
+        type: u32
+  -
+    name: rss
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: context
+        type: u32
+      -
+        name: hfunc
+        type: u32
+      -
+        name: indir
+        type: binary
+      -
+        name: hkey
+        type: binary
+  -
+    name: plca
+    attributes:
+      -
+        name: header
+        type: nest
+        nested-attributes: header
+      -
+        name: version
+        type: u16
+      -
+        name: enabled
+        type: u8
+      -
+        name: status
+        type: u8
+      -
+        name: node-cnt
+        type: u32
+      -
+        name: node-id
+        type: u32
+      -
+        name: to-tmr
+        type: u32
+      -
+        name: burst-cnt
+        type: u32
+      -
+        name: burst-tmr
+        type: u32
+
+operations:
+  enum-model: directional
+  list:
+    -
+      name: strset-get
+      doc: Get string set from the kernel.
+
+      attribute-set: strset
+
+      do: &strset-get-op
+        request:
+          attributes:
+            - header
+            - stringsets
+            - counts-only
+        reply:
+          attributes:
+            - header
+            - stringsets
+      dump: *strset-get-op
+    -
+      name: linkinfo-get
+      doc: Get link info.
+
+      attribute-set: linkinfo
+
+      do: &linkinfo-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &linkinfo
+            - header
+            - port
+            - phyaddr
+            - tp-mdix
+            - tp-mdix-ctrl
+            - transceiver
+      dump: *linkinfo-get-op
+    -
+      name: linkinfo-set
+      doc: Set link info.
+
+      attribute-set: linkinfo
+
+      do:
+        request:
+          attributes: *linkinfo
+    -
+      name: linkinfo-ntf
+      doc: Notification for change in link info.
+      notify: linkinfo-get
+    -
+      name: linkmodes-get
+      doc: Get link modes.
+
+      attribute-set: linkmodes
+
+      do: &linkmodes-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &linkmodes
+            - header
+            - autoneg
+            - ours
+            - peer
+            - speed
+            - duplex
+            - master-slave-cfg
+            - master-slave-state
+            - master-slave-lanes
+            - rate-matching
+      dump: *linkmodes-get-op
+    -
+      name: linkmodes-set
+      doc: Set link modes.
+
+      attribute-set: linkmodes
+
+      do:
+        request:
+          attributes: *linkmodes
+    -
+      name: linkmodes-ntf
+      doc: Notification for change in link modes.
+      notify: linkmodes-get
+    -
+      name: linkstate-get
+      doc: Get link state.
+
+      attribute-set: linkstate
+
+      do: &linkstate-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - link
+            - sqi
+            - sqi-max
+            - ext-state
+            - ext-substate
+            - down-cnt
+      dump: *linkstate-get-op
+    -
+      name: debug-get
+      doc: Get debug message mask.
+
+      attribute-set: debug
+
+      do: &debug-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &debug
+            - header
+            - msgmask
+      dump: *debug-get-op
+    -
+      name: debug-set
+      doc: Set debug message mask.
+
+      attribute-set: debug
+
+      do:
+        request:
+          attributes: *debug
+    -
+      name: debug-ntf
+      doc: Notification for change in debug message mask.
+      notify: debug-get
+    -
+      name: wol-get
+      doc: Get WOL params.
+
+      attribute-set: wol
+
+      do: &wol-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &wol
+            - header
+            - modes
+            - sopass
+      dump: *wol-get-op
+    -
+      name: wol-set
+      doc: Set WOL params.
+
+      attribute-set: wol
+
+      do:
+        request:
+          attributes: *wol
+    -
+      name: wol-ntf
+      doc: Notification for change in WOL params.
+      notify: wol-get
+    -
+      name: features-get
+      doc: Get features.
+
+      attribute-set: features
+
+      do: &feature-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &feature
+            - header
+            # User-changeable features.
+            - hw
+            # User-requested features.
+            - wanted
+            # Currently active features.
+            - active
+            # Unchangeable features.
+            - nochange
+      dump: *feature-get-op
+    -
+      name: features-set
+      doc: Set features.
+
+      attribute-set: features
+
+      do: &feature-set-op
+        request:
+          attributes: *feature
+        reply:
+          attributes: *feature
+    -
+      name: features-ntf
+      doc: Notification for change in features.
+      notify: features-get
+    -
+      name: privflags-get
+      doc: Get device private flags.
+
+      attribute-set: privflags
+
+      do: &privflag-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &privflag
+            - header
+            - flags
+      dump: *privflag-get-op
+    -
+      name: privflags-set
+      doc: Set device private flags.
+
+      attribute-set: privflags
+
+      do:
+        request:
+          attributes: *privflag
+    -
+      name: privflags-ntf
+      doc: Notification for change in device private flags.
+      notify: privflags-get
+
+    -
+      name: rings-get
+      doc: Get ring params.
+
+      attribute-set: rings
+
+      do: &ring-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &ring
+            - header
+            - rx-max
+            - rx-mini-max
+            - rx-jumbo-max
+            - tx-max
+            - rx
+            - rx-mini
+            - rx-jumbo
+            - tx
+            - rx-buf-len
+            - tcp-data-split
+            - cqe-size
+            - tx-push
+            - rx-push
+            - tx-push-buf-len
+            - tx-push-buf-len-max
+      dump: *ring-get-op
+    -
+      name: rings-set
+      doc: Set ring params.
+
+      attribute-set: rings
+
+      do:
+        request:
+          attributes: *ring
+    -
+      name: rings-ntf
+      doc: Notification for change in ring params.
+      notify: rings-get
+    -
+      name: channels-get
+      doc: Get channel params.
+
+      attribute-set: channels
+
+      do: &channel-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &channel
+            - header
+            - rx-max
+            - tx-max
+            - other-max
+            - combined-max
+            - rx-count
+            - tx-count
+            - other-count
+            - combined-count
+      dump: *channel-get-op
+    -
+      name: channels-set
+      doc: Set channel params.
+
+      attribute-set: channels
+
+      do:
+        request:
+          attributes: *channel
+    -
+      name: channels-ntf
+      doc: Notification for change in channel params.
+      notify: channels-get
+    -
+      name: coalesce-get
+      doc: Get coalesce params.
+
+      attribute-set: coalesce
+
+      do: &coalesce-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &coalesce
+            - header
+            - rx-usecs
+            - rx-max-frames
+            - rx-usecs-irq
+            - rx-max-frames-irq
+            - tx-usecs
+            - tx-max-frames
+            - tx-usecs-irq
+            - tx-max-frames-irq
+            - stats-block-usecs
+            - use-adaptive-rx
+            - use-adaptive-tx
+            - pkt-rate-low
+            - rx-usecs-low
+            - rx-max-frames-low
+            - tx-usecs-low
+            - tx-max-frames-low
+            - pkt-rate-high
+            - rx-usecs-high
+            - rx-max-frames-high
+            - tx-usecs-high
+            - tx-max-frames-high
+            - rate-sample-interval
+            - use-cqe-mode-tx
+            - use-cqe-mode-rx
+            - tx-aggr-max-bytes
+            - tx-aggr-max-frames
+            - tx-aggr-time-usecs
+      dump: *coalesce-get-op
+    -
+      name: coalesce-set
+      doc: Set coalesce params.
+
+      attribute-set: coalesce
+
+      do:
+        request:
+          attributes: *coalesce
+    -
+      name: coalesce-ntf
+      doc: Notification for change in coalesce params.
+      notify: coalesce-get
+    -
+      name: pause-get
+      doc: Get pause params.
+
+      attribute-set: pause
+
+      do: &pause-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &pause
+            - header
+            - autoneg
+            - rx
+            - tx
+            - stats
+            - stats-src
+      dump: *pause-get-op
+    -
+      name: pause-set
+      doc: Set pause params.
+
+      attribute-set: pause
+
+      do:
+        request:
+          attributes: *pause
+    -
+      name: pause-ntf
+      doc: Notification for change in pause params.
+      notify: pause-get
+    -
+      name: eee-get
+      doc: Get eee params.
+
+      attribute-set: eee
+
+      do: &eee-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &eee
+            - header
+            - modes-ours
+            - modes-peer
+            - active
+            - enabled
+            - tx-lpi-enabled
+            - tx-lpi-timer
+      dump: *eee-get-op
+    -
+      name: eee-set
+      doc: Set eee params.
+
+      attribute-set: eee
+
+      do:
+        request:
+          attributes: *eee
+    -
+      name: eee-ntf
+      doc: Notification for change in eee params.
+      notify: eee-get
+    -
+      name: tsinfo-get
+      doc: Get tsinfo params.
+
+      attribute-set: tsinfo
+
+      do: &tsinfo-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - timestamping
+            - tx-types
+            - rx-filters
+            - phc-index
+      dump: *tsinfo-get-op
+    -
+      name: cable-test-act
+      doc: Cable test.
+
+      attribute-set: cable-test
+
+      do:
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - cable-test-ntf-nest
+    -
+      name: cable-test-tdr-act
+      doc: Cable test TDR.
+
+      attribute-set: cable-test-tdr
+
+      do:
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - cable-test-tdr-cfg
+    -
+      name: tunnel-info-get
+      doc: Get tunnel info params.
+
+      attribute-set: tunnel-info
+
+      do: &tunnel-info-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - udp-ports
+      dump: *tunnel-info-get-op
+    -
+      name: fec-get
+      doc: Get FEC params.
+
+      attribute-set: fec
+
+      do: &fec-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &fec
+            - header
+            - modes
+            - auto
+            - active
+            - stats
+      dump: *fec-get-op
+    -
+      name: fec-set
+      doc: Set FEC params.
+
+      attribute-set: fec
+
+      do:
+        request:
+          attributes: *fec
+    -
+      name: fec-ntf
+      doc: Notification for change in FEC params.
+      notify: fec-get
+    -
+      name: module-eeprom-get
+      doc: Get module EEPROM params.
+
+      attribute-set: module-eeprom
+
+      do: &module-eeprom-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - offset
+            - length
+            - page
+            - bank
+            - i2c-address
+            - data
+      dump: *module-eeprom-get-op
+    -
+      name: stats-get
+      doc: Get statistics.
+
+      attribute-set: stats
+
+      do: &stats-get-op
+        request:
+          attributes:
+            - header
+            - groups
+        reply:
+          attributes:
+            - header
+            - groups
+            - grp
+            - src
+      dump: *stats-get-op
+    -
+      name: phc-vclocks-get
+      doc: Get PHC VCLOCKs.
+
+      attribute-set: phc-vclocks
+
+      do: &phc-vclocks-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - num
+      dump: *phc-vclocks-get-op
+    -
+      name: module-get
+      doc: Get module params.
+
+      attribute-set: module
+
+      do: &module-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &module
+            - header
+            - power-mode-policy
+            - power-mode
+      dump: *module-get-op
+    -
+      name: module-set
+      doc: Set module params.
+
+      attribute-set: module
+
+      do:
+        request:
+          attributes: *module
+    -
+      name: module-ntf
+      doc: Notification for change in module params.
+      notify: module-get
+    -
+      name: pse-get
+      doc: Get Power Sourcing Equipment params.
+
+      attribute-set: pse
+
+      do: &pse-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &pse
+            - header
+            - admin-state
+            - admin-control
+            - pw-d-status
+      dump: *pse-get-op
+    -
+      name: pse-set
+      doc: Set Power Sourcing Equipment params.
+
+      attribute-set: pse
+
+      do:
+        request:
+          attributes: *pse
+    -
+      name: rss-get
+      doc: Get RSS params.
+
+      attribute-set: rss
+
+      do: &rss-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes:
+            - header
+            - context
+            - hfunc
+            - indir
+            - hkey
+      dump: *rss-get-op
+    -
+      name: plca-get
+      doc: Get PLCA params.
+
+      attribute-set: plca
+
+      do: &plca-get-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: &plca
+            - header
+            - version
+            - enabled
+            - status
+            - node-cnt
+            - node-id
+            - to-tmr
+            - burst-cnt
+            - burst-tmr
+      dump: *plca-get-op
+    -
+      name: plca-set
+      doc: Set PLCA params.
+
+      attribute-set: plca
+
+      do:
+        request:
+          attributes: *plca
+    -
+      name: plca-get-status
+      doc: Get PLCA status params.
+
+      attribute-set: plca
+
+      do: &plca-get-status-op
+        request:
+          attributes:
+            - header
+        reply:
+          attributes: *plca
+      dump: *plca-get-status-op
+    -
+      name: plca-ntf
+      doc: Notification for change in PLCA params.
+      notify: plca-get
     -
       name: mm-get
       doc: Get MAC Merge configuration and state
@@ -354,11 +1610,9 @@ operations:
 
       do: &mm-get-op
         request:
-          value: 42
           attributes:
             - header
         reply:
-          value: 42
           attributes:
             - header
             - pmac-enabled
-- 
cgit v1.2.3


From 99b3a769cd8ace9e2d38b3c41208a5a261510e70 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 28 Mar 2023 14:15:18 +0800
Subject: dt-bindings: net: fec: add power-domains property

Add optional power domains property

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20230328061518.1985981-1-peng.fan@oss.nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/fsl,fec.yaml | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/fsl,fec.yaml b/Documentation/devicetree/bindings/net/fsl,fec.yaml
index e6f2045f05de..b494e009326e 100644
--- a/Documentation/devicetree/bindings/net/fsl,fec.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,fec.yaml
@@ -144,6 +144,9 @@ properties:
     description:
       Regulator that powers the Ethernet PHY.
 
+  power-domains:
+    maxItems: 1
+
   fsl,num-tx-queues:
     $ref: /schemas/types.yaml#/definitions/uint32
     description:
-- 
cgit v1.2.3


From db9d479ab59b21d719486e6bf673f83f129dae32 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Fri, 31 Mar 2023 14:57:33 -0500
Subject: bpf,docs: Update documentation to reflect new task kfuncs

Now that struct task_struct objects are RCU safe, and bpf_task_acquire()
can return NULL, we should update the BPF task kfunc documentation to
reflect the current state of the API.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230331195733.699708-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst | 49 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 43 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index bf1b85941452..d8a16c4bef7f 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -471,13 +471,50 @@ struct_ops callback arg. For example:
 		struct task_struct *acquired;
 
 		acquired = bpf_task_acquire(task);
+		if (acquired)
+			/*
+			 * In a typical program you'd do something like store
+			 * the task in a map, and the map will automatically
+			 * release it later. Here, we release it manually.
+			 */
+			bpf_task_release(acquired);
+		return 0;
+	}
+
+
+References acquired on ``struct task_struct *`` objects are RCU protected.
+Therefore, when in an RCU read region, you can obtain a pointer to a task
+embedded in a map value without having to acquire a reference:
+
+.. code-block:: c
+
+	#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+	private(TASK) static struct task_struct *global;
+
+	/**
+	 * A trivial example showing how to access a task stored
+	 * in a map using RCU.
+	 */
+	SEC("tp_btf/task_newtask")
+	int BPF_PROG(task_rcu_read_example, struct task_struct *task, u64 clone_flags)
+	{
+		struct task_struct *local_copy;
+
+		bpf_rcu_read_lock();
+		local_copy = global;
+		if (local_copy)
+			/*
+			 * We could also pass local_copy to kfuncs or helper functions here,
+			 * as we're guaranteed that local_copy will be valid until we exit
+			 * the RCU read region below.
+			 */
+			bpf_printk("Global task %s is valid", local_copy->comm);
+		else
+			bpf_printk("No global task found");
+		bpf_rcu_read_unlock();
+
+		/* At this point we can no longer reference local_copy. */
 
-		/*
-		 * In a typical program you'd do something like store
-		 * the task in a map, and the map will automatically
-		 * release it later. Here, we release it manually.
-		 */
-		bpf_task_release(acquired);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 16b7c970cc8192e929dbd5192ccc1867e19d7bda Mon Sep 17 00:00:00 2001
From: Dave Thaler <dthaler@microsoft.com>
Date: Sun, 26 Mar 2023 05:49:46 +0000
Subject: bpf, docs: Add docs on extended 64-bit immediate instructions

Add docs on extended 64-bit immediate instructions, including six instructions
previously undocumented.  Include a brief description of maps and variables,
as used by those instructions.

V1 -> V2: rebased on top of latest master

V2 -> V3: addressed comments from Alexei

V3 -> V4: addressed comments from David Vernet

Signed-off-by: Dave Thaler <dthaler@microsoft.com>
Link: https://lore.kernel.org/r/20230326054946.2331-1-dthaler1968@googlemail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/instruction-set.rst | 58 ++++++++++++++++++++++++++++++-----
 Documentation/bpf/linux-notes.rst     | 22 +++++++++++++
 2 files changed, 72 insertions(+), 8 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/instruction-set.rst b/Documentation/bpf/instruction-set.rst
index b77280eb926f..492980ece1ab 100644
--- a/Documentation/bpf/instruction-set.rst
+++ b/Documentation/bpf/instruction-set.rst
@@ -416,14 +416,56 @@ and loaded back to ``R0``.
 -----------------------------
 
 Instructions with the ``BPF_IMM`` 'mode' modifier use the wide instruction
-encoding for an extra imm64 value.
-
-There is currently only one such instruction.
-
-``BPF_LD | BPF_DW | BPF_IMM`` means::
-
-  dst = imm64
-
+encoding defined in `Instruction encoding`_, and use the 'src' field of the
+basic instruction to hold an opcode subtype.
+
+The following table defines a set of ``BPF_IMM | BPF_DW | BPF_LD`` instructions
+with opcode subtypes in the 'src' field, using new terms such as "map"
+defined further below:
+
+=========================  ======  ===  =========================================  ===========  ==============
+opcode construction        opcode  src  pseudocode                                 imm type     dst type
+=========================  ======  ===  =========================================  ===========  ==============
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x0  dst = imm64                                integer      integer
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x1  dst = map_by_fd(imm)                       map fd       map
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x2  dst = map_val(map_by_fd(imm)) + next_imm   map fd       data pointer
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x3  dst = var_addr(imm)                        variable id  data pointer
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x4  dst = code_addr(imm)                       integer      code pointer
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x5  dst = map_by_idx(imm)                      map index    map
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x6  dst = map_val(map_by_idx(imm)) + next_imm  map index    data pointer
+=========================  ======  ===  =========================================  ===========  ==============
+
+where
+
+* map_by_fd(imm) means to convert a 32-bit file descriptor into an address of a map (see `Maps`_)
+* map_by_idx(imm) means to convert a 32-bit index into an address of a map
+* map_val(map) gets the address of the first value in a given map
+* var_addr(imm) gets the address of a platform variable (see `Platform Variables`_) with a given id
+* code_addr(imm) gets the address of the instruction at a specified relative offset in number of (64-bit) instructions
+* the 'imm type' can be used by disassemblers for display
+* the 'dst type' can be used for verification and JIT compilation purposes
+
+Maps
+~~~~
+
+Maps are shared memory regions accessible by eBPF programs on some platforms.
+A map can have various semantics as defined in a separate document, and may or
+may not have a single contiguous memory region, but the 'map_val(map)' is
+currently only defined for maps that do have a single contiguous memory region.
+
+Each map can have a file descriptor (fd) if supported by the platform, where
+'map_by_fd(imm)' means to get the map with the specified file descriptor. Each
+BPF program can also be defined to use a set of maps associated with the
+program at load time, and 'map_by_idx(imm)' means to get the map with the given
+index in the set associated with the BPF program containing the instruction.
+
+Platform Variables
+~~~~~~~~~~~~~~~~~~
+
+Platform variables are memory regions, identified by integer ids, exposed by
+the runtime and accessible by BPF programs on some platforms.  The
+'var_addr(imm)' operation means to get the address of the memory region
+identified by the given id.
 
 Legacy BPF Packet access instructions
 -------------------------------------
diff --git a/Documentation/bpf/linux-notes.rst b/Documentation/bpf/linux-notes.rst
index f43b9c797bcb..508d009d3bed 100644
--- a/Documentation/bpf/linux-notes.rst
+++ b/Documentation/bpf/linux-notes.rst
@@ -20,6 +20,28 @@ integer would be read from a specified register, is not currently supported
 by the verifier.  Any programs with this instruction will fail to load
 until such support is added.
 
+Maps
+====
+
+Linux only supports the 'map_val(map)' operation on array maps with a single element.
+
+Linux uses an fd_array to store maps associated with a BPF program. Thus,
+map_by_idx(imm) uses the fd at that index in the array.
+
+Variables
+=========
+
+The following 64-bit immediate instruction specifies that a variable address,
+which corresponds to some integer stored in the 'imm' field, should be loaded:
+
+=========================  ======  ===  =========================================  ===========  ==============
+opcode construction        opcode  src  pseudocode                                 imm type     dst type
+=========================  ======  ===  =========================================  ===========  ==============
+BPF_IMM | BPF_DW | BPF_LD  0x18    0x3  dst = var_addr(imm)                        variable id  data pointer
+=========================  ======  ===  =========================================  ===========  ==============
+
+On Linux, this integer is a BTF ID.
+
 Legacy BPF Packet access instructions
 =====================================
 
-- 
cgit v1.2.3


From 386f5fc9061b0c8429c7c0cc57f12c05f8662e23 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Mon, 3 Apr 2023 02:19:51 +0100
Subject: dt-bindings: net: dsa: mediatek,mt7530: add mediatek,mt7988-switch

Add documentation for the built-in switch which can be found in the
MediaTek MT7988 SoC.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/dsa/mediatek,mt7530.yaml          | 26 ++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
index 5ae9cd8f99a2..e532c6b795f4 100644
--- a/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/mediatek,mt7530.yaml
@@ -11,16 +11,23 @@ maintainers:
   - Landen Chao <Landen.Chao@mediatek.com>
   - DENG Qingfang <dqfext@gmail.com>
   - Sean Wang <sean.wang@mediatek.com>
+  - Daniel Golle <daniel@makrotopia.org>
 
 description: |
-  There are two versions of MT7530, standalone and in a multi-chip module.
+  There are three versions of MT7530: standalone, in a multi-chip module, and
+  built into a SoC.
 
   MT7530 is a part of the multi-chip module in MT7620AN, MT7620DA, MT7620DAN,
   MT7620NN, MT7621AT, MT7621DAT, MT7621ST and MT7623AI SoCs.
 
+  The MT7988 SoC comes with a built-in switch similar to MT7531 as well as four
+  Gigabit Ethernet PHYs. The switch registers are directly mapped into the SoC's
+  memory map rather than using MDIO. The switch has an internally connected 10G
+  CPU port and 4 user ports connected to the built-in Gigabit Ethernet PHYs.
+
   MT7530 in MT7620AN, MT7620DA, MT7620DAN and MT7620NN SoCs has got 10/100 PHYs
   and the switch registers are directly mapped into SoC's memory map rather than
-  using MDIO. The DSA driver currently doesn't support this.
+  using MDIO. The DSA driver currently doesn't support MT7620 variants.
 
   There is only the standalone version of MT7531.
 
@@ -81,6 +88,10 @@ properties:
           Multi-chip module MT7530 in MT7621AT, MT7621DAT and MT7621ST SoCs
         const: mediatek,mt7621
 
+      - description:
+          Built-in switch of the MT7988 SoC
+        const: mediatek,mt7988-switch
+
   reg:
     maxItems: 1
 
@@ -268,6 +279,17 @@ allOf:
       required:
         - mediatek,mcm
 
+  - if:
+      properties:
+        compatible:
+          const: mediatek,mt7988-switch
+    then:
+      $ref: "#/$defs/mt7530-dsa-port"
+      properties:
+        gpio-controller: false
+        mediatek,mcm: false
+        reset-names: false
+
 unevaluatedProperties: false
 
 examples:
-- 
cgit v1.2.3


From 066b41a599d687fb0409bd0b0f3c41615f070d77 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Tue, 28 Mar 2023 13:46:02 +0800
Subject: dt-bindings: can: fsl,flexcan: add optional power-domains property

Add optional power-domains property for i.MX8 usage.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/all/20230328054602.1974255-1-peng.fan@oss.nxp.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
index 6e59bd2a6094..4162469c3c08 100644
--- a/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
+++ b/Documentation/devicetree/bindings/net/can/fsl,flexcan.yaml
@@ -63,6 +63,9 @@ properties:
       boot loader. This property should only be used the used operating system
       doesn't support the clocks and clock-names property.
 
+  power-domains:
+    maxItems: 1
+
   xceiver-supply:
     description: Regulator that powers the CAN transceiver.
 
-- 
cgit v1.2.3


From c8f1f2e946757e5130882d23c6beded32d87ed0c Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 4 Apr 2023 15:42:13 -0500
Subject: dt-bindings: net: ethernet-switch: Make "#address-cells/#size-cells"
 required

The schema doesn't allow for a single (unaddressed) ethernet port node
nor does a single port switch make much sense. So if there's always
multiple child nodes, "#address-cells" and "#size-cells" should be
required.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20230404204213.635773-1-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/ethernet-switch.yaml   | 4 ++++
 Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-switch.yaml b/Documentation/devicetree/bindings/net/ethernet-switch.yaml
index a04f8ef744aa..2ceccce6cbd7 100644
--- a/Documentation/devicetree/bindings/net/ethernet-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-switch.yaml
@@ -40,6 +40,10 @@ patternProperties:
         type: object
         description: Ethernet switch ports
 
+    required:
+      - "#address-cells"
+      - "#size-cells"
+
 oneOf:
   - required:
       - ports
diff --git a/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml
index 144001ff840c..164704338ef0 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipq8064-mdio.yaml
@@ -53,7 +53,9 @@ examples:
             reg = <0x10>;
 
             ports {
-              /* ... */
+                #address-cells = <1>;
+                #size-cells = <0>;
+                /* ... */
             };
         };
     };
-- 
cgit v1.2.3


From f037897669051d53551a2b198b2519356d59d491 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 4 Apr 2023 15:41:52 -0500
Subject: dt-bindings: net: dsa: brcm,sf2: Drop unneeded
 "#address-cells/#size-cells"

There's no need for "#address-cells/#size-cells" in the brcm,sf2 node as
no immediate child nodes have an address. What was probably intended was
to put them in the 'ports' node, but that's not necessary as that is
covered by ethernet-switch.yaml via dsa.yaml.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20230404204152.635400-1-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml b/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml
index 37bf33bd4670..c745407f2f68 100644
--- a/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/brcm,sf2.yaml
@@ -76,12 +76,6 @@ properties:
       supports reporting the number of packets in-flight in a switch queue
     type: boolean
 
-  "#address-cells":
-    const: 1
-
-  "#size-cells":
-    const: 0
-
   ports:
     type: object
 
@@ -99,8 +93,6 @@ properties:
 required:
   - reg
   - interrupts
-  - "#address-cells"
-  - "#size-cells"
 
 allOf:
   - $ref: dsa.yaml#
@@ -145,8 +137,6 @@ examples:
   - |
     switch@f0b00000 {
             compatible = "brcm,bcm7445-switch-v4.0";
-            #address-cells = <1>;
-            #size-cells = <0>;
             reg = <0xf0b00000 0x40000>,
                   <0xf0b40000 0x110>,
                   <0xf0b40340 0x30>,
-- 
cgit v1.2.3


From d2f5c68e3f7157e874a759e382a5eaffa775b869 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 6 Apr 2023 18:25:30 -0700
Subject: docs: net: reformat driver.rst from a list to sections

driver.rst had a historical form of list of common problems.
In the age of Sphinx and rendered documentation it's better
to use the more usual title + text format.

This will allow us to render kdoc into the output more naturally.

No changes to the actual text.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/driver.rst | 91 +++++++++++++++++++++++--------------
 1 file changed, 56 insertions(+), 35 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
index 64f7236ff10b..3040a74d421c 100644
--- a/Documentation/networking/driver.rst
+++ b/Documentation/networking/driver.rst
@@ -4,15 +4,19 @@
 Softnet Driver Issues
 =====================
 
-Transmit path guidelines:
+Transmit path guidelines
+========================
 
-1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under
-   any normal circumstances.  It is considered a hard error unless
-   there is no way your device can tell ahead of time when its
-   transmit function will become busy.
+Stop queues in advance
+----------------------
 
-   Instead it must maintain the queue properly.  For example,
-   for a driver implementing scatter-gather this means::
+The ndo_start_xmit method must not return NETDEV_TX_BUSY under
+any normal circumstances.  It is considered a hard error unless
+there is no way your device can tell ahead of time when its
+transmit function will become busy.
+
+Instead it must maintain the queue properly.  For example,
+for a driver implementing scatter-gather this means::
 
 	static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
 					       struct net_device *dev)
@@ -42,56 +46,73 @@ Transmit path guidelines:
 		return NETDEV_TX_OK;
 	}
 
-   And then at the end of your TX reclamation event handling::
+And then at the end of your TX reclamation event handling::
 
 	if (netif_queue_stopped(dp->dev) &&
 	    TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
 		netif_wake_queue(dp->dev);
 
-   For a non-scatter-gather supporting card, the three tests simply become::
+For a non-scatter-gather supporting card, the three tests simply become::
 
 		/* This is a hard error log it. */
 		if (TX_BUFFS_AVAIL(dp) <= 0)
 
-   and::
+and::
 
 		if (TX_BUFFS_AVAIL(dp) == 0)
 
-   and::
+and::
 
 	if (netif_queue_stopped(dp->dev) &&
 	    TX_BUFFS_AVAIL(dp) > 0)
 		netif_wake_queue(dp->dev);
 
-2) An ndo_start_xmit method must not modify the shared parts of a
-   cloned SKB.
+No exclusive ownership
+----------------------
+
+An ndo_start_xmit method must not modify the shared parts of a
+cloned SKB.
+
+Timely completions
+------------------
+
+Do not forget that once you return NETDEV_TX_OK from your
+ndo_start_xmit method, it is your driver's responsibility to free
+up the SKB and in some finite amount of time.
 
-3) Do not forget that once you return NETDEV_TX_OK from your
-   ndo_start_xmit method, it is your driver's responsibility to free
-   up the SKB and in some finite amount of time.
+For example, this means that it is not allowed for your TX
+mitigation scheme to let TX packets "hang out" in the TX
+ring unreclaimed forever if no new TX packets are sent.
+This error can deadlock sockets waiting for send buffer room
+to be freed up.
 
-   For example, this means that it is not allowed for your TX
-   mitigation scheme to let TX packets "hang out" in the TX
-   ring unreclaimed forever if no new TX packets are sent.
-   This error can deadlock sockets waiting for send buffer room
-   to be freed up.
+If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
+must not keep any reference to that SKB and you must not attempt
+to free it up.
 
-   If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
-   must not keep any reference to that SKB and you must not attempt
-   to free it up.
+Probing guidelines
+==================
 
-Probing guidelines:
+Address validation
+------------------
+
+Any hardware layer address you obtain for your device should
+be verified.  For example, for ethernet check it with
+linux/etherdevice.h:is_valid_ether_addr()
+
+Close/stop guidelines
+=====================
 
-1) Any hardware layer address you obtain for your device should
-   be verified.  For example, for ethernet check it with
-   linux/etherdevice.h:is_valid_ether_addr()
+Quiescence
+----------
 
-Close/stop guidelines:
+After the ndo_stop routine has been called, the hardware must
+not receive or transmit any data.  All in flight packets must
+be aborted. If necessary, poll or wait for completion of
+any reset commands.
 
-1) After the ndo_stop routine has been called, the hardware must
-   not receive or transmit any data.  All in flight packets must
-   be aborted. If necessary, poll or wait for completion of
-   any reset commands.
+Auto-close
+----------
 
-2) The ndo_stop routine will be called by unregister_netdevice
-   if device is still UP.
+The ndo_stop routine will be called by unregister_netdevice
+if device is still UP.
-- 
cgit v1.2.3


From da4f0f82ee9d5a128b48c959e8db7f41b59848c8 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 6 Apr 2023 18:25:31 -0700
Subject: docs: net: move the probe and open/close sections of driver.rst up

Somehow it feels more right to start from the probe then open,
then tx... Much like the lifetime of the driver itself.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/driver.rst | 54 ++++++++++++++++++-------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
index 3040a74d421c..bfbd66871bb3 100644
--- a/Documentation/networking/driver.rst
+++ b/Documentation/networking/driver.rst
@@ -4,6 +4,33 @@
 Softnet Driver Issues
 =====================
 
+Probing guidelines
+==================
+
+Address validation
+------------------
+
+Any hardware layer address you obtain for your device should
+be verified.  For example, for ethernet check it with
+linux/etherdevice.h:is_valid_ether_addr()
+
+Close/stop guidelines
+=====================
+
+Quiescence
+----------
+
+After the ndo_stop routine has been called, the hardware must
+not receive or transmit any data.  All in flight packets must
+be aborted. If necessary, poll or wait for completion of
+any reset commands.
+
+Auto-close
+----------
+
+The ndo_stop routine will be called by unregister_netdevice
+if device is still UP.
+
 Transmit path guidelines
 ========================
 
@@ -89,30 +116,3 @@ to be freed up.
 If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
 must not keep any reference to that SKB and you must not attempt
 to free it up.
-
-Probing guidelines
-==================
-
-Address validation
-------------------
-
-Any hardware layer address you obtain for your device should
-be verified.  For example, for ethernet check it with
-linux/etherdevice.h:is_valid_ether_addr()
-
-Close/stop guidelines
-=====================
-
-Quiescence
-----------
-
-After the ndo_stop routine has been called, the hardware must
-not receive or transmit any data.  All in flight packets must
-be aborted. If necessary, poll or wait for completion of
-any reset commands.
-
-Auto-close
-----------
-
-The ndo_stop routine will be called by unregister_netdevice
-if device is still UP.
-- 
cgit v1.2.3


From 8336462539ae751cbf7d59822eb331602a69df99 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 6 Apr 2023 18:25:32 -0700
Subject: docs: net: use C syntax highlight in driver.rst

Use syntax highlight, comment out the "..." since they are
not valid C.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/driver.rst | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
index bfbd66871bb3..19c363291d04 100644
--- a/Documentation/networking/driver.rst
+++ b/Documentation/networking/driver.rst
@@ -43,7 +43,9 @@ there is no way your device can tell ahead of time when its
 transmit function will become busy.
 
 Instead it must maintain the queue properly.  For example,
-for a driver implementing scatter-gather this means::
+for a driver implementing scatter-gather this means:
+
+.. code-block:: c
 
 	static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
 					       struct net_device *dev)
@@ -51,7 +53,7 @@ for a driver implementing scatter-gather this means::
 		struct drv *dp = netdev_priv(dev);
 
 		lock_tx(dp);
-		...
+		//...
 		/* This is a hard error log it. */
 		if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) {
 			netif_stop_queue(dev);
@@ -61,34 +63,42 @@ for a driver implementing scatter-gather this means::
 			return NETDEV_TX_BUSY;
 		}
 
-		... queue packet to card ...
-		... update tx consumer index ...
+		//... queue packet to card ...
+		//... update tx consumer index ...
 
 		if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1))
 			netif_stop_queue(dev);
 
-		...
+		//...
 		unlock_tx(dp);
-		...
+		//...
 		return NETDEV_TX_OK;
 	}
 
-And then at the end of your TX reclamation event handling::
+And then at the end of your TX reclamation event handling:
+
+.. code-block:: c
 
 	if (netif_queue_stopped(dp->dev) &&
 	    TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
 		netif_wake_queue(dp->dev);
 
-For a non-scatter-gather supporting card, the three tests simply become::
+For a non-scatter-gather supporting card, the three tests simply become:
+
+.. code-block:: c
 
 		/* This is a hard error log it. */
 		if (TX_BUFFS_AVAIL(dp) <= 0)
 
-and::
+and:
+
+.. code-block:: c
 
 		if (TX_BUFFS_AVAIL(dp) == 0)
 
-and::
+and:
+
+.. code-block:: c
 
 	if (netif_queue_stopped(dp->dev) &&
 	    TX_BUFFS_AVAIL(dp) > 0)
-- 
cgit v1.2.3


From c91c46de6bbc1147ae5dfe046b87f5f3d6593215 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 6 Apr 2023 18:25:33 -0700
Subject: net: provide macros for commonly copied lockless queue stop/wake code

A lot of drivers follow the same scheme to stop / start queues
without introducing locks between xmit and NAPI tx completions.
I'm guessing they all copy'n'paste each other's code.
The original code dates back all the way to e1000 and Linux 2.6.19.

Smaller drivers shy away from the scheme and introduce a lock
which may cause deadlocks in netpoll.

Provide macros which encapsulate the necessary logic.

The macros do not prevent false wake ups, the extra barrier
required to close that race is not worth it. See discussion in:
https://lore.kernel.org/all/c39312a2-4537-14b4-270c-9fe1fbb91e89@gmail.com/

Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/driver.rst |   6 ++
 include/linux/netdevice.h           |   1 +
 include/net/netdev_queues.h         | 144 ++++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+)
 create mode 100644 include/net/netdev_queues.h

(limited to 'Documentation')

diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
index 19c363291d04..4071f2c00f8b 100644
--- a/Documentation/networking/driver.rst
+++ b/Documentation/networking/driver.rst
@@ -104,6 +104,12 @@ and:
 	    TX_BUFFS_AVAIL(dp) > 0)
 		netif_wake_queue(dp->dev);
 
+Lockless queue stop / wake helper macros
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/net/netdev_queues.h
+   :doc: Lockless queue stopping / waking helpers.
+
 No exclusive ownership
 ----------------------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1c25b39681b3..7bec9a2be8ef 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3335,6 +3335,7 @@ static inline void netif_tx_wake_all_queues(struct net_device *dev)
 
 static __always_inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 {
+	/* Must be an atomic op see netif_txq_try_stop() */
 	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
diff --git a/include/net/netdev_queues.h b/include/net/netdev_queues.h
new file mode 100644
index 000000000000..5236d78bbdeb
--- /dev/null
+++ b/include/net/netdev_queues.h
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NET_QUEUES_H
+#define _LINUX_NET_QUEUES_H
+
+#include <linux/netdevice.h>
+
+/**
+ * DOC: Lockless queue stopping / waking helpers.
+ *
+ * The netif_txq_maybe_stop() and __netif_txq_maybe_wake()
+ * macros are designed to safely implement stopping
+ * and waking netdev queues without full lock protection.
+ *
+ * We assume that there can be no concurrent stop attempts and no concurrent
+ * wake attempts. The try-stop should happen from the xmit handler,
+ * while wake up should be triggered from NAPI poll context.
+ * The two may run concurrently (single producer, single consumer).
+ *
+ * The try-stop side is expected to run from the xmit handler and therefore
+ * it does not reschedule Tx (netif_tx_start_queue() instead of
+ * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
+ * handler may lead to xmit queue being enabled but not run.
+ * The waking side does not have similar context restrictions.
+ *
+ * The macros guarantee that rings will not remain stopped if there's
+ * space available, but they do *not* prevent false wake ups when
+ * the ring is full! Drivers should check for ring full at the start
+ * of the xmit handler.
+ *
+ * All descriptor ring indexes (and other relevant shared state) must
+ * be updated before invoking the macros.
+ */
+
+#define netif_txq_try_stop(txq, get_desc, start_thrs)			\
+	({								\
+		int _res;						\
+									\
+		netif_tx_stop_queue(txq);				\
+		/* Producer index and stop bit must be visible		\
+		 * to consumer before we recheck.			\
+		 * Pairs with a barrier in __netif_txq_maybe_wake().	\
+		 */							\
+		smp_mb__after_atomic();					\
+									\
+		/* We need to check again in case another		\
+		 * CPU has just made room available.			\
+		 */							\
+		_res = 0;						\
+		if (unlikely(get_desc >= start_thrs)) {			\
+			netif_tx_start_queue(txq);			\
+			_res = -1;					\
+		}							\
+		_res;							\
+	})								\
+
+/**
+ * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
+ * @txq:	struct netdev_queue to stop/start
+ * @get_desc:	get current number of free descriptors (see requirements below!)
+ * @stop_thrs:	minimal number of available descriptors for queue to be left
+ *		enabled
+ * @start_thrs:	minimal number of descriptors to re-enable the queue, can be
+ *		equal to @stop_thrs or higher to avoid frequent waking
+ *
+ * All arguments may be evaluated multiple times, beware of side effects.
+ * @get_desc must be a formula or a function call, it must always
+ * return up-to-date information when evaluated!
+ * Expected to be used from ndo_start_xmit, see the comment on top of the file.
+ *
+ * Returns:
+ *	 0 if the queue was stopped
+ *	 1 if the queue was left enabled
+ *	-1 if the queue was re-enabled (raced with waking)
+ */
+#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs)	\
+	({								\
+		int _res;						\
+									\
+		_res = 1;						\
+		if (unlikely(get_desc < stop_thrs))			\
+			_res = netif_txq_try_stop(txq, get_desc, start_thrs); \
+		_res;							\
+	})								\
+
+
+/**
+ * __netif_txq_maybe_wake() - locklessly wake a Tx queue, if needed
+ * @txq:	struct netdev_queue to stop/start
+ * @get_desc:	get current number of free descriptors (see requirements below!)
+ * @start_thrs:	minimal number of descriptors to re-enable the queue
+ * @down_cond:	down condition, predicate indicating that the queue should
+ *		not be woken up even if descriptors are available
+ *
+ * All arguments may be evaluated multiple times.
+ * @get_desc must be a formula or a function call, it must always
+ * return up-to-date information when evaluated!
+ *
+ * Returns:
+ *	 0 if the queue was woken up
+ *	 1 if the queue was already enabled (or disabled but @down_cond is true)
+ *	-1 if the queue was left unchanged (@start_thrs not reached)
+ */
+#define __netif_txq_maybe_wake(txq, get_desc, start_thrs, down_cond)	\
+	({								\
+		int _res;						\
+									\
+		_res = -1;						\
+		if (likely(get_desc > start_thrs)) {			\
+			/* Make sure that anybody stopping the queue after \
+			 * this sees the new next_to_clean.		\
+			 */						\
+			smp_mb();					\
+			_res = 1;					\
+			if (unlikely(netif_tx_queue_stopped(txq)) &&	\
+			    !(down_cond)) {				\
+				netif_tx_wake_queue(txq);		\
+				_res = 0;				\
+			}						\
+		}							\
+		_res;							\
+	})
+
+#define netif_txq_maybe_wake(txq, get_desc, start_thrs)		\
+	__netif_txq_maybe_wake(txq, get_desc, start_thrs, false)
+
+/* subqueue variants follow */
+
+#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs)		\
+	({								\
+		struct netdev_queue *txq;				\
+									\
+		txq = netdev_get_tx_queue(dev, idx);			\
+		netif_txq_try_stop(txq, get_desc, start_thrs);		\
+	})
+
+#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
+	({								\
+		struct netdev_queue *txq;				\
+									\
+		txq = netdev_get_tx_queue(dev, idx);			\
+		netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs); \
+	})
+
+#endif
-- 
cgit v1.2.3


From ec48599abee3a16fdc93c1c3c3e153a4f4d29420 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 10 Apr 2023 23:16:33 -0500
Subject: bpf,docs: Remove references to bpf_cgroup_kptr_get()

The bpf_cgroup_kptr_get() kfunc has been removed, and
bpf_cgroup_acquire() / bpf_cgroup_release() now have the same semantics
as bpf_task_acquire() / bpf_task_release(). This patch updates the BPF
documentation to reflect this.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230411041633.179404-3-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst | 68 --------------------------------------------
 1 file changed, 68 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index d8a16c4bef7f..3b42cfe12437 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -572,74 +572,6 @@ bpf_task_release() respectively, so we won't provide examples for them.
 
 ----
 
-You may also acquire a reference to a ``struct cgroup`` kptr that's already
-stored in a map using bpf_cgroup_kptr_get():
-
-.. kernel-doc:: kernel/bpf/helpers.c
-   :identifiers: bpf_cgroup_kptr_get
-
-Here's an example of how it can be used:
-
-.. code-block:: c
-
-	/* struct containing the struct task_struct kptr which is actually stored in the map. */
-	struct __cgroups_kfunc_map_value {
-		struct cgroup __kptr * cgroup;
-	};
-
-	/* The map containing struct __cgroups_kfunc_map_value entries. */
-	struct {
-		__uint(type, BPF_MAP_TYPE_HASH);
-		__type(key, int);
-		__type(value, struct __cgroups_kfunc_map_value);
-		__uint(max_entries, 1);
-	} __cgroups_kfunc_map SEC(".maps");
-
-	/* ... */
-
-	/**
-	 * A simple example tracepoint program showing how a
-	 * struct cgroup kptr that is stored in a map can
-	 * be acquired using the bpf_cgroup_kptr_get() kfunc.
-	 */
-	 SEC("tp_btf/cgroup_mkdir")
-	 int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path)
-	 {
-		struct cgroup *kptr;
-		struct __cgroups_kfunc_map_value *v;
-		s32 id = cgrp->self.id;
-
-		/* Assume a cgroup kptr was previously stored in the map. */
-		v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id);
-		if (!v)
-			return -ENOENT;
-
-		/* Acquire a reference to the cgroup kptr that's already stored in the map. */
-		kptr = bpf_cgroup_kptr_get(&v->cgroup);
-		if (!kptr)
-			/* If no cgroup was present in the map, it's because
-			 * we're racing with another CPU that removed it with
-			 * bpf_kptr_xchg() between the bpf_map_lookup_elem()
-			 * above, and our call to bpf_cgroup_kptr_get().
-			 * bpf_cgroup_kptr_get() internally safely handles this
-			 * race, and will return NULL if the task is no longer
-			 * present in the map by the time we invoke the kfunc.
-			 */
-			return -EBUSY;
-
-		/* Free the reference we just took above. Note that the
-		 * original struct cgroup kptr is still in the map. It will
-		 * be freed either at a later time if another context deletes
-		 * it from the map, or automatically by the BPF subsystem if
-		 * it's still present when the map is destroyed.
-		 */
-		bpf_cgroup_release(kptr);
-
-		return 0;
-        }
-
-----
-
 Other kfuncs available for interacting with ``struct cgroup *`` objects are
 bpf_cgroup_ancestor() and bpf_cgroup_from_id(), allowing callers to access
 the ancestor of a cgroup and find a cgroup by its ID, respectively. Both
-- 
cgit v1.2.3


From d554ba0ea03cbd1cc1455f6133d03cd9b7967ac6 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Tue, 11 Apr 2023 15:03:58 -0500
Subject: dt-bindings: net: snps,dwmac: Update interrupt-names

As commit fc191af1bb0d ("net: stmmac: platform: Fix misleading
interrupt error msg") noted, not every stmmac based platform
makes use of the 'eth_wake_irq' or 'eth_lpi' interrupts.

So, update the 'interrupt-names' inside 'snps,dwmac' YAML
bindings to reflect the same.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Brian Masney <bmasney@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 74f2ddc12018..5a4737e969a3 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -105,7 +105,7 @@ properties:
     minItems: 1
     items:
       - const: macirq
-      - const: eth_wake_irq
+      - enum: [eth_wake_irq, eth_lpi]
       - const: eth_lpi
 
   clocks:
-- 
cgit v1.2.3


From d70c215bdd17b92358e90c4e4d190e749a48d982 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Tue, 11 Apr 2023 15:03:59 -0500
Subject: dt-bindings: net: snps,dwmac: Add Qualcomm Ethernet ETHQOS
 compatibles

Add Qualcomm Ethernet ETHQOS compatible checks
in snps,dwmac YAML binding document.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Brian Masney <bmasney@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 5a4737e969a3..1e7982704114 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -65,6 +65,8 @@ properties:
         - ingenic,x2000-mac
         - loongson,ls2k-dwmac
         - loongson,ls7a-dwmac
+        - qcom,qcs404-ethqos
+        - qcom,sm8150-ethqos
         - renesas,r9a06g032-gmac
         - renesas,rzn1-gmac
         - rockchip,px30-gmac
@@ -625,6 +627,8 @@ allOf:
               - ingenic,x1600-mac
               - ingenic,x1830-mac
               - ingenic,x2000-mac
+              - qcom,qcs404-ethqos
+              - qcom,sm8150-ethqos
               - snps,dwmac-4.00
               - snps,dwmac-4.10a
               - snps,dwmac-4.20a
-- 
cgit v1.2.3


From 02e98ce3db149919d6aa35f873e5d74c1ac36640 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Tue, 11 Apr 2023 15:04:00 -0500
Subject: dt-bindings: net: qcom,ethqos: Convert bindings to yaml

Convert Qualcomm ETHQOS Ethernet devicetree binding to YAML.
In doing so add a new property for iommus since newer platforms support
using one, and without it make dtbs_check fails on them.

While at it, also update the MAINTAINERS file to point to the yaml
version of the bindings.

Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
[halaney: Remove duplicated properties, add MAINTAINERS and iommus]
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Brian Masney <bmasney@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../devicetree/bindings/net/qcom,ethqos.txt        |  66 -------------
 .../devicetree/bindings/net/qcom,ethqos.yaml       | 110 +++++++++++++++++++++
 MAINTAINERS                                        |   2 +-
 3 files changed, 111 insertions(+), 67 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/qcom,ethqos.txt
 create mode 100644 Documentation/devicetree/bindings/net/qcom,ethqos.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.txt b/Documentation/devicetree/bindings/net/qcom,ethqos.txt
deleted file mode 100644
index 1f5746849a71..000000000000
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Qualcomm Ethernet ETHQOS device
-
-This documents dwmmac based ethernet device which supports Gigabit
-ethernet for version v2.3.0 onwards.
-
-This device has following properties:
-
-Required properties:
-
-- compatible: Should be one of:
-		"qcom,qcs404-ethqos"
-		"qcom,sm8150-ethqos"
-
-- reg: Address and length of the register set for the device
-
-- reg-names: Should contain register names "stmmaceth", "rgmii"
-
-- clocks: Should contain phandle to clocks
-
-- clock-names: Should contain clock names "stmmaceth", "pclk",
-		"ptp_ref", "rgmii"
-
-- interrupts: Should contain phandle to interrupts
-
-- interrupt-names: Should contain interrupt names "macirq", "eth_lpi"
-
-Rest of the properties are defined in stmmac.txt file in same directory
-
-
-Example:
-
-ethernet: ethernet@7a80000 {
-	compatible = "qcom,qcs404-ethqos";
-	reg = <0x07a80000 0x10000>,
-		<0x07a96000 0x100>;
-	reg-names = "stmmaceth", "rgmii";
-	clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
-	clocks = <&gcc GCC_ETH_AXI_CLK>,
-		<&gcc GCC_ETH_SLAVE_AHB_CLK>,
-		<&gcc GCC_ETH_PTP_CLK>,
-		<&gcc GCC_ETH_RGMII_CLK>;
-	interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
-	interrupt-names = "macirq", "eth_lpi";
-	snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
-	snps,reset-active-low;
-
-	snps,txpbl = <8>;
-	snps,rxpbl = <2>;
-	snps,aal;
-	snps,tso;
-
-	phy-handle = <&phy1>;
-	phy-mode = "rgmii";
-
-	mdio {
-		#address-cells = <0x1>;
-		#size-cells = <0x0>;
-		compatible = "snps,dwmac-mdio";
-		phy1: phy@4 {
-			device_type = "ethernet-phy";
-			reg = <0x4>;
-		};
-	};
-
-};
diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
new file mode 100644
index 000000000000..88234a2010b1
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -0,0 +1,110 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qcom,ethqos.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Ethernet ETHQOS device
+
+maintainers:
+  - Bhupesh Sharma <bhupesh.sharma@linaro.org>
+
+description:
+  dwmac based Qualcomm ethernet devices which support Gigabit
+  ethernet (version v2.3.0 and onwards).
+
+allOf:
+  - $ref: snps,dwmac.yaml#
+
+properties:
+  compatible:
+    enum:
+      - qcom,qcs404-ethqos
+      - qcom,sm8150-ethqos
+
+  reg:
+    maxItems: 2
+
+  reg-names:
+    items:
+      - const: stmmaceth
+      - const: rgmii
+
+  interrupts:
+    items:
+      - description: Combined signal for various interrupt events
+      - description: The interrupt that occurs when Rx exits the LPI state
+
+  interrupt-names:
+    items:
+      - const: macirq
+      - const: eth_lpi
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: stmmaceth
+      - const: pclk
+      - const: ptp_ref
+      - const: rgmii
+
+  iommus:
+    maxItems: 1
+
+required:
+  - compatible
+  - clocks
+  - clock-names
+  - reg-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/qcom,gcc-qcs404.h>
+    #include <dt-bindings/gpio/gpio.h>
+
+    ethernet: ethernet@7a80000 {
+      compatible = "qcom,qcs404-ethqos";
+      reg = <0x07a80000 0x10000>,
+            <0x07a96000 0x100>;
+      reg-names = "stmmaceth", "rgmii";
+      clock-names = "stmmaceth", "pclk", "ptp_ref", "rgmii";
+      clocks = <&gcc GCC_ETH_AXI_CLK>,
+               <&gcc GCC_ETH_SLAVE_AHB_CLK>,
+               <&gcc GCC_ETH_PTP_CLK>,
+               <&gcc GCC_ETH_RGMII_CLK>;
+      interrupts = <GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "macirq", "eth_lpi";
+
+      rx-fifo-depth = <4096>;
+      tx-fifo-depth = <4096>;
+
+      snps,tso;
+      snps,reset-gpio = <&tlmm 60 GPIO_ACTIVE_LOW>;
+      snps,reset-active-low;
+      snps,reset-delays-us = <0 10000 10000>;
+
+      pinctrl-names = "default";
+      pinctrl-0 = <&ethernet_defaults>;
+
+      phy-handle = <&phy1>;
+      phy-mode = "rgmii";
+      mdio {
+        #address-cells = <0x1>;
+        #size-cells = <0x0>;
+
+        compatible = "snps,dwmac-mdio";
+        phy1: phy@4 {
+          compatible = "ethernet-phy-ieee802.3-c22";
+          device_type = "ethernet-phy";
+          reg = <0x4>;
+
+          #phy-cells = <0>;
+        };
+      };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 0b19a3fb266c..fdcef63fa9a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17304,7 +17304,7 @@ M:	Vinod Koul <vkoul@kernel.org>
 R:	Bhupesh Sharma <bhupesh.sharma@linaro.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/net/qcom,ethqos.txt
+F:	Documentation/devicetree/bindings/net/qcom,ethqos.yaml
 F:	drivers/net/ethernet/stmicro/stmmac/dwmac-qcom-ethqos.c
 
 QUALCOMM FASTRPC DRIVER
-- 
cgit v1.2.3


From 25926a703ec155c99a28f6730b79e9da3acc0b10 Mon Sep 17 00:00:00 2001
From: Andrew Halaney <ahalaney@redhat.com>
Date: Tue, 11 Apr 2023 15:04:01 -0500
Subject: dt-bindings: net: qcom,ethqos: Add Qualcomm sc8280xp compatibles

The sc8280xp has a new version of the ETHQOS hardware in it, EMAC v3.
Add a compatible for this.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Andrew Halaney <ahalaney@redhat.com>
Tested-by: Brian Masney <bmasney@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/devicetree/bindings/net/qcom,ethqos.yaml | 1 +
 Documentation/devicetree/bindings/net/snps,dwmac.yaml  | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
index 88234a2010b1..60a38044fb19 100644
--- a/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ethqos.yaml
@@ -20,6 +20,7 @@ properties:
   compatible:
     enum:
       - qcom,qcs404-ethqos
+      - qcom,sc8280xp-ethqos
       - qcom,sm8150-ethqos
 
   reg:
diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 1e7982704114..da311c1f2c88 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -66,6 +66,7 @@ properties:
         - loongson,ls2k-dwmac
         - loongson,ls7a-dwmac
         - qcom,qcs404-ethqos
+        - qcom,sc8280xp-ethqos
         - qcom,sm8150-ethqos
         - renesas,r9a06g032-gmac
         - renesas,rzn1-gmac
@@ -574,6 +575,7 @@ allOf:
               - ingenic,x1600-mac
               - ingenic,x1830-mac
               - ingenic,x2000-mac
+              - qcom,sc8280xp-ethqos
               - snps,dwmac-3.50a
               - snps,dwmac-4.10a
               - snps,dwmac-4.20a
@@ -628,6 +630,7 @@ allOf:
               - ingenic,x1830-mac
               - ingenic,x2000-mac
               - qcom,qcs404-ethqos
+              - qcom,sc8280xp-ethqos
               - qcom,sm8150-ethqos
               - snps,dwmac-4.00
               - snps,dwmac-4.10a
-- 
cgit v1.2.3


From 50762d9af307b1c466fe0e1441c7923975927d98 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Tue, 11 Apr 2023 18:50:36 -0700
Subject: net: docs: update the sample code in driver.rst

The sample code talks about single-queue devices and uses locks.
Update it to something resembling more modern code.
Make sure we mention use of READ_ONCE() / WRITE_ONCE().

Change the comment which talked about consumer on the xmit side.
AFAIU xmit is the producer and completions are a consumer.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/networking/driver.rst | 61 ++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 34 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
index 4071f2c00f8b..4f5dfa9c022e 100644
--- a/Documentation/networking/driver.rst
+++ b/Documentation/networking/driver.rst
@@ -47,30 +47,43 @@ for a driver implementing scatter-gather this means:
 
 .. code-block:: c
 
+	static u32 drv_tx_avail(struct drv_ring *dr)
+	{
+		u32 used = READ_ONCE(dr->prod) - READ_ONCE(dr->cons);
+
+		return dr->tx_ring_size - (used & dr->tx_ring_mask);
+	}
+
 	static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
 					       struct net_device *dev)
 	{
 		struct drv *dp = netdev_priv(dev);
+		struct netdev_queue *txq;
+		struct drv_ring *dr;
+		int idx;
+
+		idx = skb_get_queue_mapping(skb);
+		dr = dp->tx_rings[idx];
+		txq = netdev_get_tx_queue(dev, idx);
 
-		lock_tx(dp);
 		//...
-		/* This is a hard error log it. */
-		if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+		/* This should be a very rare race - log it. */
+		if (drv_tx_avail(dr) <= skb_shinfo(skb)->nr_frags + 1) {
 			netif_stop_queue(dev);
-			unlock_tx(dp);
-			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-			       dev->name);
+			netdev_warn(dev, "Tx Ring full when queue awake!\n");
 			return NETDEV_TX_BUSY;
 		}
 
 		//... queue packet to card ...
-		//... update tx consumer index ...
 
-		if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1))
-			netif_stop_queue(dev);
+		netdev_tx_sent_queue(txq, skb->len);
+
+		//... update tx producer index using WRITE_ONCE() ...
+
+		if (!netif_txq_maybe_stop(txq, drv_tx_avail(dr),
+					  MAX_SKB_FRAGS + 1, 2 * MAX_SKB_FRAGS))
+			dr->stats.stopped++;
 
-		//...
-		unlock_tx(dp);
 		//...
 		return NETDEV_TX_OK;
 	}
@@ -79,30 +92,10 @@ And then at the end of your TX reclamation event handling:
 
 .. code-block:: c
 
-	if (netif_queue_stopped(dp->dev) &&
-	    TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dp->dev);
-
-For a non-scatter-gather supporting card, the three tests simply become:
-
-.. code-block:: c
-
-		/* This is a hard error log it. */
-		if (TX_BUFFS_AVAIL(dp) <= 0)
-
-and:
-
-.. code-block:: c
-
-		if (TX_BUFFS_AVAIL(dp) == 0)
-
-and:
-
-.. code-block:: c
+	//... update tx consumer index using WRITE_ONCE() ...
 
-	if (netif_queue_stopped(dp->dev) &&
-	    TX_BUFFS_AVAIL(dp) > 0)
-		netif_wake_queue(dp->dev);
+	netif_txq_completed_wake(txq, cmpl_pkts, cmpl_bytes,
+				 drv_tx_avail(dr), 2 * MAX_SKB_FRAGS);
 
 Lockless queue stop / wake helper macros
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
cgit v1.2.3


From 8c48eea3adf3119e0a3fc57bd31f6966f26ee784 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 12 Apr 2023 21:26:04 -0700
Subject: page_pool: allow caching from safely localized NAPI

Recent patches to mlx5 mentioned a regression when moving from
driver local page pool to only using the generic page pool code.
Page pool has two recycling paths (1) direct one, which runs in
safe NAPI context (basically consumer context, so producing
can be lockless); and (2) via a ptr_ring, which takes a spin
lock because the freeing can happen from any CPU; producer
and consumer may run concurrently.

Since the page pool code was added, Eric introduced a revised version
of deferred skb freeing. TCP skbs are now usually returned to the CPU
which allocated them, and freed in softirq context. This places the
freeing (producing of pages back to the pool) enticingly close to
the allocation (consumer).

If we can prove that we're freeing in the same softirq context in which
the consumer NAPI will run - lockless use of the cache is perfectly fine,
no need for the lock.

Let drivers link the page pool to a NAPI instance. If the NAPI instance
is scheduled on the same CPU on which we're freeing - place the pages
in the direct cache.

With that and patched bnxt (XDP enabled to engage the page pool, sigh,
bnxt really needs page pool work :() I see a 2.6% perf boost with
a TCP stream test (app on a different physical core than softirq).

The CPU use of relevant functions decreases as expected:

  page_pool_refill_alloc_cache   1.17% -> 0%
  _raw_spin_lock                 2.41% -> 0.98%

Only consider lockless path to be safe when NAPI is scheduled
- in practice this should cover majority if not all of steady state
workloads. It's usually the NAPI kicking in that causes the skb flush.

The main case we'll miss out on is when application runs on the same
CPU as NAPI. In that case we don't use the deferred skb free path.

Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Tested-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/page_pool.rst |  1 +
 include/linux/netdevice.h              |  3 +++
 include/linux/skbuff.h                 | 20 +++++++++++++-------
 include/net/page_pool.h                |  3 ++-
 net/core/dev.c                         |  3 +++
 net/core/page_pool.c                   | 15 +++++++++++++--
 net/core/skbuff.c                      |  4 ++--
 7 files changed, 37 insertions(+), 12 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst
index 30f1344e7cca..873efd97f822 100644
--- a/Documentation/networking/page_pool.rst
+++ b/Documentation/networking/page_pool.rst
@@ -165,6 +165,7 @@ Registration
     pp_params.pool_size = DESC_NUM;
     pp_params.nid = NUMA_NO_NODE;
     pp_params.dev = priv->dev;
+    pp_params.napi = napi; /* only if locking is tied to NAPI */
     pp_params.dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
     page_pool = page_pool_create(&pp_params);
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 96d27d558b0c..203c0df2046c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -360,8 +360,11 @@ struct napi_struct {
 	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
+	/* CPU actively polling if netpoll is configured */
 	int			poll_owner;
 #endif
+	/* CPU on which NAPI has been scheduled for processing */
+	int			list_owner;
 	struct net_device	*dev;
 	struct gro_list		gro_hash[GRO_HASH_BUCKETS];
 	struct sk_buff		*skb;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 494a23a976b0..a823ec3aa326 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3386,6 +3386,18 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
 	__skb_frag_ref(&skb_shinfo(skb)->frags[f]);
 }
 
+static inline void
+napi_frag_unref(skb_frag_t *frag, bool recycle, bool napi_safe)
+{
+	struct page *page = skb_frag_page(frag);
+
+#ifdef CONFIG_PAGE_POOL
+	if (recycle && page_pool_return_skb_page(page, napi_safe))
+		return;
+#endif
+	put_page(page);
+}
+
 /**
  * __skb_frag_unref - release a reference on a paged fragment.
  * @frag: the paged fragment
@@ -3396,13 +3408,7 @@ static inline void skb_frag_ref(struct sk_buff *skb, int f)
  */
 static inline void __skb_frag_unref(skb_frag_t *frag, bool recycle)
 {
-	struct page *page = skb_frag_page(frag);
-
-#ifdef CONFIG_PAGE_POOL
-	if (recycle && page_pool_return_skb_page(page))
-		return;
-#endif
-	put_page(page);
+	napi_frag_unref(frag, recycle, false);
 }
 
 /**
diff --git a/include/net/page_pool.h b/include/net/page_pool.h
index ddfa0b328677..91b808dade82 100644
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -77,6 +77,7 @@ struct page_pool_params {
 	unsigned int	pool_size;
 	int		nid;  /* Numa node id to allocate from pages from */
 	struct device	*dev; /* device, for DMA pre-mapping purposes */
+	struct napi_struct *napi; /* Sole consumer of pages, otherwise NULL */
 	enum dma_data_direction dma_dir; /* DMA mapping direction */
 	unsigned int	max_len; /* max DMA sync memory size */
 	unsigned int	offset;  /* DMA addr offset */
@@ -239,7 +240,7 @@ inline enum dma_data_direction page_pool_get_dma_dir(struct page_pool *pool)
 	return pool->p.dma_dir;
 }
 
-bool page_pool_return_skb_page(struct page *page);
+bool page_pool_return_skb_page(struct page *page, bool napi_safe);
 
 struct page_pool *page_pool_create(const struct page_pool_params *params);
 
diff --git a/net/core/dev.c b/net/core/dev.c
index c7f13742b56c..8aea68275172 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4359,6 +4359,7 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	}
 
 	list_add_tail(&napi->poll_list, &sd->poll_list);
+	WRITE_ONCE(napi->list_owner, smp_processor_id());
 	/* If not called from net_rx_action()
 	 * we have to raise NET_RX_SOFTIRQ.
 	 */
@@ -6069,6 +6070,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 		list_del_init(&n->poll_list);
 		local_irq_restore(flags);
 	}
+	WRITE_ONCE(n->list_owner, -1);
 
 	val = READ_ONCE(n->state);
 	do {
@@ -6384,6 +6386,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 #ifdef CONFIG_NETPOLL
 	napi->poll_owner = -1;
 #endif
+	napi->list_owner = -1;
 	set_bit(NAPI_STATE_SCHED, &napi->state);
 	set_bit(NAPI_STATE_NPSVC, &napi->state);
 	list_add_rcu(&napi->dev_list, &dev->napi_list);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 193c18799865..2f6bf422ed30 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -19,6 +19,7 @@
 #include <linux/mm.h> /* for put_page() */
 #include <linux/poison.h>
 #include <linux/ethtool.h>
+#include <linux/netdevice.h>
 
 #include <trace/events/page_pool.h>
 
@@ -874,9 +875,11 @@ void page_pool_update_nid(struct page_pool *pool, int new_nid)
 }
 EXPORT_SYMBOL(page_pool_update_nid);
 
-bool page_pool_return_skb_page(struct page *page)
+bool page_pool_return_skb_page(struct page *page, bool napi_safe)
 {
+	struct napi_struct *napi;
 	struct page_pool *pp;
+	bool allow_direct;
 
 	page = compound_head(page);
 
@@ -892,12 +895,20 @@ bool page_pool_return_skb_page(struct page *page)
 
 	pp = page->pp;
 
+	/* Allow direct recycle if we have reasons to believe that we are
+	 * in the same context as the consumer would run, so there's
+	 * no possible race.
+	 */
+	napi = pp->p.napi;
+	allow_direct = napi_safe && napi &&
+		READ_ONCE(napi->list_owner) == smp_processor_id();
+
 	/* Driver set this to memory recycling info. Reset it on recycle.
 	 * This will *not* work for NIC using a split-page memory model.
 	 * The page will be returned to the pool here regardless of the
 	 * 'flipped' fragment being in use or not.
 	 */
-	page_pool_put_full_page(pp, page, false);
+	page_pool_put_full_page(pp, page, allow_direct);
 
 	return true;
 }
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 2b5a98c5cb49..ef81452759be 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -843,7 +843,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
 {
 	if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
 		return false;
-	return page_pool_return_skb_page(virt_to_page(data));
+	return page_pool_return_skb_page(virt_to_page(data), napi_safe);
 }
 
 static void skb_kfree_head(void *head, unsigned int end_offset)
@@ -889,7 +889,7 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
 	}
 
 	for (i = 0; i < shinfo->nr_frags; i++)
-		__skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
+		napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);
 
 free_head:
 	if (shinfo->frag_list)
-- 
cgit v1.2.3


From 530474e6d044d07b179dc4d3392fb853c47446d0 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Sun, 16 Apr 2023 03:49:28 -0500
Subject: bpf,docs: Remove KF_KPTR_GET from documentation

A prior patch removed KF_KPTR_GET from the kernel. Now that it's no
longer accessible to kfunc authors, this patch removes it from the BPF
kfunc documentation.

Signed-off-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/r/20230416084928.326135-4-void@manifault.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/kfuncs.rst | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst
index 3b42cfe12437..ea2516374d92 100644
--- a/Documentation/bpf/kfuncs.rst
+++ b/Documentation/bpf/kfuncs.rst
@@ -184,16 +184,7 @@ in. All copies of the pointer being released are invalidated as a result of
 invoking kfunc with this flag. KF_RELEASE kfuncs automatically receive the
 protection afforded by the KF_TRUSTED_ARGS flag described below.
 
-2.4.4 KF_KPTR_GET flag
-----------------------
-
-The KF_KPTR_GET flag is used to indicate that the kfunc takes the first argument
-as a pointer to kptr, safely increments the refcount of the object it points to,
-and returns a reference to the user. The rest of the arguments may be normal
-arguments of a kfunc. The KF_KPTR_GET flag should be used in conjunction with
-KF_ACQUIRE and KF_RET_NULL flags.
-
-2.4.5 KF_TRUSTED_ARGS flag
+2.4.4 KF_TRUSTED_ARGS flag
 --------------------------
 
 The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
@@ -205,7 +196,7 @@ exception described below).
 There are two types of pointers to kernel objects which are considered "valid":
 
 1. Pointers which are passed as tracepoint or struct_ops callback arguments.
-2. Pointers which were returned from a KF_ACQUIRE or KF_KPTR_GET kfunc.
+2. Pointers which were returned from a KF_ACQUIRE kfunc.
 
 Pointers to non-BTF objects (e.g. scalar pointers) may also be passed to
 KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.
@@ -232,13 +223,13 @@ In other words, you must:
 2. Specify the type and name of the trusted nested field. This field must match
    the field in the original type definition exactly.
 
-2.4.6 KF_SLEEPABLE flag
+2.4.5 KF_SLEEPABLE flag
 -----------------------
 
 The KF_SLEEPABLE flag is used for kfuncs that may sleep. Such kfuncs can only
 be called by sleepable BPF programs (BPF_F_SLEEPABLE).
 
-2.4.7 KF_DESTRUCTIVE flag
+2.4.6 KF_DESTRUCTIVE flag
 --------------------------
 
 The KF_DESTRUCTIVE flag is used to indicate functions calling which is
@@ -247,7 +238,7 @@ rebooting or panicking. Due to this additional restrictions apply to these
 calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
 added later.
 
-2.4.8 KF_RCU flag
+2.4.7 KF_RCU flag
 -----------------
 
 The KF_RCU flag is a weaker version of KF_TRUSTED_ARGS. The kfuncs marked with
@@ -260,7 +251,7 @@ also be KF_RET_NULL.
 
 .. _KF_deprecated_flag:
 
-2.4.9 KF_DEPRECATED flag
+2.4.8 KF_DEPRECATED flag
 ------------------------
 
 The KF_DEPRECATED flag is used for kfuncs which are scheduled to be
-- 
cgit v1.2.3


From 64822bdba456a145f7cb4c66d9939bf42c64ae62 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Tue, 7 Feb 2023 14:35:04 +0100
Subject: dt-bindings: mt76: add active-low property for led

LEDs can be in low-active mode, driver already supports it, but
documentation is missing. Add documentation for the dt property.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
index 7d526ff53fb7..67b63f119f64 100644
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
@@ -111,6 +111,11 @@ properties:
     $ref: /schemas/leds/common.yaml#
     additionalProperties: false
     properties:
+      led-active-low:
+        description:
+          LED is enabled with ground signal.
+        type: boolean
+
       led-sources:
         maxItems: 1
 
-- 
cgit v1.2.3


From 13f9351180aaa6cd745303339d2a1968fb5cc47d Mon Sep 17 00:00:00 2001
From: Emil Renner Berthing <kernel@esmil.dk>
Date: Mon, 17 Apr 2023 18:02:46 +0800
Subject: dt-bindings: net: snps,dwmac: Add dwmac-5.20 version

Add dwmac-5.20 IP version to snps.dwmac.yaml

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
Signed-off-by: Samin Guo <samin.guo@starfivetech.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index da311c1f2c88..e5ca96bcbae0 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -30,6 +30,7 @@ select:
           - snps,dwmac-4.10a
           - snps,dwmac-4.20a
           - snps,dwmac-5.10a
+          - snps,dwmac-5.20
           - snps,dwxgmac
           - snps,dwxgmac-2.10
 
@@ -90,6 +91,7 @@ properties:
         - snps,dwmac-4.10a
         - snps,dwmac-4.20a
         - snps,dwmac-5.10a
+        - snps,dwmac-5.20
         - snps,dwxgmac
         - snps,dwxgmac-2.10
 
@@ -579,6 +581,7 @@ allOf:
               - snps,dwmac-3.50a
               - snps,dwmac-4.10a
               - snps,dwmac-4.20a
+              - snps,dwmac-5.20
               - snps,dwxgmac
               - snps,dwxgmac-2.10
               - st,spear600-gmac
@@ -636,6 +639,7 @@ allOf:
               - snps,dwmac-4.10a
               - snps,dwmac-4.20a
               - snps,dwmac-5.10a
+              - snps,dwmac-5.20
               - snps,dwxgmac
               - snps,dwxgmac-2.10
               - st,spear600-gmac
-- 
cgit v1.2.3


From 843f603762a5453e10999d89f2adea6a3cfe8735 Mon Sep 17 00:00:00 2001
From: Samin Guo <samin.guo@starfivetech.com>
Date: Mon, 17 Apr 2023 18:02:48 +0800
Subject: dt-bindings: net: snps,dwmac: Add 'ahb' reset/reset-name

According to:
stmmac_platform.c: stmmac_probe_config_dt
stmmac_main.c: stmmac_dvr_probe

dwmac controller may require one (stmmaceth) or two (stmmaceth+ahb)
reset signals, and the maxItems of resets/reset-names is going to be 2.

The GMAC of the StarFive JH7110 SoC must have two resets.
It uses the snps,dwmac-5.20 IP.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Samin Guo <samin.guo@starfivetech.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 Documentation/devicetree/bindings/net/snps,dwmac.yaml | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index e5ca96bcbae0..71151478885a 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -136,12 +136,16 @@ properties:
         - ptp_ref
 
   resets:
-    maxItems: 1
-    description:
-      MAC Reset signal.
+    minItems: 1
+    items:
+      - description: GMAC stmmaceth reset
+      - description: AHB reset
 
   reset-names:
-    const: stmmaceth
+    minItems: 1
+    items:
+      - const: stmmaceth
+      - const: ahb
 
   power-domains:
     maxItems: 1
-- 
cgit v1.2.3


From b76eaf7d7ede35adbbb914cb2e9223a250448e52 Mon Sep 17 00:00:00 2001
From: Yanhong Wang <yanhong.wang@starfivetech.com>
Date: Mon, 17 Apr 2023 18:02:49 +0800
Subject: dt-bindings: net: Add support StarFive dwmac

Add documentation to describe StarFive dwmac driver(GMAC).

Signed-off-by: Yanhong Wang <yanhong.wang@starfivetech.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Samin Guo <samin.guo@starfivetech.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 .../devicetree/bindings/net/snps,dwmac.yaml        |   1 +
 .../bindings/net/starfive,jh7110-dwmac.yaml        | 144 +++++++++++++++++++++
 MAINTAINERS                                        |   6 +
 3 files changed, 151 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
index 71151478885a..363b3e3ea3a6 100644
--- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml
@@ -94,6 +94,7 @@ properties:
         - snps,dwmac-5.20
         - snps,dwxgmac
         - snps,dwxgmac-2.10
+        - starfive,jh7110-dwmac
 
   reg:
     minItems: 1
diff --git a/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
new file mode 100644
index 000000000000..5e7cfbbebce6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
@@ -0,0 +1,144 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (C) 2022 StarFive Technology Co., Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/starfive,jh7110-dwmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: StarFive JH7110 DWMAC glue layer
+
+maintainers:
+  - Emil Renner Berthing <kernel@esmil.dk>
+  - Samin Guo <samin.guo@starfivetech.com>
+
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - starfive,jh7110-dwmac
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - starfive,jh7110-dwmac
+      - const: snps,dwmac-5.20
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: GMAC main clock
+      - description: GMAC AHB clock
+      - description: PTP clock
+      - description: TX clock
+      - description: GTX clock
+
+  clock-names:
+    items:
+      - const: stmmaceth
+      - const: pclk
+      - const: ptp_ref
+      - const: tx
+      - const: gtx
+
+  interrupts:
+    minItems: 3
+    maxItems: 3
+
+  interrupt-names:
+    minItems: 3
+    maxItems: 3
+
+  resets:
+    items:
+      - description: MAC Reset signal.
+      - description: AHB Reset signal.
+
+  reset-names:
+    items:
+      - const: stmmaceth
+      - const: ahb
+
+  starfive,tx-use-rgmii-clk:
+    description:
+      Tx clock is provided by external rgmii clock.
+    type: boolean
+
+  starfive,syscon:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      - items:
+          - description: phandle to syscon that configures phy mode
+          - description: Offset of phy mode selection
+          - description: Shift of phy mode selection
+    description:
+      A phandle to syscon with two arguments that configure phy mode.
+      The argument one is the offset of phy mode selection, the
+      argument two is the shift of phy mode selection.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+  - interrupt-names
+  - resets
+  - reset-names
+
+allOf:
+  - $ref: snps,dwmac.yaml#
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    ethernet@16030000 {
+        compatible = "starfive,jh7110-dwmac", "snps,dwmac-5.20";
+        reg = <0x16030000 0x10000>;
+        clocks = <&clk 3>, <&clk 2>, <&clk 109>,
+                 <&clk 6>, <&clk 111>;
+        clock-names = "stmmaceth", "pclk", "ptp_ref",
+                      "tx", "gtx";
+        resets = <&rst 1>, <&rst 2>;
+        reset-names = "stmmaceth", "ahb";
+        interrupts = <7>, <6>, <5>;
+        interrupt-names = "macirq", "eth_wake_irq", "eth_lpi";
+        phy-mode = "rgmii-id";
+        snps,multicast-filter-bins = <64>;
+        snps,perfect-filter-entries = <8>;
+        rx-fifo-depth = <2048>;
+        tx-fifo-depth = <2048>;
+        snps,fixed-burst;
+        snps,no-pbl-x8;
+        snps,tso;
+        snps,force_thresh_dma_mode;
+        snps,axi-config = <&stmmac_axi_setup>;
+        snps,en-tx-lpi-clockgating;
+        snps,txpbl = <16>;
+        snps,rxpbl = <16>;
+        starfive,syscon = <&aon_syscon 0xc 0x12>;
+        phy-handle = <&phy0>;
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+            compatible = "snps,dwmac-mdio";
+
+            phy0: ethernet-phy@0 {
+                reg = <0>;
+            };
+        };
+
+        stmmac_axi_setup: stmmac-axi-config {
+            snps,lpi_en;
+            snps,wr_osr_lmt = <4>;
+            snps,rd_osr_lmt = <4>;
+            snps,blen = <256 128 64 32 0 0 0>;
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 1c09473685b1..4301fe86ad35 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -19928,6 +19928,12 @@ M:	Emil Renner Berthing <kernel@esmil.dk>
 S:	Maintained
 F:	arch/riscv/boot/dts/starfive/
 
+STARFIVE DWMAC GLUE LAYER
+M:	Emil Renner Berthing <kernel@esmil.dk>
+M:	Samin Guo <samin.guo@starfivetech.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/starfive,jh7110-dwmac.yaml
+
 STARFIVE JH7100 CLOCK DRIVERS
 M:	Emil Renner Berthing <kernel@esmil.dk>
 S:	Maintained
-- 
cgit v1.2.3


From 57b6c752c5c05a1f38eb6da9f1618f1b000e1f51 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Mon, 17 Apr 2023 17:17:32 +0200
Subject: dt-bindings: net: ethernet-controller: Document support for LEDs node

Document support for LEDs node in ethernet-controller.
Ethernet Controller may support different LEDs that can be configured
for different operation like blinking on traffic event or port link.

Also add some Documentation to describe the difference of these nodes
compared to PHY LEDs, since ethernet-controller LEDs are controllable
by the ethernet controller regs and the possible integrated PHY doesn't
have control on them.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/ethernet-controller.yaml          | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 00be387984ac..ebc2646ab5ff 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -222,6 +222,41 @@ properties:
         required:
           - speed
 
+  leds:
+    description:
+      Describes the LEDs associated with the Ethernet Controller.
+      These LEDs are not integrated in the PHY and PHY doesn't have any
+      control on them. Ethernet Controller regs are used to control
+      these defined LEDs.
+
+    type: object
+
+    properties:
+      '#address-cells':
+        const: 1
+
+      '#size-cells':
+        const: 0
+
+    patternProperties:
+      '^led@[a-f0-9]+$':
+        $ref: /schemas/leds/common.yaml#
+
+        properties:
+          reg:
+            maxItems: 1
+            description:
+              This defines the LED index in the PHY or the MAC. It's really
+              driver dependent and required for ports that define multiple
+              LEDs for the same port.
+
+        required:
+          - reg
+
+        unevaluatedProperties: false
+
+    additionalProperties: false
+
 dependencies:
   pcs-handle-names: [pcs-handle]
 
-- 
cgit v1.2.3


From ed617bc022f4596dc03815e8f0331e60644d07ee Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Mon, 17 Apr 2023 17:17:33 +0200
Subject: dt-bindings: net: dsa: qca8k: add LEDs definition example

Add LEDs definition example for qca8k Switch Family to describe how they
should be defined for a correct usage.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/dsa/qca8k.yaml         | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
index fe9ebe285938..df64eebebe18 100644
--- a/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/qca8k.yaml
@@ -18,6 +18,8 @@ description:
   PHY it is connected to. In this config, an internal mdio-bus is registered and
   the MDIO master is used for communication. Mixed external and internal
   mdio-bus configurations are not supported by the hardware.
+  Each phy has at most 3 LEDs connected and can be declared
+  using the standard LEDs structure.
 
 properties:
   compatible:
@@ -117,6 +119,7 @@ unevaluatedProperties: false
 examples:
   - |
     #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/leds/common.h>
 
     mdio {
         #address-cells = <1>;
@@ -226,6 +229,25 @@ examples:
                     label = "lan1";
                     phy-mode = "internal";
                     phy-handle = <&internal_phy_port1>;
+
+                    leds {
+                        #address-cells = <1>;
+                        #size-cells = <0>;
+
+                        led@0 {
+                            reg = <0>;
+                            color = <LED_COLOR_ID_WHITE>;
+                            function = LED_FUNCTION_LAN;
+                            default-state = "keep";
+                        };
+
+                        led@1 {
+                            reg = <1>;
+                            color = <LED_COLOR_ID_AMBER>;
+                            function = LED_FUNCTION_LAN;
+                            default-state = "keep";
+                        };
+                    };
                 };
 
                 port@2 {
-- 
cgit v1.2.3


From 18a24b694a2bfd59ae8a17ffee87f12c65d9f2a6 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Mon, 17 Apr 2023 17:17:36 +0200
Subject: dt-bindings: net: phy: Document support for LEDs node

Document support for LEDs node in phy and add an example for it.
PHY LED will have to match led pattern and should be treated as a
generic led.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ethernet-phy.yaml      | 43 ++++++++++++++++++++++
 1 file changed, 43 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index ac04f8efa35c..4f574532ee13 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -197,6 +197,35 @@ properties:
       PHY's that have configurable TX internal delays. If this property is
       present then the PHY applies the TX delay.
 
+  leds:
+    type: object
+
+    properties:
+      '#address-cells':
+        const: 1
+
+      '#size-cells':
+        const: 0
+
+    patternProperties:
+      '^led@[a-f0-9]+$':
+        $ref: /schemas/leds/common.yaml#
+
+        properties:
+          reg:
+            maxItems: 1
+            description:
+              This defines the LED index in the PHY or the MAC. It's really
+              driver dependent and required for ports that define multiple
+              LEDs for the same port.
+
+        required:
+          - reg
+
+        unevaluatedProperties: false
+
+    additionalProperties: false
+
 required:
   - reg
 
@@ -204,6 +233,8 @@ additionalProperties: true
 
 examples:
   - |
+    #include <dt-bindings/leds/common.h>
+
     ethernet {
         #address-cells = <1>;
         #size-cells = <0>;
@@ -219,5 +250,17 @@ examples:
             reset-gpios = <&gpio1 4 1>;
             reset-assert-us = <1000>;
             reset-deassert-us = <2000>;
+
+            leds {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                led@0 {
+                    reg = <0>;
+                    color = <LED_COLOR_ID_WHITE>;
+                    function = LED_FUNCTION_LAN;
+                    default-state = "keep";
+                };
+            };
         };
     };
-- 
cgit v1.2.3


From c693ea2fd6e3b43f6caaf618f864236c36a71f70 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 17 Apr 2023 17:17:38 +0200
Subject: Documentation: LEDs: Describe good names for network LEDs

Network LEDs can exist in both the MAC and the PHY. Naming is
difficult because the netdev name is neither stable nor unique, due to
commands like ip link set name eth42 dev eth0, and network
namespaces.

Give some example names where the MAC and the PHY have unique names
based on device tree nodes, or PCI bus addresses.

Since the LED can be used for anything which Linux supports for LEDs,
avoid using names like activity or link, rather describe the location
on the RJ-45, or what the RJ-45 is expected to be used for, WAN/LAN
etc.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/leds/well-known-leds.txt | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/leds/well-known-leds.txt b/Documentation/leds/well-known-leds.txt
index 2160382c86be..e9c30dc75884 100644
--- a/Documentation/leds/well-known-leds.txt
+++ b/Documentation/leds/well-known-leds.txt
@@ -70,3 +70,33 @@ Good: "platform:*:charging" (allwinner sun50i)
 * Screen
 
 Good: ":backlight" (Motorola Droid 4)
+
+* Ethernet LEDs
+
+Currently two types of Network LEDs are supported, those controlled by
+the PHY and those by the MAC. In theory both can be present at the
+same time for one Linux netdev, hence the names need to differ between
+MAC and PHY.
+
+Do not use the netdev name, such as eth0, enp1s0. These are not stable
+and are not unique. They also don't differentiate between MAC and PHY.
+
+** MAC LEDs
+
+Good: f1070000.ethernet:white:WAN
+Good: mdio_mux-0.1:00:green:left
+Good: 0000:02:00.0:yellow:top
+
+The first part must uniquely name the MAC controller. Then follows the
+colour.  WAN/LAN should be used for a single LED. If there are
+multiple LEDs, use left/right, or top/bottom to indicate their
+position on the RJ45 socket.
+
+** PHY LEDs
+
+Good: f1072004.mdio-mii:00:white:WAN
+Good: !mdio-mux!mdio@2!switch@0!mdio:01:green:right
+Good: r8169-0-200:00:yellow:bottom
+
+The first part must uniquely name the PHY. This often means uniquely
+identifying the MDIO bus controller, and the address on the bus.
-- 
cgit v1.2.3


From ed09c61eb19d9889780c791cb316ac76468f5186 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Mon, 17 Apr 2023 13:35:00 +0300
Subject: dt-bindings: net: Convert ath10k to YAML

Convert the ath10k bindings to YAML.

Dropped properties that are absent at the current state of mainline:
- qcom,msi_addr
- qcom,msi_base

Somewhat based on the ath11k bindings.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
Link: https://lore.kernel.org/r/20230406-topic-ath10k_bindings-v4-1-9f67a6bb0d56@linaro.org
---
 .../bindings/net/wireless/qcom,ath10k.txt          | 215 -------------
 .../bindings/net/wireless/qcom,ath10k.yaml         | 358 +++++++++++++++++++++
 MAINTAINERS                                        |   2 +-
 3 files changed, 359 insertions(+), 216 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
 create mode 100644 Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
deleted file mode 100644
index b61c2d5a0ff7..000000000000
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-* Qualcomm Atheros ath10k wireless devices
-
-Required properties:
-- compatible: Should be one of the following:
-	* "qcom,ath10k"
-	* "qcom,ipq4019-wifi"
-	* "qcom,wcn3990-wifi"
-
-PCI based devices uses compatible string "qcom,ath10k" and takes calibration
-data along with board specific data via "qcom,ath10k-calibration-data".
-Rest of the properties are not applicable for PCI based devices.
-
-AHB based devices (i.e. ipq4019) uses compatible string "qcom,ipq4019-wifi"
-and also uses most of the properties defined in this doc (except
-"qcom,ath10k-calibration-data"). It uses "qcom,ath10k-pre-calibration-data"
-to carry pre calibration data.
-
-In general, entry "qcom,ath10k-pre-calibration-data" and
-"qcom,ath10k-calibration-data" conflict with each other and only one
-can be provided per device.
-
-SNOC based devices (i.e. wcn3990) uses compatible string "qcom,wcn3990-wifi".
-
-- reg: Address and length of the register set for the device.
-- reg-names: Must include the list of following reg names,
-	     "membase"
-- interrupts: reference to the list of 17 interrupt numbers for "qcom,ipq4019-wifi"
-	      compatible target.
-	      reference to the list of 12 interrupt numbers for "qcom,wcn3990-wifi"
-	      compatible target.
-	      Must contain interrupt-names property per entry for
-	      "qcom,ath10k", "qcom,ipq4019-wifi" compatible targets.
-
-- interrupt-names: Must include the entries for MSI interrupt
-		   names ("msi0" to "msi15") and legacy interrupt
-		   name ("legacy") for "qcom,ath10k", "qcom,ipq4019-wifi"
-		   compatible targets.
-
-Optional properties:
-- resets: Must contain an entry for each entry in reset-names.
-          See ../reset/reseti.txt for details.
-- reset-names: Must include the list of following reset names,
-	       "wifi_cpu_init"
-	       "wifi_radio_srif"
-	       "wifi_radio_warm"
-	       "wifi_radio_cold"
-	       "wifi_core_warm"
-	       "wifi_core_cold"
-- clocks: List of clock specifiers, must contain an entry for each required
-          entry in clock-names.
-- clock-names: Should contain the clock names "wifi_wcss_cmd", "wifi_wcss_ref",
-	       "wifi_wcss_rtc" for "qcom,ipq4019-wifi" compatible target and
-	       "cxo_ref_clk_pin" and optionally "qdss" for "qcom,wcn3990-wifi"
-	       compatible target.
-- qcom,msi_addr: MSI interrupt address.
-- qcom,msi_base: Base value to add before writing MSI data into
-		MSI address register.
-- qcom,ath10k-calibration-variant: string to search for in the board-2.bin
-				   variant list with the same bus and device
-				   specific ids
-- qcom,ath10k-calibration-data : calibration data + board specific data
-				 as an array, the length can vary between
-				 hw versions.
-- qcom,ath10k-pre-calibration-data : pre calibration data as an array,
-				     the length can vary between hw versions.
-- <supply-name>-supply: handle to the regulator device tree node
-			   optional "supply-name" are "vdd-0.8-cx-mx",
-			   "vdd-1.8-xo", "vdd-1.3-rfa", "vdd-3.3-ch0",
-			   and "vdd-3.3-ch1".
-- memory-region:
-	Usage: optional
-	Value type: <phandle>
-	Definition: reference to the reserved-memory for the msa region
-		    used by the wifi firmware running in Q6.
-- iommus:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: A list of phandle and IOMMU specifier pairs.
-- ext-fem-name:
-	Usage: Optional
-	Value type: string
-	Definition: Name of external front end module used. Some valid FEM names
-		    for example: "microsemi-lx5586", "sky85703-11"
-		    and "sky85803" etc.
-- qcom,snoc-host-cap-8bit-quirk:
-	Usage: Optional
-	Value type: <empty>
-	Definition: Quirk specifying that the firmware expects the 8bit version
-		    of the host capability QMI request
-- qcom,xo-cal-data: xo cal offset to be configured in xo trim register.
-
-- qcom,msa-fixed-perm: Boolean context flag to disable SCM call for statically
-		       mapped msa region.
-
-- qcom,coexist-support : should contain eithr "0" or "1" to indicate coex
-			 support by the hardware.
-- qcom,coexist-gpio-pin : gpio pin number  information to support coex
-			  which will be used by wifi firmware.
-
-* Subnodes
-The ath10k wifi node can contain one optional firmware subnode.
-Firmware subnode is needed when the platform does not have TustZone.
-The firmware subnode must have:
-
-- iommus:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: A list of phandle and IOMMU specifier pairs.
-
-
-Example (to supply PCI based wifi block details):
-
-In this example, the node is defined as child node of the PCI controller.
-
-pci {
-	pcie@0 {
-		reg = <0 0 0 0 0>;
-		#interrupt-cells = <1>;
-		#size-cells = <2>;
-		#address-cells = <3>;
-		device_type = "pci";
-
-		wifi@0,0 {
-			reg = <0 0 0 0 0>;
-			qcom,ath10k-calibration-data = [ 01 02 03 ... ];
-			ext-fem-name = "microsemi-lx5586";
-		};
-	};
-};
-
-Example (to supply ipq4019 SoC wifi block details):
-
-wifi0: wifi@a000000 {
-	compatible = "qcom,ipq4019-wifi";
-	reg = <0xa000000 0x200000>;
-	resets = <&gcc WIFI0_CPU_INIT_RESET>,
-		 <&gcc WIFI0_RADIO_SRIF_RESET>,
-		 <&gcc WIFI0_RADIO_WARM_RESET>,
-		 <&gcc WIFI0_RADIO_COLD_RESET>,
-		 <&gcc WIFI0_CORE_WARM_RESET>,
-		 <&gcc WIFI0_CORE_COLD_RESET>;
-	reset-names = "wifi_cpu_init",
-		      "wifi_radio_srif",
-		      "wifi_radio_warm",
-		      "wifi_radio_cold",
-		      "wifi_core_warm",
-		      "wifi_core_cold";
-	clocks = <&gcc GCC_WCSS2G_CLK>,
-		 <&gcc GCC_WCSS2G_REF_CLK>,
-		 <&gcc GCC_WCSS2G_RTC_CLK>;
-	clock-names = "wifi_wcss_cmd",
-		      "wifi_wcss_ref",
-		      "wifi_wcss_rtc";
-	interrupts = <0 0x20 0x1>,
-		     <0 0x21 0x1>,
-		     <0 0x22 0x1>,
-		     <0 0x23 0x1>,
-		     <0 0x24 0x1>,
-		     <0 0x25 0x1>,
-		     <0 0x26 0x1>,
-		     <0 0x27 0x1>,
-		     <0 0x28 0x1>,
-		     <0 0x29 0x1>,
-		     <0 0x2a 0x1>,
-		     <0 0x2b 0x1>,
-		     <0 0x2c 0x1>,
-		     <0 0x2d 0x1>,
-		     <0 0x2e 0x1>,
-		     <0 0x2f 0x1>,
-		     <0 0xa8 0x0>;
-	interrupt-names = "msi0",  "msi1",  "msi2",  "msi3",
-			  "msi4",  "msi5",  "msi6",  "msi7",
-			  "msi8",  "msi9",  "msi10", "msi11",
-			  "msi12", "msi13", "msi14", "msi15",
-			  "legacy";
-	qcom,msi_addr = <0x0b006040>;
-	qcom,msi_base = <0x40>;
-	qcom,ath10k-pre-calibration-data = [ 01 02 03 ... ];
-	qcom,coexist-support = <1>;
-	qcom,coexist-gpio-pin = <0x33>;
-};
-
-Example (to supply wcn3990 SoC wifi block details):
-
-wifi@18000000 {
-		compatible = "qcom,wcn3990-wifi";
-		reg = <0x18800000 0x800000>;
-		reg-names = "membase";
-		clocks = <&clock_gcc clk_rf_clk2_pin>;
-		clock-names = "cxo_ref_clk_pin";
-		interrupts =
-			<GIC_SPI 414 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 415 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 417 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 418 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 419 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 420 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
-			<GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>;
-		vdd-0.8-cx-mx-supply = <&pm8998_l5>;
-		vdd-1.8-xo-supply = <&vreg_l7a_1p8>;
-		vdd-1.3-rfa-supply = <&vreg_l17a_1p3>;
-		vdd-3.3-ch0-supply = <&vreg_l25a_3p3>;
-		vdd-3.3-ch1-supply = <&vreg_l26a_3p3>;
-		memory-region = <&wifi_msa_mem>;
-		iommus = <&apps_smmu 0x0040 0x1>;
-		qcom,msa-fixed-perm;
-		wifi-firmware {
-			iommus = <&apps_iommu 0xc22 0x1>;
-		};
-};
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
new file mode 100644
index 000000000000..c85ed330426d
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
@@ -0,0 +1,358 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/wireless/qcom,ath10k.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Technologies ath10k wireless devices
+
+maintainers:
+  - Kalle Valo <kvalo@kernel.org>
+
+description:
+  Qualcomm Technologies, Inc. IEEE 802.11ac devices.
+
+properties:
+  compatible:
+    enum:
+      - qcom,ath10k # SDIO-based devices
+      - qcom,ipq4019-wifi
+      - qcom,wcn3990-wifi # SNoC-based devices
+
+  reg:
+    maxItems: 1
+
+  reg-names:
+    items:
+      - const: membase
+
+  interrupts:
+    minItems: 12
+    maxItems: 17
+
+  interrupt-names:
+    minItems: 12
+    maxItems: 17
+
+  memory-region:
+    maxItems: 1
+    description:
+      Reference to the MSA memory region used by the Wi-Fi firmware
+      running on the Q6 core.
+
+  iommus:
+    minItems: 1
+    maxItems: 2
+
+  clocks:
+    minItems: 1
+    maxItems: 3
+
+  clock-names:
+    minItems: 1
+    maxItems: 3
+
+  resets:
+    maxItems: 6
+
+  reset-names:
+    items:
+      - const: wifi_cpu_init
+      - const: wifi_radio_srif
+      - const: wifi_radio_warm
+      - const: wifi_radio_cold
+      - const: wifi_core_warm
+      - const: wifi_core_cold
+
+  ext-fem-name:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: Name of external front end module used.
+    enum:
+      - microsemi-lx5586
+      - sky85703-11
+      - sky85803
+
+  wifi-firmware:
+    type: object
+    additionalProperties: false
+    description: |
+      The ath10k Wi-Fi node can contain one optional firmware subnode.
+      Firmware subnode is needed when the platform does not have Trustzone.
+    properties:
+      iommus:
+        maxItems: 1
+    required:
+      - iommus
+
+  qcom,ath10k-calibration-data:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Calibration data + board-specific data as a byte array. The length
+      can vary between hardware versions.
+
+  qcom,ath10k-calibration-variant:
+    $ref: /schemas/types.yaml#/definitions/string
+    description:
+      Unique variant identifier of the calibration data in board-2.bin
+      for designs with colliding bus and device specific ids
+
+  qcom,ath10k-pre-calibration-data:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Pre-calibration data as a byte array. The length can vary between
+      hardware versions.
+
+  qcom,coexist-support:
+    $ref: /schemas/types.yaml#/definitions/uint8
+    enum: [0, 1]
+    description:
+      Indicate coex support by the hardware.
+
+  qcom,coexist-gpio-pin:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      COEX GPIO number provided to the Wi-Fi firmware.
+
+  qcom,msa-fixed-perm:
+    type: boolean
+    description:
+      Whether to skip executing an SCM call that reassigns the memory
+      region ownership.
+
+  qcom,smem-states:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description: State bits used by the AP to signal the WLAN Q6.
+    items:
+      - description: Signal bits used to enable/disable low power mode
+                     on WCN in the case of WoW (Wake on Wireless).
+
+  qcom,smem-state-names:
+    description: The names of the state bits used for SMP2P output.
+    items:
+      - const: wlan-smp2p-out
+
+  qcom,snoc-host-cap-8bit-quirk:
+    type: boolean
+    description:
+      Quirk specifying that the firmware expects the 8bit version
+      of the host capability QMI request
+
+  qcom,xo-cal-data:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      XO cal offset to be configured in XO trim register.
+
+  vdd-0.8-cx-mx-supply:
+    description: Main logic power rail
+
+  vdd-1.8-xo-supply:
+    description: Crystal oscillator supply
+
+  vdd-1.3-rfa-supply:
+    description: RFA supply
+
+  vdd-3.3-ch0-supply:
+    description: Primary Wi-Fi antenna supply
+
+  vdd-3.3-ch1-supply:
+    description: Secondary Wi-Fi antenna supply
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,ipq4019-wifi
+    then:
+      properties:
+        interrupts:
+          minItems: 17
+          maxItems: 17
+
+        interrupt-names:
+          items:
+            - const: msi0
+            - const: msi1
+            - const: msi2
+            - const: msi3
+            - const: msi4
+            - const: msi5
+            - const: msi6
+            - const: msi7
+            - const: msi8
+            - const: msi9
+            - const: msi10
+            - const: msi11
+            - const: msi12
+            - const: msi13
+            - const: msi14
+            - const: msi15
+            - const: legacy
+
+        clocks:
+          items:
+            - description: Wi-Fi command clock
+            - description: Wi-Fi reference clock
+            - description: Wi-Fi RTC clock
+
+        clock-names:
+          items:
+            - const: wifi_wcss_cmd
+            - const: wifi_wcss_ref
+            - const: wifi_wcss_rtc
+
+      required:
+        - clocks
+        - clock-names
+        - interrupts
+        - interrupt-names
+        - resets
+        - reset-names
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,wcn3990-wifi
+
+    then:
+      properties:
+        clocks:
+          minItems: 1
+          items:
+            - description: XO reference clock
+            - description: Qualcomm Debug Subsystem clock
+
+        clock-names:
+          minItems: 1
+          items:
+            - const: cxo_ref_clk_pin
+            - const: qdss
+
+        interrupts:
+          items:
+            - description: CE0
+            - description: CE1
+            - description: CE2
+            - description: CE3
+            - description: CE4
+            - description: CE5
+            - description: CE6
+            - description: CE7
+            - description: CE8
+            - description: CE9
+            - description: CE10
+            - description: CE11
+
+        interrupt-names: false
+
+      required:
+        - interrupts
+
+examples:
+  # SNoC
+  - |
+    #include <dt-bindings/clock/qcom,rpmcc.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    wifi@18800000 {
+      compatible = "qcom,wcn3990-wifi";
+      reg = <0x18800000 0x800000>;
+      reg-names = "membase";
+      memory-region = <&wlan_msa_mem>;
+      clocks = <&rpmcc RPM_SMD_RF_CLK2_PIN>;
+      clock-names = "cxo_ref_clk_pin";
+      interrupts = <GIC_SPI 413 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 414 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 415 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 416 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 417 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 418 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 420 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 421 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 422 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 423 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 424 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SPI 425 IRQ_TYPE_LEVEL_HIGH>;
+      iommus = <&anoc2_smmu 0x1900>,
+               <&anoc2_smmu 0x1901>;
+      qcom,snoc-host-cap-8bit-quirk;
+      vdd-0.8-cx-mx-supply = <&vreg_l5a_0p8>;
+      vdd-1.8-xo-supply = <&vreg_l7a_1p8>;
+      vdd-1.3-rfa-supply = <&vreg_l17a_1p3>;
+      vdd-3.3-ch0-supply = <&vreg_l25a_3p3>;
+      vdd-3.3-ch1-supply = <&vreg_l23a_3p3>;
+
+      wifi-firmware {
+        iommus = <&apps_smmu 0x1c02 0x1>;
+      };
+    };
+
+  # AHB
+  - |
+    #include <dt-bindings/clock/qcom,gcc-ipq4019.h>
+
+    wifi@a000000 {
+        compatible = "qcom,ipq4019-wifi";
+        reg = <0xa000000 0x200000>;
+        resets = <&gcc WIFI0_CPU_INIT_RESET>,
+                 <&gcc WIFI0_RADIO_SRIF_RESET>,
+                 <&gcc WIFI0_RADIO_WARM_RESET>,
+                 <&gcc WIFI0_RADIO_COLD_RESET>,
+                 <&gcc WIFI0_CORE_WARM_RESET>,
+                 <&gcc WIFI0_CORE_COLD_RESET>;
+        reset-names = "wifi_cpu_init",
+                      "wifi_radio_srif",
+                      "wifi_radio_warm",
+                      "wifi_radio_cold",
+                      "wifi_core_warm",
+                      "wifi_core_cold";
+        clocks = <&gcc GCC_WCSS2G_CLK>,
+                 <&gcc GCC_WCSS2G_REF_CLK>,
+                 <&gcc GCC_WCSS2G_RTC_CLK>;
+        clock-names = "wifi_wcss_cmd",
+                      "wifi_wcss_ref",
+                      "wifi_wcss_rtc";
+        interrupts = <GIC_SPI 32 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 33 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 34 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 35 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 36 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 37 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 38 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 39 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 40 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 41 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 42 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 43 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 44 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 45 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 46 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 47 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 168 IRQ_TYPE_LEVEL_HIGH>;
+        interrupt-names =  "msi0",
+                           "msi1",
+                           "msi2",
+                           "msi3",
+                           "msi4",
+                           "msi5",
+                           "msi6",
+                           "msi7",
+                           "msi8",
+                           "msi9",
+                           "msi10",
+                           "msi11",
+                           "msi12",
+                           "msi13",
+                           "msi14",
+                           "msi15",
+                           "legacy";
+      };
diff --git a/MAINTAINERS b/MAINTAINERS
index 8d5bc223f305..c659e12e0c4f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17196,7 +17196,7 @@ S:	Supported
 W:	https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
 F:	drivers/net/wireless/ath/ath10k/
-F:	Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
+F:	Documentation/devicetree/bindings/net/wireless/qcom,ath10k.yaml
 
 QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
 M:	Kalle Valo <kvalo@kernel.org>
-- 
cgit v1.2.3


From 84ce730f82dfe62f6f3e76b059ce0f7178322fb9 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 18 Apr 2023 10:06:27 -0500
Subject: dt-bindings: net: ethernet: Fix JSON pointer references

A JSON pointer reference (the part after the "#") must start with a "/".
Conversely, references to the entire document must not have a trailing "/"
and should be just a "#". The existing jsonschema package allows these,
but coming changes make allowed "$ref" URIs stricter and throw errors on
these references.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20230418150628.1528480-1-robh@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/devicetree/bindings/net/ethernet-controller.yaml | 2 +-
 Documentation/devicetree/bindings/net/ethernet-switch.yaml     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index ebc2646ab5ff..6b0d359367da 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -205,7 +205,7 @@ properties:
               duplex is assumed.
 
           pause:
-            $ref: /schemas/types.yaml#definitions/flag
+            $ref: /schemas/types.yaml#/definitions/flag
             description:
               Indicates that pause should be enabled.
 
diff --git a/Documentation/devicetree/bindings/net/ethernet-switch.yaml b/Documentation/devicetree/bindings/net/ethernet-switch.yaml
index 2ceccce6cbd7..f1b9075dc7fb 100644
--- a/Documentation/devicetree/bindings/net/ethernet-switch.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-switch.yaml
@@ -55,7 +55,7 @@ additionalProperties: true
 $defs:
   base:
     description: An ethernet switch without any extra port properties
-    $ref: '#/'
+    $ref: '#'
 
     patternProperties:
       "^(ethernet-)?port@[0-9]+$":
-- 
cgit v1.2.3


From 3b3009ea8abb713b022d94fba95ec270cf6e7eae Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 17 Apr 2023 10:32:26 -0400
Subject: net/handshake: Create a NETLINK service for handling handshake
 requests

When a kernel consumer needs a transport layer security session, it
first needs a handshake to negotiate and establish a session. This
negotiation can be done in user space via one of the several
existing library implementations, or it can be done in the kernel.

No in-kernel handshake implementations yet exist. In their absence,
we add a netlink service that can:

a. Notify a user space daemon that a handshake is needed.

b. Once notified, the daemon calls the kernel back via this
   netlink service to get the handshake parameters, including an
   open socket on which to establish the session.

c. Once the handshake is complete, the daemon reports the
   session status and other information via a second netlink
   operation. This operation marks that it is safe for the
   kernel to use the open socket and the security session
   established there.

The notification service uses a multicast group. Each handshake
mechanism (eg, tlshd) adopts its own group number so that the
handshake services are completely independent of one another. The
kernel can then tell via netlink_has_listeners() whether a handshake
service is active and prepared to handle a handshake request.

A new netlink operation, ACCEPT, acts like accept(2) in that it
instantiates a file descriptor in the user space daemon's fd table.
If this operation is successful, the reply carries the fd number,
which can be treated as an open and ready file descriptor.

While user space is performing the handshake, the kernel keeps its
muddy paws off the open socket. A second new netlink operation,
DONE, indicates that the user space daemon is finished with the
socket and it is safe for the kernel to use again. The operation
also indicates whether a session was established successfully.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/handshake.yaml | 122 +++++++++++
 MAINTAINERS                                |   9 +
 include/trace/events/handshake.h           | 159 ++++++++++++++
 include/uapi/linux/handshake.h             |  71 ++++++
 net/Kconfig                                |   5 +
 net/Makefile                               |   1 +
 net/handshake/Makefile                     |  11 +
 net/handshake/genl.c                       |  57 +++++
 net/handshake/genl.h                       |  23 ++
 net/handshake/handshake.h                  |  82 +++++++
 net/handshake/netlink.c                    | 312 ++++++++++++++++++++++++++
 net/handshake/request.c                    | 339 +++++++++++++++++++++++++++++
 net/handshake/trace.c                      |  20 ++
 13 files changed, 1211 insertions(+)
 create mode 100644 Documentation/netlink/specs/handshake.yaml
 create mode 100644 include/trace/events/handshake.h
 create mode 100644 include/uapi/linux/handshake.h
 create mode 100644 net/handshake/Makefile
 create mode 100644 net/handshake/genl.c
 create mode 100644 net/handshake/genl.h
 create mode 100644 net/handshake/handshake.h
 create mode 100644 net/handshake/netlink.c
 create mode 100644 net/handshake/request.c
 create mode 100644 net/handshake/trace.c

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml
new file mode 100644
index 000000000000..0333d92b1438
--- /dev/null
+++ b/Documentation/netlink/specs/handshake.yaml
@@ -0,0 +1,122 @@
+# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+#
+# Author: Chuck Lever <chuck.lever@oracle.com>
+#
+# Copyright (c) 2023, Oracle and/or its affiliates.
+#
+
+name: handshake
+
+protocol: genetlink
+
+doc: Netlink protocol to request a transport layer security handshake.
+
+definitions:
+  -
+    type: enum
+    name: handler-class
+    value-start: 0
+    entries: [ none, max ]
+  -
+    type: enum
+    name: msg-type
+    value-start: 0
+    entries: [ unspec, clienthello, serverhello ]
+  -
+    type: enum
+    name: auth
+    value-start: 0
+    entries: [ unspec, unauth, psk, x509 ]
+
+attribute-sets:
+  -
+    name: x509
+    attributes:
+      -
+        name: cert
+        type: u32
+      -
+        name: privkey
+        type: u32
+  -
+    name: accept
+    attributes:
+      -
+        name: sockfd
+        type: u32
+      -
+        name: handler-class
+        type: u32
+        enum: handler-class
+      -
+        name: message-type
+        type: u32
+        enum: msg-type
+      -
+        name: timeout
+        type: u32
+      -
+        name: auth-mode
+        type: u32
+        enum: auth
+      -
+        name: peer-identity
+        type: u32
+        multi-attr: true
+      -
+        name: certificate
+        type: nest
+        nested-attributes: x509
+        multi-attr: true
+  -
+    name: done
+    attributes:
+      -
+        name: status
+        type: u32
+      -
+        name: sockfd
+        type: u32
+      -
+        name: remote-auth
+        type: u32
+        multi-attr: true
+
+operations:
+  list:
+    -
+      name: ready
+      doc: Notify handlers that a new handshake request is waiting
+      notify: accept
+    -
+      name: accept
+      doc: Handler retrieves next queued handshake request
+      attribute-set: accept
+      flags: [ admin-perm ]
+      do:
+        request:
+          attributes:
+            - handler-class
+        reply:
+          attributes:
+            - sockfd
+            - message-type
+            - timeout
+            - auth-mode
+            - peer-identity
+            - certificate
+    -
+      name: done
+      doc: Handler reports handshake completion
+      attribute-set: done
+      do:
+        request:
+          attributes:
+            - status
+            - sockfd
+            - remote-auth
+
+mcast-groups:
+  list:
+    -
+      name: none
diff --git a/MAINTAINERS b/MAINTAINERS
index 4fc57dfd5fd0..cdc7748d15b8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8947,6 +8947,15 @@ Q:	http://patchwork.linuxtv.org/project/linux-media/list/
 T:	git git://linuxtv.org/anttip/media_tree.git
 F:	drivers/media/usb/hackrf/
 
+HANDSHAKE UPCALL FOR TRANSPORT LAYER SECURITY
+M:	Chuck Lever <chuck.lever@oracle.com>
+L:	kernel-tls-handshake@lists.linux.dev
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	Documentation/netlink/specs/handshake.yaml
+F:	include/trace/events/handshake.h
+F:	net/handshake/
+
 HANTRO VPU CODEC DRIVER
 M:	Ezequiel Garcia <ezequiel@vanguardiasur.com.ar>
 M:	Philipp Zabel <p.zabel@pengutronix.de>
diff --git a/include/trace/events/handshake.h b/include/trace/events/handshake.h
new file mode 100644
index 000000000000..8dadcab5f12a
--- /dev/null
+++ b/include/trace/events/handshake.h
@@ -0,0 +1,159 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM handshake
+
+#if !defined(_TRACE_HANDSHAKE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HANDSHAKE_H
+
+#include <linux/net.h>
+#include <linux/tracepoint.h>
+
+DECLARE_EVENT_CLASS(handshake_event_class,
+	TP_PROTO(
+		const struct net *net,
+		const struct handshake_req *req,
+		const struct sock *sk
+	),
+	TP_ARGS(net, req, sk),
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(const void *, sk)
+		__field(unsigned int, netns_ino)
+	),
+	TP_fast_assign(
+		__entry->req = req;
+		__entry->sk = sk;
+		__entry->netns_ino = net->ns.inum;
+	),
+	TP_printk("req=%p sk=%p",
+		__entry->req, __entry->sk
+	)
+);
+#define DEFINE_HANDSHAKE_EVENT(name)				\
+	DEFINE_EVENT(handshake_event_class, name,		\
+		TP_PROTO(					\
+			const struct net *net,			\
+			const struct handshake_req *req,	\
+			const struct sock *sk			\
+		),						\
+		TP_ARGS(net, req, sk))
+
+DECLARE_EVENT_CLASS(handshake_fd_class,
+	TP_PROTO(
+		const struct net *net,
+		const struct handshake_req *req,
+		const struct sock *sk,
+		int fd
+	),
+	TP_ARGS(net, req, sk, fd),
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(const void *, sk)
+		__field(int, fd)
+		__field(unsigned int, netns_ino)
+	),
+	TP_fast_assign(
+		__entry->req = req;
+		__entry->sk = req->hr_sk;
+		__entry->fd = fd;
+		__entry->netns_ino = net->ns.inum;
+	),
+	TP_printk("req=%p sk=%p fd=%d",
+		__entry->req, __entry->sk, __entry->fd
+	)
+);
+#define DEFINE_HANDSHAKE_FD_EVENT(name)				\
+	DEFINE_EVENT(handshake_fd_class, name,			\
+		TP_PROTO(					\
+			const struct net *net,			\
+			const struct handshake_req *req,	\
+			const struct sock *sk,			\
+			int fd					\
+		),						\
+		TP_ARGS(net, req, sk, fd))
+
+DECLARE_EVENT_CLASS(handshake_error_class,
+	TP_PROTO(
+		const struct net *net,
+		const struct handshake_req *req,
+		const struct sock *sk,
+		int err
+	),
+	TP_ARGS(net, req, sk, err),
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(const void *, sk)
+		__field(int, err)
+		__field(unsigned int, netns_ino)
+	),
+	TP_fast_assign(
+		__entry->req = req;
+		__entry->sk = sk;
+		__entry->err = err;
+		__entry->netns_ino = net->ns.inum;
+	),
+	TP_printk("req=%p sk=%p err=%d",
+		__entry->req, __entry->sk, __entry->err
+	)
+);
+#define DEFINE_HANDSHAKE_ERROR(name)				\
+	DEFINE_EVENT(handshake_error_class, name,		\
+		TP_PROTO(					\
+			const struct net *net,			\
+			const struct handshake_req *req,	\
+			const struct sock *sk,			\
+			int err					\
+		),						\
+		TP_ARGS(net, req, sk, err))
+
+
+/*
+ * Request lifetime events
+ */
+
+DEFINE_HANDSHAKE_EVENT(handshake_submit);
+DEFINE_HANDSHAKE_ERROR(handshake_submit_err);
+DEFINE_HANDSHAKE_EVENT(handshake_cancel);
+DEFINE_HANDSHAKE_EVENT(handshake_cancel_none);
+DEFINE_HANDSHAKE_EVENT(handshake_cancel_busy);
+DEFINE_HANDSHAKE_EVENT(handshake_destruct);
+
+
+TRACE_EVENT(handshake_complete,
+	TP_PROTO(
+		const struct net *net,
+		const struct handshake_req *req,
+		const struct sock *sk,
+		int status
+	),
+	TP_ARGS(net, req, sk, status),
+	TP_STRUCT__entry(
+		__field(const void *, req)
+		__field(const void *, sk)
+		__field(int, status)
+		__field(unsigned int, netns_ino)
+	),
+	TP_fast_assign(
+		__entry->req = req;
+		__entry->sk = sk;
+		__entry->status = status;
+		__entry->netns_ino = net->ns.inum;
+	),
+	TP_printk("req=%p sk=%p status=%d",
+		__entry->req, __entry->sk, __entry->status
+	)
+);
+
+/*
+ * Netlink events
+ */
+
+DEFINE_HANDSHAKE_ERROR(handshake_notify_err);
+DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_accept);
+DEFINE_HANDSHAKE_ERROR(handshake_cmd_accept_err);
+DEFINE_HANDSHAKE_FD_EVENT(handshake_cmd_done);
+DEFINE_HANDSHAKE_ERROR(handshake_cmd_done_err);
+
+#endif /* _TRACE_HANDSHAKE_H */
+
+#include <trace/define_trace.h>
diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h
new file mode 100644
index 000000000000..7f66ff489b87
--- /dev/null
+++ b/include/uapi/linux/handshake.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN uapi header */
+
+#ifndef _UAPI_LINUX_HANDSHAKE_H
+#define _UAPI_LINUX_HANDSHAKE_H
+
+#define HANDSHAKE_FAMILY_NAME		"handshake"
+#define HANDSHAKE_FAMILY_VERSION	1
+
+enum handshake_handler_class {
+	HANDSHAKE_HANDLER_CLASS_NONE,
+	HANDSHAKE_HANDLER_CLASS_MAX,
+};
+
+enum handshake_msg_type {
+	HANDSHAKE_MSG_TYPE_UNSPEC,
+	HANDSHAKE_MSG_TYPE_CLIENTHELLO,
+	HANDSHAKE_MSG_TYPE_SERVERHELLO,
+};
+
+enum handshake_auth {
+	HANDSHAKE_AUTH_UNSPEC,
+	HANDSHAKE_AUTH_UNAUTH,
+	HANDSHAKE_AUTH_PSK,
+	HANDSHAKE_AUTH_X509,
+};
+
+enum {
+	HANDSHAKE_A_X509_CERT = 1,
+	HANDSHAKE_A_X509_PRIVKEY,
+
+	__HANDSHAKE_A_X509_MAX,
+	HANDSHAKE_A_X509_MAX = (__HANDSHAKE_A_X509_MAX - 1)
+};
+
+enum {
+	HANDSHAKE_A_ACCEPT_SOCKFD = 1,
+	HANDSHAKE_A_ACCEPT_HANDLER_CLASS,
+	HANDSHAKE_A_ACCEPT_MESSAGE_TYPE,
+	HANDSHAKE_A_ACCEPT_TIMEOUT,
+	HANDSHAKE_A_ACCEPT_AUTH_MODE,
+	HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
+	HANDSHAKE_A_ACCEPT_CERTIFICATE,
+
+	__HANDSHAKE_A_ACCEPT_MAX,
+	HANDSHAKE_A_ACCEPT_MAX = (__HANDSHAKE_A_ACCEPT_MAX - 1)
+};
+
+enum {
+	HANDSHAKE_A_DONE_STATUS = 1,
+	HANDSHAKE_A_DONE_SOCKFD,
+	HANDSHAKE_A_DONE_REMOTE_AUTH,
+
+	__HANDSHAKE_A_DONE_MAX,
+	HANDSHAKE_A_DONE_MAX = (__HANDSHAKE_A_DONE_MAX - 1)
+};
+
+enum {
+	HANDSHAKE_CMD_READY = 1,
+	HANDSHAKE_CMD_ACCEPT,
+	HANDSHAKE_CMD_DONE,
+
+	__HANDSHAKE_CMD_MAX,
+	HANDSHAKE_CMD_MAX = (__HANDSHAKE_CMD_MAX - 1)
+};
+
+#define HANDSHAKE_MCGRP_NONE	"none"
+
+#endif /* _UAPI_LINUX_HANDSHAKE_H */
diff --git a/net/Kconfig b/net/Kconfig
index f806722bccf4..4b800706cc76 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -68,6 +68,11 @@ source "net/iucv/Kconfig"
 source "net/smc/Kconfig"
 source "net/xdp/Kconfig"
 
+config NET_HANDSHAKE
+	bool
+	depends on SUNRPC || NVME_TARGET_TCP || NVME_TCP
+	default y
+
 config INET
 	bool "TCP/IP networking"
 	help
diff --git a/net/Makefile b/net/Makefile
index 87592009366f..4c4dc535453d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -79,3 +79,4 @@ obj-$(CONFIG_NET_NCSI)		+= ncsi/
 obj-$(CONFIG_XDP_SOCKETS)	+= xdp/
 obj-$(CONFIG_MPTCP)		+= mptcp/
 obj-$(CONFIG_MCTP)		+= mctp/
+obj-$(CONFIG_NET_HANDSHAKE)	+= handshake/
diff --git a/net/handshake/Makefile b/net/handshake/Makefile
new file mode 100644
index 000000000000..d38736de45da
--- /dev/null
+++ b/net/handshake/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for the Generic HANDSHAKE service
+#
+# Author: Chuck Lever <chuck.lever@oracle.com>
+#
+# Copyright (c) 2023, Oracle and/or its affiliates.
+#
+
+obj-y += handshake.o
+handshake-y := genl.o netlink.o request.o trace.o
diff --git a/net/handshake/genl.c b/net/handshake/genl.c
new file mode 100644
index 000000000000..652f37d19bd6
--- /dev/null
+++ b/net/handshake/genl.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "genl.h"
+
+#include <linux/handshake.h>
+
+/* HANDSHAKE_CMD_ACCEPT - do */
+static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = {
+	[HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = NLA_POLICY_MAX(NLA_U32, 1),
+};
+
+/* HANDSHAKE_CMD_DONE - do */
+static const struct nla_policy handshake_done_nl_policy[HANDSHAKE_A_DONE_REMOTE_AUTH + 1] = {
+	[HANDSHAKE_A_DONE_STATUS] = { .type = NLA_U32, },
+	[HANDSHAKE_A_DONE_SOCKFD] = { .type = NLA_U32, },
+	[HANDSHAKE_A_DONE_REMOTE_AUTH] = { .type = NLA_U32, },
+};
+
+/* Ops table for handshake */
+static const struct genl_split_ops handshake_nl_ops[] = {
+	{
+		.cmd		= HANDSHAKE_CMD_ACCEPT,
+		.doit		= handshake_nl_accept_doit,
+		.policy		= handshake_accept_nl_policy,
+		.maxattr	= HANDSHAKE_A_ACCEPT_HANDLER_CLASS,
+		.flags		= GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
+	},
+	{
+		.cmd		= HANDSHAKE_CMD_DONE,
+		.doit		= handshake_nl_done_doit,
+		.policy		= handshake_done_nl_policy,
+		.maxattr	= HANDSHAKE_A_DONE_REMOTE_AUTH,
+		.flags		= GENL_CMD_CAP_DO,
+	},
+};
+
+static const struct genl_multicast_group handshake_nl_mcgrps[] = {
+	[HANDSHAKE_NLGRP_NONE] = { "none", },
+};
+
+struct genl_family handshake_nl_family __ro_after_init = {
+	.name		= HANDSHAKE_FAMILY_NAME,
+	.version	= HANDSHAKE_FAMILY_VERSION,
+	.netnsok	= true,
+	.parallel_ops	= true,
+	.module		= THIS_MODULE,
+	.split_ops	= handshake_nl_ops,
+	.n_split_ops	= ARRAY_SIZE(handshake_nl_ops),
+	.mcgrps		= handshake_nl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(handshake_nl_mcgrps),
+};
diff --git a/net/handshake/genl.h b/net/handshake/genl.h
new file mode 100644
index 000000000000..a1eb7ccccc7f
--- /dev/null
+++ b/net/handshake/genl.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* Do not edit directly, auto-generated from: */
+/*	Documentation/netlink/specs/handshake.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_HANDSHAKE_GEN_H
+#define _LINUX_HANDSHAKE_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <linux/handshake.h>
+
+int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info);
+int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info);
+
+enum {
+	HANDSHAKE_NLGRP_NONE,
+};
+
+extern struct genl_family handshake_nl_family;
+
+#endif /* _LINUX_HANDSHAKE_GEN_H */
diff --git a/net/handshake/handshake.h b/net/handshake/handshake.h
new file mode 100644
index 000000000000..52568dbe24f1
--- /dev/null
+++ b/net/handshake/handshake.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic netlink handshake service
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _INTERNAL_HANDSHAKE_H
+#define _INTERNAL_HANDSHAKE_H
+
+/* Per-net namespace context */
+struct handshake_net {
+	spinlock_t		hn_lock;	/* protects next 3 fields */
+	int			hn_pending;
+	int			hn_pending_max;
+	struct list_head	hn_requests;
+
+	unsigned long		hn_flags;
+};
+
+enum hn_flags_bits {
+	HANDSHAKE_F_NET_DRAINING,
+};
+
+struct handshake_proto;
+
+/* One handshake request */
+struct handshake_req {
+	struct list_head		hr_list;
+	struct rhash_head		hr_rhash;
+	unsigned long			hr_flags;
+	const struct handshake_proto	*hr_proto;
+	struct sock			*hr_sk;
+	void				(*hr_odestruct)(struct sock *sk);
+
+	/* Always the last field */
+	char				hr_priv[];
+};
+
+enum hr_flags_bits {
+	HANDSHAKE_F_REQ_COMPLETED,
+};
+
+/* Invariants for all handshake requests for one transport layer
+ * security protocol
+ */
+struct handshake_proto {
+	int			hp_handler_class;
+	size_t			hp_privsize;
+
+	int			(*hp_accept)(struct handshake_req *req,
+					     struct genl_info *info, int fd);
+	void			(*hp_done)(struct handshake_req *req,
+					   unsigned int status,
+					   struct genl_info *info);
+	void			(*hp_destroy)(struct handshake_req *req);
+};
+
+/* netlink.c */
+int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
+			  gfp_t flags);
+struct nlmsghdr *handshake_genl_put(struct sk_buff *msg,
+				    struct genl_info *info);
+struct handshake_net *handshake_pernet(struct net *net);
+
+/* request.c */
+struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto,
+					  gfp_t flags);
+int handshake_req_hash_init(void);
+void handshake_req_hash_destroy(void);
+void *handshake_req_private(struct handshake_req *req);
+struct handshake_req *handshake_req_hash_lookup(struct sock *sk);
+struct handshake_req *handshake_req_next(struct handshake_net *hn, int class);
+int handshake_req_submit(struct socket *sock, struct handshake_req *req,
+			 gfp_t flags);
+void handshake_complete(struct handshake_req *req, unsigned int status,
+			struct genl_info *info);
+bool handshake_req_cancel(struct sock *sk);
+
+#endif /* _INTERNAL_HANDSHAKE_H */
diff --git a/net/handshake/netlink.c b/net/handshake/netlink.c
new file mode 100644
index 000000000000..7264cac04047
--- /dev/null
+++ b/net/handshake/netlink.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic netlink handshake service
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/mm.h>
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+#include "genl.h"
+
+#include <trace/events/handshake.h>
+
+/**
+ * handshake_genl_notify - Notify handlers that a request is waiting
+ * @net: target network namespace
+ * @proto: handshake protocol
+ * @flags: memory allocation flags (for the message and its multicast)
+ *
+ * Returns zero on success or a negative errno if notification failed.
+ */
+int handshake_genl_notify(struct net *net, const struct handshake_proto *proto,
+			  gfp_t flags)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	if (!genl_has_listeners(&handshake_nl_family, net,
+				proto->hp_handler_class))
+		return -ESRCH;
+
+	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &handshake_nl_family, 0,
+			  HANDSHAKE_CMD_READY);
+	if (!hdr)
+		goto out_free;
+
+	if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_HANDLER_CLASS,
+			proto->hp_handler_class) < 0) {
+		genlmsg_cancel(msg, hdr);
+		goto out_free;
+	}
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_multicast_netns(&handshake_nl_family, net, msg,
+				       0, proto->hp_handler_class, flags);
+
+out_free:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
+/**
+ * handshake_genl_put - Create a generic netlink message header
+ * @msg: buffer in which to create the header
+ * @info: generic netlink message context
+ *
+ * Returns a ready-to-use header, or NULL.
+ */
+struct nlmsghdr *handshake_genl_put(struct sk_buff *msg,
+				    struct genl_info *info)
+{
+	return genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			   &handshake_nl_family, 0, info->genlhdr->cmd);
+}
+EXPORT_SYMBOL(handshake_genl_put);
+
+/*
+ * handshake_nl_accept_doit - handler for HANDSHAKE_CMD_ACCEPT
+ *
+ * Dequeues the next pending request of the requested handler class
+ * and hands its socket to the calling process ("current") as a new
+ * O_CLOEXEC descriptor. The socket needs an instantiated struct file.
+ *
+ * Only the descriptor number is reserved before ->hp_accept() runs;
+ * the file reference is taken and installed once it has succeeded.
+ * Until then the reservation is the only thing to undo, so no error
+ * path drops a file reference it does not own.
+ *
+ * Returns zero on success. Otherwise a negative errno is returned
+ * and, if a request had been dequeued, it is completed with -EIO.
+ */
+int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct handshake_net *hn = handshake_pernet(net);
+	struct handshake_req *req = NULL;
+	struct socket *sock;
+	int class, fd, err;
+
+	err = -EOPNOTSUPP;
+	if (!hn)
+		goto out_status;
+
+	err = -EINVAL;
+	if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_ACCEPT_HANDLER_CLASS))
+		goto out_status;
+	class = nla_get_u32(info->attrs[HANDSHAKE_A_ACCEPT_HANDLER_CLASS]);
+
+	err = -EAGAIN;
+	req = handshake_req_next(hn, class);
+	if (!req)
+		goto out_status;
+
+	/* Without a struct file there is nothing to hand to user space */
+	err = -EBADF;
+	sock = req->hr_sk->sk_socket;
+	if (!sock->file)
+		goto out_complete;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0) {
+		err = fd;
+		goto out_complete;
+	}
+
+	err = req->hr_proto->hp_accept(req, info, fd);
+	if (err) {
+		/* Nothing was installed: release only the reserved number */
+		put_unused_fd(fd);
+		goto out_complete;
+	}
+
+	/* The fd table now owns the reference taken by get_file() */
+	fd_install(fd, get_file(sock->file));
+
+	trace_handshake_cmd_accept(net, req, req->hr_sk, fd);
+	return 0;
+
+out_complete:
+	handshake_complete(req, -EIO, NULL);
+out_status:
+	trace_handshake_cmd_accept_err(net, req, NULL, err);
+	return err;
+}
+
+/* HANDSHAKE_CMD_DONE handler: complete the request for the given SOCKFD */
+int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = sock_net(skb->sk);
+	struct handshake_req *req;
+	struct socket *sock;
+	int fd, status, err;
+
+	if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD))
+		return -EINVAL;
+	fd = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]);
+
+	/* No socket means no request or sock to trace: just bail out */
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		return -EBADF;
+
+	req = handshake_req_hash_lookup(sock->sk);
+	if (!req) {
+		err = -EBUSY;
+		goto out_status;
+	}
+
+	trace_handshake_cmd_done(net, req, sock->sk, fd);
+
+	status = -EIO;
+	if (info->attrs[HANDSHAKE_A_DONE_STATUS])
+		status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]);
+
+	handshake_complete(req, status, info);
+	fput(sock->file);
+	return 0;
+
+out_status:
+	/* Trace before dropping the reference that keeps sock->sk valid */
+	trace_handshake_cmd_done_err(net, req, sock->sk, err);
+	fput(sock->file);
+	return err;
+}
+
+static unsigned int handshake_net_id;
+
+static int __net_init handshake_net_init(struct net *net)
+{
+	struct handshake_net *hn = net_generic(net, handshake_net_id);
+	unsigned long tmp;
+	struct sysinfo si;
+
+	/*
+	 * Arbitrary limit to prevent handshakes that do not make
+	 * progress from clogging up the system. The cap scales up
+	 * with the amount of physical memory on the system.
+	 */
+	si_meminfo(&si);
+	tmp = si.totalram / (25 * si.mem_unit);
+	hn->hn_pending_max = clamp(tmp, 3UL, 50UL);
+
+	spin_lock_init(&hn->hn_lock);
+	hn->hn_pending = 0;
+	hn->hn_flags = 0;
+	INIT_LIST_HEAD(&hn->hn_requests);
+	return 0;
+}
+
+static void __net_exit handshake_net_exit(struct net *net)
+{
+	struct handshake_net *hn = net_generic(net, handshake_net_id);
+	struct handshake_req *req;
+	LIST_HEAD(requests);
+
+	/*
+	 * Drain the net's pending list by moving it onto a private
+	 * list. Requests that have been accepted and are in progress
+	 * will be destroyed when the socket is closed.
+	 */
+	spin_lock(&hn->hn_lock);
+	set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags);
+	list_splice_init(&hn->hn_requests, &requests);
+	spin_unlock(&hn->hn_lock);
+
+	while (!list_empty(&requests)) {
+		req = list_first_entry(&requests, struct handshake_req, hr_list);
+
+		/*
+		 * Requests on this list have not yet been accepted, so
+		 * they have no fd to put. list_del_init() keeps later
+		 * list_empty(&req->hr_list) checks meaningful.
+		 */
+		list_del_init(&req->hr_list);
+		handshake_complete(req, -ETIMEDOUT, NULL);
+	}
+}
+
+static struct pernet_operations __net_initdata handshake_genl_net_ops = {
+	.init		= handshake_net_init,
+	.exit		= handshake_net_exit,
+	.id		= &handshake_net_id,
+	.size		= sizeof(struct handshake_net),
+};
+
+/**
+ * handshake_pernet - Get the handshake private per-net structure
+ * @net: network namespace
+ *
+ * Returns a pointer to the net's private per-net structure for the
+ * handshake module, or NULL if handshake_init() failed.
+ */
+struct handshake_net *handshake_pernet(struct net *net)
+{
+	return handshake_net_id ?
+		net_generic(net, handshake_net_id) : NULL;
+}
+
+static int __init handshake_init(void)
+{
+	int ret;
+
+	ret = handshake_req_hash_init();
+	if (ret) {
+		pr_warn("handshake: hash initialization failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = genl_register_family(&handshake_nl_family);
+	if (ret) {
+		pr_warn("handshake: netlink registration failed (%d)\n", ret);
+		handshake_req_hash_destroy();
+		return ret;
+	}
+
+	/*
+	 * ORDER: register_pernet_subsys must be done last.
+	 *
+	 *	If initialization does not make it past pernet_subsys
+	 *	registration, then handshake_net_id will remain 0. That
+	 *	shunts the handshake consumer API to return EOPNOTSUPP
+	 *	to prevent it from dereferencing something that hasn't
+	 *	been allocated.
+	 */
+	ret = register_pernet_subsys(&handshake_genl_net_ops);
+	if (ret) {
+		pr_warn("handshake: pernet registration failed (%d)\n", ret);
+		genl_unregister_family(&handshake_nl_family);
+		handshake_req_hash_destroy();
+	}
+
+	return ret;
+}
+
+static void __exit handshake_exit(void)
+{
+	unregister_pernet_subsys(&handshake_genl_net_ops);
+	handshake_net_id = 0;
+
+	handshake_req_hash_destroy();
+	genl_unregister_family(&handshake_nl_family);
+}
+
+module_init(handshake_init);
+module_exit(handshake_exit);
diff --git a/net/handshake/request.c b/net/handshake/request.c
new file mode 100644
index 000000000000..d5b2bc6de057
--- /dev/null
+++ b/net/handshake/request.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Handshake request lifetime events
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/inet.h>
+#include <linux/fdtable.h>
+#include <linux/rhashtable.h>
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/netns/generic.h>
+
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+
+#include <trace/events/handshake.h>
+
+/*
+ * We need both a handshake_req -> sock mapping, and a sock ->
+ * handshake_req mapping. Both are one-to-one.
+ *
+ * To avoid adding another pointer field to struct sock, net/handshake
+ * maintains a hash table, indexed by the memory address of @sock, to
+ * find the struct handshake_req outstanding for that socket. The
+ * reverse direction uses a simple pointer field in the handshake_req
+ * struct.
+ */
+
+static struct rhashtable handshake_rhashtbl ____cacheline_aligned_in_smp;
+
+static const struct rhashtable_params handshake_rhash_params = {
+	.key_len		= sizeof_field(struct handshake_req, hr_sk),
+	.key_offset		= offsetof(struct handshake_req, hr_sk),
+	.head_offset		= offsetof(struct handshake_req, hr_rhash),
+	.automatic_shrinking	= true,
+};
+
+int handshake_req_hash_init(void)
+{
+	return rhashtable_init(&handshake_rhashtbl, &handshake_rhash_params);
+}
+
+void handshake_req_hash_destroy(void)
+{
+	rhashtable_destroy(&handshake_rhashtbl);
+}
+
+struct handshake_req *handshake_req_hash_lookup(struct sock *sk)
+{
+	return rhashtable_lookup_fast(&handshake_rhashtbl, &sk,
+				      handshake_rhash_params);
+}
+
+static bool handshake_req_hash_add(struct handshake_req *req)
+{
+	int ret;
+
+	ret = rhashtable_lookup_insert_fast(&handshake_rhashtbl,
+					    &req->hr_rhash,
+					    handshake_rhash_params);
+	return ret == 0;
+}
+
+static void handshake_req_destroy(struct handshake_req *req)
+{
+	if (req->hr_proto->hp_destroy)
+		req->hr_proto->hp_destroy(req);
+	rhashtable_remove_fast(&handshake_rhashtbl, &req->hr_rhash,
+			       handshake_rhash_params);
+	kfree(req);
+}
+
+static void handshake_sk_destruct(struct sock *sk)
+{
+	void (*sk_destruct)(struct sock *sk);
+	struct handshake_req *req;
+
+	req = handshake_req_hash_lookup(sk);
+	if (!req)
+		return;
+
+	trace_handshake_destruct(sock_net(sk), req, sk);
+	sk_destruct = req->hr_odestruct;
+	handshake_req_destroy(req);
+	if (sk_destruct)
+		sk_destruct(sk);
+}
+
+/**
+ * handshake_req_alloc - Allocate a handshake request
+ * @proto: security protocol
+ * @flags: memory allocation flags
+ *
+ * Returns an initialized handshake_req or NULL.
+ */
+struct handshake_req *handshake_req_alloc(const struct handshake_proto *proto,
+					  gfp_t flags)
+{
+	struct handshake_req *req;
+
+	if (!proto)
+		return NULL;
+	if (proto->hp_handler_class <= HANDSHAKE_HANDLER_CLASS_NONE)
+		return NULL;
+	if (proto->hp_handler_class >= HANDSHAKE_HANDLER_CLASS_MAX)
+		return NULL;
+	if (!proto->hp_accept || !proto->hp_done)
+		return NULL;
+
+	req = kzalloc(struct_size(req, hr_priv, proto->hp_privsize), flags);
+	if (!req)
+		return NULL;
+
+	INIT_LIST_HEAD(&req->hr_list);
+	req->hr_proto = proto;
+	return req;
+}
+EXPORT_SYMBOL(handshake_req_alloc);
+
+/**
+ * handshake_req_private - Get per-handshake private data
+ * @req: handshake arguments
+ *
+ */
+void *handshake_req_private(struct handshake_req *req)
+{
+	return (void *)&req->hr_priv;
+}
+EXPORT_SYMBOL(handshake_req_private);
+
+static bool __add_pending_locked(struct handshake_net *hn,
+				 struct handshake_req *req)
+{
+	if (WARN_ON_ONCE(!list_empty(&req->hr_list)))
+		return false;
+	hn->hn_pending++;
+	list_add_tail(&req->hr_list, &hn->hn_requests);
+	return true;
+}
+
+static void __remove_pending_locked(struct handshake_net *hn,
+				    struct handshake_req *req)
+{
+	hn->hn_pending--;
+	list_del_init(&req->hr_list);
+}
+
+/*
+ * Returns %true if the request was found on @net's pending list,
+ * otherwise %false.
+ *
+ * If @req was on a pending list, it has not yet been accepted.
+ */
+static bool remove_pending(struct handshake_net *hn, struct handshake_req *req)
+{
+	bool ret = false;
+
+	spin_lock(&hn->hn_lock);
+	if (!list_empty(&req->hr_list)) {
+		__remove_pending_locked(hn, req);
+		ret = true;
+	}
+	spin_unlock(&hn->hn_lock);
+
+	return ret;
+}
+
+struct handshake_req *handshake_req_next(struct handshake_net *hn, int class)
+{
+	struct handshake_req *req, *pos;
+
+	req = NULL;
+	spin_lock(&hn->hn_lock);
+	list_for_each_entry(pos, &hn->hn_requests, hr_list) {
+		if (pos->hr_proto->hp_handler_class != class)
+			continue;
+		__remove_pending_locked(hn, pos);
+		req = pos;
+		break;
+	}
+	spin_unlock(&hn->hn_lock);
+
+	return req;
+}
+
+/**
+ * handshake_req_submit - Submit a handshake request
+ * @sock: open socket on which to perform the handshake
+ * @req: handshake arguments
+ * @flags: memory allocation flags
+ *
+ * Return values:
+ *   %0: Request queued
+ *   %-EINVAL: Invalid argument
+ *   %-EBUSY: A handshake is already under way for this socket
+ *   %-ESRCH: No handshake agent is available
+ *   %-EAGAIN: Too many pending handshake requests
+ *   %-ENOMEM: Failed to allocate memory
+ *   %-EMSGSIZE: Failed to construct notification message
+ *   %-EOPNOTSUPP: Handshake module not initialized
+ *
+ * A zero return value from handshake_req_submit() means that
+ * exactly one subsequent completion callback is guaranteed.
+ *
+ * A negative return value from handshake_req_submit() means that
+ * no completion callback will be done and that @req has been
+ * destroyed.
+ */
+int handshake_req_submit(struct socket *sock, struct handshake_req *req,
+			 gfp_t flags)
+{
+	struct handshake_net *hn;
+	struct net *net;
+	int ret;
+
+	if (!sock || !req || !sock->file || !sock->sk) {
+		kfree(req);
+		return -EINVAL;
+	}
+
+	req->hr_sk = sock->sk;
+	req->hr_odestruct = req->hr_sk->sk_destruct;
+	req->hr_sk->sk_destruct = handshake_sk_destruct;
+
+	ret = -EOPNOTSUPP;
+	net = sock_net(req->hr_sk);
+	hn = handshake_pernet(net);
+	if (!hn)
+		goto out_err;
+
+	ret = -EAGAIN;
+	if (READ_ONCE(hn->hn_pending) >= hn->hn_pending_max)
+		goto out_err;
+
+	spin_lock(&hn->hn_lock);
+	ret = -EOPNOTSUPP;
+	if (test_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags))
+		goto out_unlock;
+	ret = -EBUSY;
+	if (!handshake_req_hash_add(req))
+		goto out_unlock;
+	if (!__add_pending_locked(hn, req))
+		goto out_unlock;
+	spin_unlock(&hn->hn_lock);
+
+	ret = handshake_genl_notify(net, req->hr_proto, flags);
+	if (ret) {
+		trace_handshake_notify_err(net, req, req->hr_sk, ret);
+		if (remove_pending(hn, req))
+			goto out_err;
+	}
+
+	/* Prevent socket release while a handshake request is pending */
+	sock_hold(req->hr_sk);
+
+	trace_handshake_submit(net, req, req->hr_sk);
+	return 0;
+
+out_unlock:
+	spin_unlock(&hn->hn_lock);
+out_err:
+	/* @req is freed below, so handshake_sk_destruct() could no longer
+	 * chain to the saved destructor: put the original one back.
+	 */
+	req->hr_sk->sk_destruct = req->hr_odestruct;
+	trace_handshake_submit_err(net, req, req->hr_sk, ret);
+	handshake_req_destroy(req);
+	return ret;
+}
+EXPORT_SYMBOL(handshake_req_submit);
+
+void handshake_complete(struct handshake_req *req, unsigned int status,
+			struct genl_info *info)
+{
+	struct sock *sk = req->hr_sk;
+	struct net *net = sock_net(sk);
+
+	if (!test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
+		trace_handshake_complete(net, req, sk, status);
+		req->hr_proto->hp_done(req, status, info);
+
+		/* Handshake request is no longer pending */
+		sock_put(sk);
+	}
+}
+
+/**
+ * handshake_req_cancel - Cancel an in-progress handshake
+ * @sk: socket on which there is an ongoing handshake
+ *
+ * Request cancellation races with request completion. To determine
+ * who won, callers examine the return value from this function.
+ *
+ * Return values:
+ *   %true - Uncompleted handshake request was canceled
+ *   %false - Handshake request already completed or not found
+ */
+bool handshake_req_cancel(struct sock *sk)
+{
+	struct handshake_req *req;
+	struct handshake_net *hn;
+	struct net *net;
+
+	net = sock_net(sk);
+	req = handshake_req_hash_lookup(sk);
+	if (!req) {
+		trace_handshake_cancel_none(net, req, sk);
+		return false;
+	}
+
+	hn = handshake_pernet(net);
+	if (hn && remove_pending(hn, req)) {
+		/* Request hadn't been accepted */
+		goto out_true;
+	}
+	if (test_and_set_bit(HANDSHAKE_F_REQ_COMPLETED, &req->hr_flags)) {
+		/* Request already completed */
+		trace_handshake_cancel_busy(net, req, sk);
+		return false;
+	}
+
+out_true:
+	trace_handshake_cancel(net, req, sk);
+
+	/* Handshake request is no longer pending */
+	sock_put(sk);
+	return true;
+}
+EXPORT_SYMBOL(handshake_req_cancel);
diff --git a/net/handshake/trace.c b/net/handshake/trace.c
new file mode 100644
index 000000000000..1c4d8e27e17a
--- /dev/null
+++ b/net/handshake/trace.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Trace points for transport security layer handshakes.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+
+#include <net/sock.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "handshake.h"
+
+#define CREATE_TRACE_POINTS
+
+#include <trace/events/handshake.h>
-- 
cgit v1.2.3


From 2fd5532044a89d2403b543520b4902e196f7d165 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Mon, 17 Apr 2023 10:32:33 -0400
Subject: net/handshake: Add a kernel API for requesting a TLSv1.3 handshake

To enable kernel consumers of TLS to request a TLS handshake, add
support to net/handshake/ to request a handshake upcall.

This patch also acts as a template for adding handshake upcall
support for other kernel transport layer security providers.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/netlink/specs/handshake.yaml |   4 +-
 Documentation/networking/index.rst         |   1 +
 Documentation/networking/tls-handshake.rst | 217 +++++++++++++++
 MAINTAINERS                                |   2 +
 include/net/handshake.h                    |  43 +++
 include/uapi/linux/handshake.h             |   2 +
 net/handshake/Makefile                     |   2 +-
 net/handshake/genl.c                       |   3 +-
 net/handshake/genl.h                       |   1 +
 net/handshake/tlshd.c                      | 417 +++++++++++++++++++++++++++++
 10 files changed, 689 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/networking/tls-handshake.rst
 create mode 100644 include/net/handshake.h
 create mode 100644 net/handshake/tlshd.c

(limited to 'Documentation')

diff --git a/Documentation/netlink/specs/handshake.yaml b/Documentation/netlink/specs/handshake.yaml
index 0333d92b1438..614f1a585511 100644
--- a/Documentation/netlink/specs/handshake.yaml
+++ b/Documentation/netlink/specs/handshake.yaml
@@ -16,7 +16,7 @@ definitions:
     type: enum
     name: handler-class
     value-start: 0
-    entries: [ none, max ]
+    entries: [ none, tlshd, max ]
   -
     type: enum
     name: msg-type
@@ -120,3 +120,5 @@ mcast-groups:
   list:
     -
       name: none
+    -
+      name: tlshd
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 24bb256d6d53..a164ff074356 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -36,6 +36,7 @@ Contents:
    scaling
    tls
    tls-offload
+   tls-handshake
    nfc
    6lowpan
    6pack
diff --git a/Documentation/networking/tls-handshake.rst b/Documentation/networking/tls-handshake.rst
new file mode 100644
index 000000000000..a2817a88e905
--- /dev/null
+++ b/Documentation/networking/tls-handshake.rst
@@ -0,0 +1,217 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================
+In-Kernel TLS Handshake
+=======================
+
+Overview
+========
+
+Transport Layer Security (TLS) is an Upper Layer Protocol (ULP) that runs
+over TCP. TLS provides end-to-end data integrity and confidentiality in
+addition to peer authentication.
+
+The kernel's kTLS implementation handles the TLS record subprotocol, but
+does not handle the TLS handshake subprotocol which is used to establish
+a TLS session. Kernel consumers can use the API described here to
+request TLS session establishment.
+
+There are several possible ways to provide a handshake service in the
+kernel. The API described here is designed to hide the details of those
+implementations so that in-kernel TLS consumers do not need to be
+aware of how the handshake gets done.
+
+
+User handshake agent
+====================
+
+As of this writing, there is no TLS handshake implementation in the
+Linux kernel. To provide a handshake service, a handshake agent
+(typically in user space) is started in each network namespace where a
+kernel consumer might require a TLS handshake. Handshake agents listen
+for events sent from the kernel that indicate a handshake request is
+waiting.
+
+An open socket is passed to a handshake agent via a netlink operation,
+which creates a socket descriptor in the agent's file descriptor table.
+If the handshake completes successfully, the handshake agent promotes
+the socket to use the TLS ULP and sets the session information using the
+SOL_TLS socket options. The handshake agent returns the socket to the
+kernel via a second netlink operation.
+
+
+Kernel Handshake API
+====================
+
+A kernel TLS consumer initiates a client-side TLS handshake on an open
+socket by invoking one of the tls_client_hello() functions. First, it
+fills in a structure that contains the parameters of the request:
+
+.. code-block:: c
+
+  struct tls_handshake_args {
+        struct socket   *ta_sock;
+        tls_done_func_t ta_done;
+        void            *ta_data;
+        unsigned int    ta_timeout_ms;
+        key_serial_t    ta_keyring;
+        key_serial_t    ta_my_cert;
+        key_serial_t    ta_my_privkey;
+        unsigned int    ta_num_peerids;
+        key_serial_t    ta_my_peerids[5];
+  };
+
+The @ta_sock field references an open and connected socket. The consumer
+must hold a reference on the socket to prevent it from being destroyed
+while the handshake is in progress. The consumer must also have
+instantiated a struct file in sock->file.
+
+
+@ta_done contains a callback function that is invoked when the handshake
+has completed. Further explanation of this function is in the "Handshake
+Completion" section below.
+
+The consumer can fill in the @ta_timeout_ms field to force the servicing
+handshake agent to exit after a number of milliseconds. This enables the
+socket to be fully closed once both the kernel and the handshake agent
+have closed their endpoints.
+
+Authentication material such as x.509 certificates, private certificate
+keys, and pre-shared keys are provided to the handshake agent in keys
+that are instantiated by the consumer before making the handshake
+request. The consumer can provide a private keyring that is linked into
+the handshake agent's process keyring in the @ta_keyring field to prevent
+access of those keys by other subsystems.
+
+To request an x.509-authenticated TLS session, the consumer fills in
+the @ta_my_cert and @ta_my_privkey fields with the serial numbers of
+keys containing an x.509 certificate and the private key for that
+certificate. Then, it invokes this function:
+
+.. code-block:: c
+
+  ret = tls_client_hello_x509(args, gfp_flags);
+
+The function returns zero when the handshake request is under way. A
+zero return guarantees the callback function @ta_done will be invoked
+for this socket. The function returns a negative errno if the handshake
+could not be started. A negative errno guarantees the callback function
+@ta_done will not be invoked on this socket.
+
+
+To initiate a client-side TLS handshake with a pre-shared key, use:
+
+.. code-block:: c
+
+  ret = tls_client_hello_psk(args, gfp_flags);
+
+However, in this case, the consumer fills in the @ta_my_peerids array
+with serial numbers of keys containing the peer identities it wishes
+to offer, and the @ta_num_peerids field with the number of array
+entries it has filled in. The other fields are filled in as above.
+
+
+To initiate an anonymous client-side TLS handshake use:
+
+.. code-block:: c
+
+  ret = tls_client_hello_anon(args, gfp_flags);
+
+The handshake agent presents no peer identity information to the remote
+during this type of handshake. Only server authentication (i.e. the client
+verifies the server's identity) is performed during the handshake. Thus
+the established session uses encryption only.
+
+
+Consumers that are in-kernel servers use:
+
+.. code-block:: c
+
+  ret = tls_server_hello_x509(args, gfp_flags);
+
+or
+
+.. code-block:: c
+
+  ret = tls_server_hello_psk(args, gfp_flags);
+
+The argument structure is filled in as above.
+
+
+If the consumer needs to cancel the handshake request, say, due to a ^C
+or other exigent event, the consumer can invoke:
+
+.. code-block:: c
+
+  bool tls_handshake_cancel(sock);
+
+This function returns true if the handshake request associated with
+@sock has been canceled. The consumer's handshake completion callback
+will not be invoked. If this function returns false, then the consumer's
+completion callback has already been invoked.
+
+
+Handshake Completion
+====================
+
+When the handshake agent has completed processing, it notifies the
+kernel that the socket may be used by the consumer again. At this point,
+the consumer's handshake completion callback, provided in the @ta_done
+field in the tls_handshake_args structure, is invoked.
+
+The synopsis of this function is:
+
+.. code-block:: c
+
+  typedef void	(*tls_done_func_t)(void *data, int status,
+                                   key_serial_t peerid);
+
+The consumer provides a cookie in the @ta_data field of the
+tls_handshake_args structure that is returned in the @data parameter of
+this callback. The consumer uses the cookie to match the callback to the
+thread waiting for the handshake to complete.
+
+The success status of the handshake is returned via the @status
+parameter:
+
++------------+----------------------------------------------+
+|  status    |  meaning                                     |
++============+==============================================+
+|  0         |  TLS session established successfully        |
++------------+----------------------------------------------+
+|  -EACCES   |  Remote peer rejected the handshake or       |
+|            |  authentication failed                       |
++------------+----------------------------------------------+
+|  -ENOMEM   |  Temporary resource allocation failure       |
++------------+----------------------------------------------+
+|  -EINVAL   |  Consumer provided an invalid argument       |
++------------+----------------------------------------------+
+|  -ENOKEY   |  Missing authentication material             |
++------------+----------------------------------------------+
+|  -EIO      |  An unexpected fault occurred                |
++------------+----------------------------------------------+
+
+The @peerid parameter contains the serial number of a key containing the
+remote peer's identity or the value TLS_NO_PEERID if the session is not
+authenticated.
+
+A best practice is to close and destroy the socket immediately if the
+handshake failed.
+
+
+Other considerations
+--------------------
+
+While a handshake is under way, the kernel consumer must alter the
+socket's sk_data_ready callback function to ignore all incoming data.
+Once the handshake completion callback function has been invoked, normal
+receive operation can be resumed.
+
+Once a TLS session is established, the consumer must provide a buffer
+for and then examine the control message (CMSG) that is part of every
+subsequent sock_recvmsg(). Each control message indicates whether the
+received message data is TLS record data or session metadata.
+
+See tls.rst for details on how a kTLS consumer recognizes incoming
+(decrypted) application data, alerts, and handshake packets once the
+socket has been promoted to use the TLS ULP.
diff --git a/MAINTAINERS b/MAINTAINERS
index cdc7748d15b8..04ebde8ccb75 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8953,6 +8953,8 @@ L:	kernel-tls-handshake@lists.linux.dev
 L:	netdev@vger.kernel.org
 S:	Maintained
 F:	Documentation/netlink/specs/handshake.yaml
+F:	Documentation/networking/tls-handshake.rst
+F:	include/net/handshake.h
 F:	include/trace/events/handshake.h
 F:	net/handshake/
 
diff --git a/include/net/handshake.h b/include/net/handshake.h
new file mode 100644
index 000000000000..3352b1ab43b3
--- /dev/null
+++ b/include/net/handshake.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Generic netlink HANDSHAKE service.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2023, Oracle and/or its affiliates.
+ */
+
+#ifndef _NET_HANDSHAKE_H
+#define _NET_HANDSHAKE_H
+
+enum {
+	TLS_NO_KEYRING = 0,	/* no private keyring supplied */
+	TLS_NO_PEERID = 0,	/* session is not authenticated */
+	TLS_NO_CERT = 0,	/* no x.509 certificate supplied */
+	TLS_NO_PRIVKEY = 0,	/* no private key supplied */
+};
+
+typedef void	(*tls_done_func_t)(void *data, int status,
+				   key_serial_t peerid);
+
+struct tls_handshake_args {
+	struct socket		*ta_sock;	/* open, connected socket */
+	tls_done_func_t		ta_done;	/* completion callback */
+	void			*ta_data;	/* cookie handed to ta_done */
+	unsigned int		ta_timeout_ms;	/* agent timeout, in ms */
+	key_serial_t		ta_keyring;	/* or TLS_NO_KEYRING */
+	key_serial_t		ta_my_cert;	/* x.509 certificate key */
+	key_serial_t		ta_my_privkey;	/* key for ta_my_cert */
+	unsigned int		ta_num_peerids;	/* used ta_my_peerids[] */
+	key_serial_t		ta_my_peerids[5]; /* PSK identities */
+};
+
+int tls_client_hello_anon(const struct tls_handshake_args *args, gfp_t flags);
+int tls_client_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
+int tls_client_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
+int tls_server_hello_x509(const struct tls_handshake_args *args, gfp_t flags);
+int tls_server_hello_psk(const struct tls_handshake_args *args, gfp_t flags);
+
+bool tls_handshake_cancel(struct sock *sk);
+
+#endif /* _NET_HANDSHAKE_H */
diff --git a/include/uapi/linux/handshake.h b/include/uapi/linux/handshake.h
index 7f66ff489b87..1de4d0b95325 100644
--- a/include/uapi/linux/handshake.h
+++ b/include/uapi/linux/handshake.h
@@ -11,6 +11,7 @@
 
 enum handshake_handler_class {
 	HANDSHAKE_HANDLER_CLASS_NONE,
+	HANDSHAKE_HANDLER_CLASS_TLSHD,
 	HANDSHAKE_HANDLER_CLASS_MAX,
 };
 
@@ -67,5 +68,6 @@ enum {
 };
 
 #define HANDSHAKE_MCGRP_NONE	"none"
+#define HANDSHAKE_MCGRP_TLSHD	"tlshd"
 
 #endif /* _UAPI_LINUX_HANDSHAKE_H */
diff --git a/net/handshake/Makefile b/net/handshake/Makefile
index d38736de45da..a089f7e3df24 100644
--- a/net/handshake/Makefile
+++ b/net/handshake/Makefile
@@ -8,4 +8,4 @@
 #
 
 obj-y += handshake.o
-handshake-y := genl.o netlink.o request.o trace.o
+handshake-y := genl.o netlink.o request.o tlshd.o trace.o
diff --git a/net/handshake/genl.c b/net/handshake/genl.c
index 652f37d19bd6..9f29efb1493e 100644
--- a/net/handshake/genl.c
+++ b/net/handshake/genl.c
@@ -12,7 +12,7 @@
 
 /* HANDSHAKE_CMD_ACCEPT - do */
 static const struct nla_policy handshake_accept_nl_policy[HANDSHAKE_A_ACCEPT_HANDLER_CLASS + 1] = {
-	[HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = NLA_POLICY_MAX(NLA_U32, 1),
+	[HANDSHAKE_A_ACCEPT_HANDLER_CLASS] = NLA_POLICY_MAX(NLA_U32, 2),
 };
 
 /* HANDSHAKE_CMD_DONE - do */
@@ -42,6 +42,7 @@ static const struct genl_split_ops handshake_nl_ops[] = {
 
 static const struct genl_multicast_group handshake_nl_mcgrps[] = {
 	[HANDSHAKE_NLGRP_NONE] = { "none", },
+	[HANDSHAKE_NLGRP_TLSHD] = { "tlshd", },
 };
 
 struct genl_family handshake_nl_family __ro_after_init = {
diff --git a/net/handshake/genl.h b/net/handshake/genl.h
index a1eb7ccccc7f..2c1f1aa6a02a 100644
--- a/net/handshake/genl.h
+++ b/net/handshake/genl.h
@@ -16,6 +16,7 @@ int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info);
 
 enum {
 	HANDSHAKE_NLGRP_NONE,
+	HANDSHAKE_NLGRP_TLSHD,
 };
 
 extern struct genl_family handshake_nl_family;
diff --git a/net/handshake/tlshd.c b/net/handshake/tlshd.c
new file mode 100644
index 000000000000..1b8353296060
--- /dev/null
+++ b/net/handshake/tlshd.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Establish a TLS session for a kernel socket consumer
+ * using the tlshd user space handler.
+ *
+ * Author: Chuck Lever <chuck.lever@oracle.com>
+ *
+ * Copyright (c) 2021-2023, Oracle and/or its affiliates.
+ */
+
+#include <linux/types.h>
+#include <linux/socket.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/key.h>
+
+#include <net/sock.h>
+#include <net/handshake.h>
+#include <net/genetlink.h>
+
+#include <uapi/linux/keyctl.h>
+#include <uapi/linux/handshake.h>
+#include "handshake.h"
+
+struct tls_handshake_req {
+	void			(*th_consumer_done)(void *data, int status,
+						    key_serial_t peerid);
+	void			*th_consumer_data;
+
+	int			th_type;	/* HANDSHAKE_MSG_TYPE_* */
+	unsigned int		th_timeout_ms;
+	int			th_auth_mode;	/* HANDSHAKE_AUTH_* */
+	key_serial_t		th_keyring;
+	key_serial_t		th_certificate;
+	key_serial_t		th_privkey;
+
+	unsigned int		th_num_peerids;
+	key_serial_t		th_peerid[5];	/* sent on ACCEPT, refilled on DONE */
+};
+
+static struct tls_handshake_req *
+tls_handshake_req_init(struct handshake_req *req,
+		       const struct tls_handshake_args *args)
+{
+	struct tls_handshake_req *treq = handshake_req_private(req);
+
+	treq->th_timeout_ms = args->ta_timeout_ms;
+	treq->th_consumer_done = args->ta_done;
+	treq->th_consumer_data = args->ta_data;
+	treq->th_keyring = args->ta_keyring;
+	treq->th_num_peerids = 0;	/* PSK callers override */
+	treq->th_certificate = TLS_NO_CERT;	/* x.509 callers override */
+	treq->th_privkey = TLS_NO_PRIVKEY;	/* x.509 callers override */
+	return treq;	/* caller still sets th_type and th_auth_mode */
+}
+
+static void tls_handshake_remote_peerids(struct tls_handshake_req *treq,
+					 struct genl_info *info)
+{
+	struct nlattr *head = nlmsg_attrdata(info->nlhdr, GENL_HDRLEN);
+	int rem, len = nlmsg_attrlen(info->nlhdr, GENL_HDRLEN);
+	struct nlattr *nla;
+	unsigned int i;
+
+	i = 0;	/* pass 1: count the REMOTE_AUTH attributes */
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla_type(nla) == HANDSHAKE_A_DONE_REMOTE_AUTH)
+			i++;
+	}
+	if (!i)
+		return;	/* none present: treq is left untouched */
+	treq->th_num_peerids = min_t(unsigned int, i,
+				     ARRAY_SIZE(treq->th_peerid));
+
+	i = 0;	/* pass 2: copy as many peer IDs as fit in th_peerid[] */
+	nla_for_each_attr(nla, head, len, rem) {
+		if (nla_type(nla) == HANDSHAKE_A_DONE_REMOTE_AUTH)
+			treq->th_peerid[i++] = nla_get_u32(nla);
+		if (i >= treq->th_num_peerids)
+			break;
+	}
+}
+
+/**
+ * tls_handshake_done - callback to handle a CMD_DONE request
+ * @req: handshake request that has completed
+ * @status: session status code; handed to the consumer negated
+ * @info: full results of session establishment; may be NULL
+ *
+ */
+static void tls_handshake_done(struct handshake_req *req,
+			       unsigned int status, struct genl_info *info)
+{
+	struct tls_handshake_req *treq = handshake_req_private(req);
+
+	treq->th_peerid[0] = TLS_NO_PEERID;	/* default if none reported */
+	if (info)
+		tls_handshake_remote_peerids(treq, info);
+
+	treq->th_consumer_done(treq->th_consumer_data, -status,
+			       treq->th_peerid[0]);	/* first peer ID only */
+}
+
+#if IS_ENABLED(CONFIG_KEYS)
+static int tls_handshake_private_keyring(struct tls_handshake_req *treq)
+{
+	key_ref_t process_keyring_ref, keyring_ref;
+	int ret;
+
+	if (treq->th_keyring == TLS_NO_KEYRING)
+		return 0;	/* consumer supplied no private keyring */
+
+	process_keyring_ref = lookup_user_key(KEY_SPEC_PROCESS_KEYRING,
+					      KEY_LOOKUP_CREATE,
+					      KEY_NEED_WRITE);
+	if (IS_ERR(process_keyring_ref)) {
+		ret = PTR_ERR(process_keyring_ref);
+		goto out;
+	}
+
+	keyring_ref = lookup_user_key(treq->th_keyring, KEY_LOOKUP_CREATE,
+				      KEY_NEED_LINK);
+	if (IS_ERR(keyring_ref)) {
+		ret = PTR_ERR(keyring_ref);
+		goto out_put_key;
+	}
+
+	ret = key_link(key_ref_to_ptr(process_keyring_ref),
+		       key_ref_to_ptr(keyring_ref));
+
+	key_ref_put(keyring_ref);	/* both lookup refs are dropped below */
+out_put_key:
+	key_ref_put(process_keyring_ref);
+out:
+	return ret;
+}
+#else /* !CONFIG_KEYS */
+static int tls_handshake_private_keyring(struct tls_handshake_req *treq)
+{
+	return 0;	/* no keyring support: nothing to link */
+}
+#endif /* CONFIG_KEYS */
+
+static int tls_handshake_put_peer_identity(struct sk_buff *msg,
+					   struct tls_handshake_req *treq)
+{
+	unsigned int i;
+
+	for (i = 0; i < treq->th_num_peerids; i++)	/* one attribute per ID */
+		if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_PEER_IDENTITY,
+				treq->th_peerid[i]) < 0)
+			return -EMSGSIZE;
+	return 0;
+}
+
+static int tls_handshake_put_certificate(struct sk_buff *msg,
+					 struct tls_handshake_req *treq)
+{
+	struct nlattr *entry_attr;
+
+	if (treq->th_certificate == TLS_NO_CERT &&
+	    treq->th_privkey == TLS_NO_PRIVKEY)
+		return 0;	/* neither key set: the attribute is omitted */
+
+	entry_attr = nla_nest_start(msg, HANDSHAKE_A_ACCEPT_CERTIFICATE);
+	if (!entry_attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, HANDSHAKE_A_X509_CERT,
+			treq->th_certificate) ||
+	    nla_put_u32(msg, HANDSHAKE_A_X509_PRIVKEY,
+			treq->th_privkey)) {
+		nla_nest_cancel(msg, entry_attr);
+		return -EMSGSIZE;
+	}
+
+	nla_nest_end(msg, entry_attr);
+	return 0;
+}
+
+/**
+ * tls_handshake_accept - callback to construct a CMD_ACCEPT response
+ * @req: handshake parameters to return
+ * @info: generic netlink message context
+ * @fd: file descriptor to be returned
+ *
+ * Returns zero on success, or a negative errno on failure.
+ */
+static int tls_handshake_accept(struct handshake_req *req,
+				struct genl_info *info, int fd)
+{
+	struct tls_handshake_req *treq = handshake_req_private(req);
+	struct nlmsghdr *hdr;
+	struct sk_buff *msg;
+	int ret;
+
+	ret = tls_handshake_private_keyring(treq);
+	if (ret < 0)
+		goto out;
+
+	ret = -ENOMEM;
+	msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		goto out;
+	hdr = handshake_genl_put(msg, info);
+	if (!hdr)
+		goto out_cancel;
+
+	ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_SOCKFD, fd);
+	if (ret < 0)
+		goto out_cancel;
+	ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_MESSAGE_TYPE, treq->th_type);
+	if (ret < 0)
+		goto out_cancel;
+	if (treq->th_timeout_ms) {
+		ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_TIMEOUT, treq->th_timeout_ms);
+		if (ret < 0)
+			goto out_cancel;
+	}
+
+	ret = nla_put_u32(msg, HANDSHAKE_A_ACCEPT_AUTH_MODE,
+			  treq->th_auth_mode);
+	if (ret < 0)
+		goto out_cancel;
+	switch (treq->th_auth_mode) {
+	case HANDSHAKE_AUTH_PSK:
+		ret = tls_handshake_put_peer_identity(msg, treq);
+		if (ret < 0)
+			goto out_cancel;
+		break;
+	case HANDSHAKE_AUTH_X509:
+		ret = tls_handshake_put_certificate(msg, treq);
+		if (ret < 0)
+			goto out_cancel;
+		break;
+	}
+
+	genlmsg_end(msg, hdr);
+	return genlmsg_reply(msg, info);
+
+out_cancel:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);	/* genlmsg_cancel() only trims; the skb is still ours */
+out:
+	return ret;
+}
+
+static const struct handshake_proto tls_handshake_proto = {
+	.hp_handler_class	= HANDSHAKE_HANDLER_CLASS_TLSHD,
+	.hp_privsize		= sizeof(struct tls_handshake_req),
+
+	.hp_accept		= tls_handshake_accept,	/* builds CMD_ACCEPT reply */
+	.hp_done		= tls_handshake_done,	/* handles CMD_DONE */
+};
+
+/**
+ * tls_client_hello_anon - request an anonymous TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ *   %0: Handshake request enqueued; @args->ta_done is called on completion
+ *   %-ESRCH: No user agent is available
+ *   %-ENOMEM: Memory allocation failed
+ */
+int tls_client_hello_anon(const struct tls_handshake_args *args, gfp_t flags)
+{
+	struct tls_handshake_req *treq;
+	struct handshake_req *req;
+
+	req = handshake_req_alloc(&tls_handshake_proto, flags);
+	if (!req)
+		return -ENOMEM;
+	treq = tls_handshake_req_init(req, args);
+	treq->th_type = HANDSHAKE_MSG_TYPE_CLIENTHELLO;
+	treq->th_auth_mode = HANDSHAKE_AUTH_UNAUTH;
+
+	return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_client_hello_anon);
+
+/**
+ * tls_client_hello_x509 - request an x.509-based TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ *   %0: Handshake request enqueued; @args->ta_done is called on completion
+ *   %-ESRCH: No user agent is available
+ *   %-ENOMEM: Memory allocation failed
+ */
+int tls_client_hello_x509(const struct tls_handshake_args *args, gfp_t flags)
+{
+	struct tls_handshake_req *treq;
+	struct handshake_req *req;
+
+	req = handshake_req_alloc(&tls_handshake_proto, flags);
+	if (!req)
+		return -ENOMEM;
+	treq = tls_handshake_req_init(req, args);
+	treq->th_type = HANDSHAKE_MSG_TYPE_CLIENTHELLO;
+	treq->th_auth_mode = HANDSHAKE_AUTH_X509;
+	treq->th_certificate = args->ta_my_cert;
+	treq->th_privkey = args->ta_my_privkey;
+
+	return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_client_hello_x509);
+
+/**
+ * tls_client_hello_psk - request a PSK-based TLS handshake on a socket
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ *   %0: Handshake request enqueued; @args->ta_done is called on completion
+ *   %-EINVAL: Wrong number of local peer IDs
+ *   %-ESRCH: No user agent is available
+ *   %-ENOMEM: Memory allocation failed
+ */
+int tls_client_hello_psk(const struct tls_handshake_args *args, gfp_t flags)
+{
+	struct tls_handshake_req *treq;
+	struct handshake_req *req;
+	unsigned int i;
+
+	if (!args->ta_num_peerids ||	/* treq below is only sized, not read */
+	    args->ta_num_peerids > ARRAY_SIZE(treq->th_peerid))
+		return -EINVAL;
+
+	req = handshake_req_alloc(&tls_handshake_proto, flags);
+	if (!req)
+		return -ENOMEM;
+	treq = tls_handshake_req_init(req, args);
+	treq->th_type = HANDSHAKE_MSG_TYPE_CLIENTHELLO;
+	treq->th_auth_mode = HANDSHAKE_AUTH_PSK;
+	treq->th_num_peerids = args->ta_num_peerids;
+	for (i = 0; i < args->ta_num_peerids; i++)
+		treq->th_peerid[i] = args->ta_my_peerids[i];
+
+	return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_client_hello_psk);
+
+/**
+ * tls_server_hello_x509 - request an x.509-based server TLS handshake
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ *   %0: Handshake request enqueued; @args->ta_done is called on completion
+ *   %-ESRCH: No user agent is available
+ *   %-ENOMEM: Memory allocation failed
+ */
+int tls_server_hello_x509(const struct tls_handshake_args *args, gfp_t flags)
+{
+	struct tls_handshake_req *treq;
+	struct handshake_req *req;
+
+	req = handshake_req_alloc(&tls_handshake_proto, flags);
+	if (!req)
+		return -ENOMEM;
+	treq = tls_handshake_req_init(req, args);
+	treq->th_type = HANDSHAKE_MSG_TYPE_SERVERHELLO;
+	treq->th_auth_mode = HANDSHAKE_AUTH_X509;
+	treq->th_certificate = args->ta_my_cert;
+	treq->th_privkey = args->ta_my_privkey;
+
+	return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_server_hello_x509);
+
+/**
+ * tls_server_hello_psk - request a PSK-based server TLS handshake
+ * @args: socket and handshake parameters for this request
+ * @flags: memory allocation control flags
+ *
+ * Return values:
+ *   %0: Handshake request enqueued; @args->ta_done is called on completion
+ *   %-ESRCH: No user agent is available
+ *   %-ENOMEM: Memory allocation failed
+ */
+int tls_server_hello_psk(const struct tls_handshake_args *args, gfp_t flags)
+{
+	struct tls_handshake_req *treq;
+	struct handshake_req *req;
+
+	req = handshake_req_alloc(&tls_handshake_proto, flags);
+	if (!req)
+		return -ENOMEM;
+	treq = tls_handshake_req_init(req, args);
+	treq->th_type = HANDSHAKE_MSG_TYPE_SERVERHELLO;
+	treq->th_auth_mode = HANDSHAKE_AUTH_PSK;
+	treq->th_num_peerids = 1;	/* args->ta_num_peerids is not consulted */
+	treq->th_peerid[0] = args->ta_my_peerids[0];
+
+	return handshake_req_submit(args->ta_sock, req, flags);
+}
+EXPORT_SYMBOL(tls_server_hello_psk);
+
+/**
+ * tls_handshake_cancel - cancel a pending handshake
+ * @sk: socket on which there is an ongoing handshake
+ *
+ * Request cancellation races with request completion. To determine
+ * who won, callers examine the return value from this function.
+ *
+ * Return values:
+ *   %true - Uncompleted handshake request was canceled
+ *   %false - Handshake request already completed or not found
+ */
+bool tls_handshake_cancel(struct sock *sk)
+{
+	return handshake_req_cancel(sk);	/* result passed through unchanged */
+}
+EXPORT_SYMBOL(tls_handshake_cancel);
-- 
cgit v1.2.3


From b0bc615df488abd0e95107e4a9ecefb9bf8c250a Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Tue, 21 Mar 2023 00:10:16 +0200
Subject: net/mlx5: Add vnic devlink health reporter to PFs/VFs

Create a vnic devlink health reporter for PFs/VFs interfaces.
The reporter's diagnose callback displays the values of vNIC/vport
transport debug counters of PFs/VFs, as follows:

$ devlink health diagnose pci/0000:08:00.0 reporter vnic
 vNIC env counters:
    total_error_queues: 0 send_queue_priority_update_flow: 0
    comp_eq_overrun: 0 async_eq_overrun: 0 cq_overrun: 0
    invalid_command: 0 quota_exceeded_command: 0
    nic_receive_steering_discard: 0

Moreover, add documentation on the reporter functionality and the
counters description.

While at it, expose the vNIC counters diagnose function to be used by
the downstream patch, which will reveal the counters for representor
interfaces.

Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/devlink.rst             |  30 +++++
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../mellanox/mlx5/core/diag/reporter_vnic.c        | 125 +++++++++++++++++++++
 .../mellanox/mlx5/core/diag/reporter_vnic.h        |  16 +++
 drivers/net/ethernet/mellanox/mlx5/core/health.c   |   4 +
 include/linux/mlx5/driver.h                        |   1 +
 6 files changed, 177 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
index 0995e4e5acd7..ceab18e46456 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -257,3 +257,33 @@ User commands examples:
     $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal
 
 NOTE: This command can run only on PF.
+
+vnic reporter
+-------------
+The vnic reporter implements only the `diagnose` callback.
+It is responsible for querying the vnic diagnostic counters from fw and displaying
+them in realtime.
+
+Description of the vnic counters:
+total_error_queues: number of queues in an error state due to
+an async error or errored command.
+send_queue_priority_update_flow: number of QP/SQ priority/SL update
+events.
+cq_overrun: number of times CQ entered an error state due to an
+overflow.
+async_eq_overrun: number of times an EQ mapped to async events was
+overrun.
+comp_eq_overrun: number of times an EQ mapped to completion events was
+overrun.
+quota_exceeded_command: number of commands issued and failed due to quota
+exceeded.
+invalid_command: number of commands issued and failed due to any reason
+other than quota exceeded.
+nic_receive_steering_discard: number of packets that completed RX flow
+steering but were discarded due to a mismatch in flow table.
+
+User commands examples:
+- Diagnose PF/VF vnic counters
+        $ devlink health diagnose pci/0000:82:00.1 reporter vnic
+
+NOTE: This command can run only on PF/VF ports.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 68f6a4544f7e..ddf1e352f51d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -16,7 +16,7 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
-		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
+		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
 		fw_reset.o qos.o lib/tout.o lib/aso.o
 
 #
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
new file mode 100644
index 000000000000..9114661cd967
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
+
+#include "reporter_vnic.h"
+#include "devlink.h"
+
+#define VNIC_ENV_GET64(vnic_env_stats, c) \
+	MLX5_GET64(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \
+		 vport_env.c)
+
+struct mlx5_vnic_diag_stats {
+	__be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)];
+};
+
+int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
+					 struct devlink_fmsg *fmsg,
+					 u16 vport_num, bool other_vport)
+{
+	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+	struct mlx5_vnic_diag_stats vnic;
+	int err;
+
+	MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+	MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+	MLX5_SET(query_vnic_env_in, in, other_vport, !!other_vport);
+
+	err = mlx5_cmd_exec_inout(dev, query_vnic_env, in, &vnic.query_vnic_env_out);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_start(fmsg, "vNIC env counters");
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_start(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
+					VNIC_ENV_GET64(&vnic, total_error_queues));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
+					VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
+					VNIC_ENV_GET64(&vnic, comp_eq_overrun));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
+					VNIC_ENV_GET64(&vnic, async_eq_overrun));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
+					VNIC_ENV_GET64(&vnic, cq_overrun));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
+					VNIC_ENV_GET64(&vnic, invalid_command));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
+					VNIC_ENV_GET64(&vnic, quota_exceeded_command));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+					VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
+	if (err)
+		return err;
+
+	err = devlink_fmsg_obj_nest_end(fmsg);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_pair_nest_end(fmsg);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int mlx5_reporter_vnic_diagnose(struct devlink_health_reporter *reporter,
+				       struct devlink_fmsg *fmsg,
+				       struct netlink_ext_ack *extack)
+{
+	struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter);
+
+	return mlx5_reporter_vnic_diagnose_counters(dev, fmsg, 0, false);
+}
+
+static const struct devlink_health_reporter_ops mlx5_reporter_vnic_ops = {
+	.name = "vnic",
+	.diagnose = mlx5_reporter_vnic_diagnose,
+};
+
+void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+	struct devlink *devlink = priv_to_devlink(dev);
+
+	health->vnic_reporter =
+		devlink_health_reporter_create(devlink,
+					       &mlx5_reporter_vnic_ops,
+					       0, dev);
+	if (IS_ERR(health->vnic_reporter))
+		mlx5_core_warn(dev,
+			       "Failed to create vnic reporter, err = %ld\n",
+			       PTR_ERR(health->vnic_reporter));
+}
+
+void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev)
+{
+	struct mlx5_core_health *health = &dev->priv.health;
+
+	if (!IS_ERR_OR_NULL(health->vnic_reporter))
+		devlink_health_reporter_destroy(health->vnic_reporter);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h
new file mode 100644
index 000000000000..eba87a39e9b1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+ * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __MLX5_REPORTER_VNIC_H
+#define __MLX5_REPORTER_VNIC_H
+
+#include "mlx5_core.h"
+
+void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev);
+void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev);
+
+int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
+					 struct devlink_fmsg *fmsg,
+					 u16 vport_num, bool other_vport);
+
+#endif /* __MLX5_REPORTER_VNIC_H */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 016c5f99c470..871c32dda66e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -42,6 +42,7 @@
 #include "lib/pci_vsc.h"
 #include "lib/tout.h"
 #include "diag/fw_tracer.h"
+#include "diag/reporter_vnic.h"
 
 enum {
 	MAX_MISSES			= 3,
@@ -898,6 +899,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev)
 
 	cancel_delayed_work_sync(&health->update_fw_log_ts_work);
 	destroy_workqueue(health->wq);
+	mlx5_reporter_vnic_destroy(dev);
 	mlx5_fw_reporters_destroy(dev);
 }
 
@@ -907,6 +909,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 	char *name;
 
 	mlx5_fw_reporters_create(dev);
+	mlx5_reporter_vnic_create(dev);
 
 	health = &dev->priv.health;
 	name = kmalloc(64, GFP_KERNEL);
@@ -926,6 +929,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev)
 	return 0;
 
 out_err:
+	mlx5_reporter_vnic_destroy(dev);
 	mlx5_fw_reporters_destroy(dev);
 	return -ENOMEM;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 135a3c8d8237..5d25c4c73046 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -439,6 +439,7 @@ struct mlx5_core_health {
 	struct work_struct		report_work;
 	struct devlink_health_reporter *fw_reporter;
 	struct devlink_health_reporter *fw_fatal_reporter;
+	struct devlink_health_reporter *vnic_reporter;
 	struct delayed_work		update_fw_log_ts_work;
 };
 
-- 
cgit v1.2.3


From cf14af140a5ad0937d385ce693100f33f02e9c54 Mon Sep 17 00:00:00 2001
From: Maher Sanalla <msanalla@nvidia.com>
Date: Tue, 21 Mar 2023 13:33:00 +0200
Subject: net/mlx5e: Add vnic devlink health reporter to representors

Create a new devlink health reporter for representor interface, which
reports the values of representor vnic diagnostic counters when diagnosed.

This patch will allow admins to monitor VF diagnostic counters through
the representor-interface vnic reporter.

Example of usage:
$ devlink health diagnose pci/0000:08:00.0/65537 reporter vnic
  vNIC env counters:
    total_error_queues: 0 send_queue_priority_update_flow: 0
    comp_eq_overrun: 0 async_eq_overrun: 0 cq_overrun: 0
    invalid_command: 0 quota_exceeded_command: 0
    nic_receive_steering_discard: 0

Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../ethernet/mellanox/mlx5/devlink.rst             |  5 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 52 +++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  1 +
 3 files changed, 55 insertions(+), 3 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
index ceab18e46456..3a7a714cc08f 100644
--- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
+++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst
@@ -285,5 +285,8 @@ steering but were discarded due to a mismatch in flow table.
 User commands examples:
 - Diagnose PF/VF vnic counters
         $ devlink health diagnose pci/0000:82:00.1 reporter vnic
+- Diagnose representor vnic counters (performed by supplying devlink port of the
+  representor, which can be obtained via devlink port command)
+        $ devlink health diagnose pci/0000:82:00.1/65537 reporter vnic
 
-NOTE: This command can run only on PF/VF ports.
+NOTE: This command can run over all interfaces such as PF/VF and representor ports.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 8ff654b4e9e1..2d87068f63fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -53,6 +53,7 @@
 #include "lib/vxlan.h"
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
+#include "diag/reporter_vnic.h"
 #include "en_accel/ipsec.h"
 #include "en/tc/int_port.h"
 #include "en/ptp.h"
@@ -1294,6 +1295,50 @@ static unsigned int mlx5e_ul_rep_stats_grps_num(struct mlx5e_priv *priv)
 	return ARRAY_SIZE(mlx5e_ul_rep_stats_grps);
 }
 
+static int
+mlx5e_rep_vnic_reporter_diagnose(struct devlink_health_reporter *reporter,
+				 struct devlink_fmsg *fmsg,
+				 struct netlink_ext_ack *extack)
+{
+	struct mlx5e_rep_priv *rpriv = devlink_health_reporter_priv(reporter);
+	struct mlx5_eswitch_rep *rep = rpriv->rep;
+
+	return mlx5_reporter_vnic_diagnose_counters(rep->esw->dev, fmsg,
+						    rep->vport, true);
+}
+
+static const struct devlink_health_reporter_ops mlx5_rep_vnic_reporter_ops = {
+	.name = "vnic",
+	.diagnose = mlx5e_rep_vnic_reporter_diagnose,
+};
+
+static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv,
+					   struct devlink_port *dl_port)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct devlink_health_reporter *reporter;
+
+	reporter = devl_port_health_reporter_create(dl_port,
+						    &mlx5_rep_vnic_reporter_ops,
+						    0, rpriv);
+	if (IS_ERR(reporter)) {
+		mlx5_core_err(priv->mdev,
+			      "Failed to create representor vnic reporter, err = %ld\n",
+			      PTR_ERR(reporter));
+		return;
+	}
+
+	rpriv->rep_vnic_reporter = reporter;
+}
+
+static void mlx5e_rep_vnic_reporter_destroy(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	if (!IS_ERR_OR_NULL(rpriv->rep_vnic_reporter))
+		devl_health_reporter_destroy(rpriv->rep_vnic_reporter);
+}
+
 static const struct mlx5e_profile mlx5e_rep_profile = {
 	.init			= mlx5e_init_rep,
 	.cleanup		= mlx5e_cleanup_rep,
@@ -1394,8 +1439,10 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 
 	dl_port = mlx5_esw_offloads_devlink_port(dev->priv.eswitch,
 						 rpriv->rep->vport);
-	if (dl_port)
+	if (dl_port) {
 		SET_NETDEV_DEVLINK_PORT(netdev, dl_port);
+		mlx5e_rep_vnic_reporter_create(priv, dl_port);
+	}
 
 	err = register_netdev(netdev);
 	if (err) {
@@ -1408,8 +1455,8 @@ mlx5e_vport_vf_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
 	return 0;
 
 err_detach_netdev:
+	mlx5e_rep_vnic_reporter_destroy(priv);
 	mlx5e_detach_netdev(netdev_priv(netdev));
-
 err_cleanup_profile:
 	priv->profile->cleanup(priv);
 
@@ -1458,6 +1505,7 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep)
 	}
 
 	unregister_netdev(netdev);
+	mlx5e_rep_vnic_reporter_destroy(priv);
 	mlx5e_detach_netdev(priv);
 	priv->profile->cleanup(priv);
 	mlx5e_destroy_netdev(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index dcfad0bf0f45..80b7f5079a5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -118,6 +118,7 @@ struct mlx5e_rep_priv {
 	struct rtnl_link_stats64 prev_vf_vport_stats;
 	struct mlx5_flow_handle *send_to_vport_meta_rule;
 	struct rhashtable tc_ht;
+	struct devlink_health_reporter *rep_vnic_reporter;
 };
 
 static inline
-- 
cgit v1.2.3


From 7ab75456be144a354fbb3df1516d82fc24d3d67d Mon Sep 17 00:00:00 2001
From: Mahesh Bandewar <maheshb@google.com>
Date: Tue, 18 Apr 2023 18:32:38 -0700
Subject: ipv6: add icmpv6_error_anycast_as_unicast for ICMPv6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ICMPv6 error packets are not sent to the anycast destinations and this
prevents things like traceroute from working. So create a setting similar
to ECHO when dealing with Anycast sources (icmpv6_echo_ignore_anycast).

Signed-off-by: Mahesh Bandewar <maheshb@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Reviewed-by: Maciej Żenczykowski <maze@google.com>
Link: https://lore.kernel.org/r/20230419013238.2691167-1-maheshb@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ip-sysctl.rst |  7 +++++++
 include/net/netns/ipv6.h               |  1 +
 net/ipv6/af_inet6.c                    |  1 +
 net/ipv6/icmp.c                        | 15 +++++++++++++--
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 58a78a316697..6ec06a33688a 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2721,6 +2721,13 @@ echo_ignore_anycast - BOOLEAN
 
 	Default: 0
 
+error_anycast_as_unicast - BOOLEAN
+	If set to 1, then the kernel will respond with ICMP Errors
+	resulting from requests sent to it over the IPv6 protocol destined
+	to an anycast address, essentially treating anycast as unicast.
+
+	Default: 0
+
 xfrm6_gc_thresh - INTEGER
 	(Obsolete since linux-4.14)
 	The threshold at which we will start garbage collecting for IPv6
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b4af4837d80b..3cceb3e9320b 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -55,6 +55,7 @@ struct netns_sysctl_ipv6 {
 	u64 ioam6_id_wide;
 	bool skip_notify_on_dev_down;
 	u8 fib_notify_on_flag_change;
+	u8 icmpv6_error_anycast_as_unicast;
 };
 
 struct netns_ipv6 {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e1b679a590c9..2bbf13216a3d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -952,6 +952,7 @@ static int __net_init inet6_net_init(struct net *net)
 	net->ipv6.sysctl.icmpv6_echo_ignore_all = 0;
 	net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0;
 	net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0;
+	net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0;
 
 	/* By default, rate limit error messages.
 	 * Except for pmtu discovery, it would break it.
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 1f53f2a74480..9edf1f45b1ed 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -362,9 +362,10 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
 
 	/*
 	 * We won't send icmp if the destination is known
-	 * anycast.
+	 * anycast unless we need to treat anycast as unicast.
 	 */
-	if (ipv6_anycast_destination(dst, &fl6->daddr)) {
+	if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
+	    ipv6_anycast_destination(dst, &fl6->daddr)) {
 		net_dbg_ratelimited("icmp6_send: acast source\n");
 		dst_release(dst);
 		return ERR_PTR(-EINVAL);
@@ -1195,6 +1196,15 @@ static struct ctl_table ipv6_icmp_table_template[] = {
 		.mode		= 0644,
 		.proc_handler = proc_do_large_bitmap,
 	},
+	{
+		.procname	= "error_anycast_as_unicast",
+		.data		= &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
+		.maxlen		= sizeof(u8),
+		.mode		= 0644,
+		.proc_handler	= proc_dou8vec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 	{ },
 };
 
@@ -1212,6 +1222,7 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
 		table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
 		table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
 		table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
+		table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
 	}
 	return table;
 }
-- 
cgit v1.2.3


From 55435ea7729accb5b8a330de751836c4be524834 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Wed, 19 Apr 2023 10:04:14 -0700
Subject: pds_core: initial framework for pds_core PF driver

This is the initial PCI driver framework for the new pds_core device
driver and its family of devices.  This does the very basics of
registering for the new PF PCI device 1dd8:100c, setting up debugfs
entries, and registering with devlink.

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/amd/pds_core.rst       |  35 ++
 .../networking/device_drivers/ethernet/index.rst   |   1 +
 drivers/net/ethernet/amd/pds_core/Makefile         |   8 +
 drivers/net/ethernet/amd/pds_core/core.h           |  56 ++
 drivers/net/ethernet/amd/pds_core/debugfs.c        |  31 ++
 drivers/net/ethernet/amd/pds_core/main.c           | 277 ++++++++++
 include/linux/pds/pds_common.h                     |  14 +
 include/linux/pds/pds_core_if.h                    | 571 +++++++++++++++++++++
 8 files changed, 993 insertions(+)
 create mode 100644 Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
 create mode 100644 drivers/net/ethernet/amd/pds_core/Makefile
 create mode 100644 drivers/net/ethernet/amd/pds_core/core.h
 create mode 100644 drivers/net/ethernet/amd/pds_core/debugfs.c
 create mode 100644 drivers/net/ethernet/amd/pds_core/main.c
 create mode 100644 include/linux/pds/pds_common.h
 create mode 100644 include/linux/pds/pds_core_if.h

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
new file mode 100644
index 000000000000..99a70026f1bc
--- /dev/null
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -0,0 +1,35 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+========================================================
+Linux Driver for the AMD/Pensando(R) DSC adapter family
+========================================================
+
+Copyright(c) 2023 Advanced Micro Devices, Inc
+
+Identifying the Adapter
+=======================
+
+To find if one or more AMD/Pensando PCI Core devices are installed on the
+host, check for the PCI devices::
+
+  # lspci -d 1dd8:100c
+  b5:00.0 Processing accelerators: Pensando Systems Device 100c
+  b6:00.0 Processing accelerators: Pensando Systems Device 100c
+
+If such devices are listed as above, then the pds_core.ko driver should find
+and configure them for use.  There should be log entries in the kernel
+messages such as these::
+
+  $ dmesg | grep pds_core
+  pds_core 0000:b5:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
+  pds_core 0000:b5:00.0: FW: 1.60.0-73
+  pds_core 0000:b6:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
+  pds_core 0000:b6:00.0: FW: 1.60.0-73
+
+Support
+=======
+
+For general Linux networking support, please use the netdev mailing
+list, which is monitored by AMD/Pensando personnel::
+
+  netdev@vger.kernel.org
diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst
index 6e9e7012d000..417ca514a4d0 100644
--- a/Documentation/networking/device_drivers/ethernet/index.rst
+++ b/Documentation/networking/device_drivers/ethernet/index.rst
@@ -14,6 +14,7 @@ Contents:
    3com/vortex
    amazon/ena
    altera/altera_tse
+   amd/pds_core
    aquantia/atlantic
    chelsio/cxgb
    cirrus/cs89x0
diff --git a/drivers/net/ethernet/amd/pds_core/Makefile b/drivers/net/ethernet/amd/pds_core/Makefile
new file mode 100644
index 000000000000..de3bf1d1886c
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2023 Advanced Micro Devices, Inc.
+
+obj-$(CONFIG_PDS_CORE) := pds_core.o
+
+pds_core-y := main.o
+
+pds_core-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
new file mode 100644
index 000000000000..34ef837e8cfe
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _PDSC_H_
+#define _PDSC_H_
+
+#include <linux/debugfs.h>
+#include <net/devlink.h>
+
+#include <linux/pds/pds_common.h>
+#include <linux/pds/pds_core_if.h>
+
+#define PDSC_DRV_DESCRIPTION	"AMD/Pensando Core Driver"
+
+struct pdsc_dev_bar {
+	void __iomem *vaddr;
+	phys_addr_t bus_addr;
+	unsigned long len;
+	int res_index;
+};
+
+/* No state flags set means we are in a steady running state */
+enum pdsc_state_flags {
+	PDSC_S_FW_DEAD,		    /* stopped, wait on startup or recovery */
+	PDSC_S_INITING_DRIVER,	    /* initial startup from probe */
+	PDSC_S_STOPPING_DRIVER,	    /* driver remove */
+
+	/* leave this as last */
+	PDSC_S_STATE_SIZE
+};
+
+struct pdsc {
+	struct pci_dev *pdev;
+	struct dentry *dentry;
+	struct device *dev;
+	struct pdsc_dev_bar bars[PDS_CORE_BARS_MAX];
+	int hw_index;
+	int uid;
+
+	unsigned long state;
+
+	struct pds_core_dev_info_regs __iomem *info_regs;
+	struct pds_core_dev_cmd_regs __iomem *cmd_regs;
+	struct pds_core_intr __iomem *intr_ctrl;
+	u64 __iomem *intr_status;
+	u64 __iomem *db_pages;
+	dma_addr_t phy_db_pages;
+	u64 __iomem *kern_dbpage;
+};
+
+void pdsc_debugfs_create(void);
+void pdsc_debugfs_destroy(void);
+void pdsc_debugfs_add_dev(struct pdsc *pdsc);
+void pdsc_debugfs_del_dev(struct pdsc *pdsc);
+
+#endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c
new file mode 100644
index 000000000000..b2f7cb795c20
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/debugfs.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include <linux/pci.h>
+
+#include "core.h"
+
+static struct dentry *pdsc_dir;
+
+void pdsc_debugfs_create(void)
+{
+	pdsc_dir = debugfs_create_dir(PDS_CORE_DRV_NAME, NULL);
+}
+
+void pdsc_debugfs_destroy(void)
+{
+	debugfs_remove_recursive(pdsc_dir);
+}
+
+void pdsc_debugfs_add_dev(struct pdsc *pdsc)
+{
+	pdsc->dentry = debugfs_create_dir(pci_name(pdsc->pdev), pdsc_dir);
+
+	debugfs_create_ulong("state", 0400, pdsc->dentry, &pdsc->state);
+}
+
+void pdsc_debugfs_del_dev(struct pdsc *pdsc)
+{
+	debugfs_remove_recursive(pdsc->dentry);
+	pdsc->dentry = NULL;
+}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
new file mode 100644
index 000000000000..c2b12f226959
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/pci.h>
+
+#include <linux/pds/pds_common.h>
+
+#include "core.h"
+
+MODULE_DESCRIPTION(PDSC_DRV_DESCRIPTION);
+MODULE_AUTHOR("Advanced Micro Devices, Inc");
+MODULE_LICENSE("GPL");
+
+/* Supported devices */
+static const struct pci_device_id pdsc_id_table[] = {
+	{ PCI_VDEVICE(PENSANDO, PCI_DEVICE_ID_PENSANDO_CORE_PF) },
+	{ 0, }	/* end of table */
+};
+MODULE_DEVICE_TABLE(pci, pdsc_id_table);
+
+static void pdsc_unmap_bars(struct pdsc *pdsc)
+{
+	struct pdsc_dev_bar *bars = pdsc->bars;
+	unsigned int i;
+
+	for (i = 0; i < PDS_CORE_BARS_MAX; i++) {
+		if (bars[i].vaddr)
+			pci_iounmap(pdsc->pdev, bars[i].vaddr);
+	}
+}
+
+/* Record the device's memory BARs, map BAR0 and locate its register blocks.
+ * Returns 0 or a negative errno; no BAR is left mapped on failure.
+ */
+static int pdsc_map_bars(struct pdsc *pdsc)
+{
+	struct pdsc_dev_bar *bars = pdsc->bars;
+	struct pci_dev *pdev = pdsc->pdev;
+	struct device *dev = pdsc->dev;
+	struct pdsc_dev_bar *bar = bars;
+	unsigned int i, j;
+	int num_bars, err;
+	u32 sig;
+
+	/* Since the PCI interface in the hardware is configurable,
+	 * we need to poke into all the bars to find the set we're
+	 * expecting.
+	 */
+	for (i = 0, j = 0; i < PDS_CORE_BARS_MAX; i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+
+		bars[j].len = pci_resource_len(pdev, i);
+		bars[j].bus_addr = pci_resource_start(pdev, i);
+		bars[j].res_index = i;
+
+		/* only map the whole bar 0 */
+		if (j > 0) {
+			bars[j].vaddr = NULL;
+		} else {
+			bars[j].vaddr = pci_iomap(pdev, i, bars[j].len);
+			if (!bars[j].vaddr) {
+				dev_err(dev, "Cannot map BAR %d, aborting\n", i);
+				return -ENODEV;
+			}
+		}
+
+		j++;
+	}
+	num_bars = j;
+
+	/* BAR0: dev_cmd and interrupts */
+	if (num_bars < 1) {
+		dev_err(dev, "No bars found\n");
+		err = -EFAULT;
+		goto err_out;
+	}
+
+	if (bar->len < PDS_CORE_BAR0_SIZE) {
+		dev_err(dev, "Resource bar size %lu too small\n", bar->len);
+		err = -EFAULT;
+		goto err_out;
+	}
+
+	pdsc->info_regs = bar->vaddr + PDS_CORE_BAR0_DEV_INFO_REGS_OFFSET;
+	pdsc->cmd_regs = bar->vaddr + PDS_CORE_BAR0_DEV_CMD_REGS_OFFSET;
+	pdsc->intr_status = bar->vaddr + PDS_CORE_BAR0_INTR_STATUS_OFFSET;
+	pdsc->intr_ctrl = bar->vaddr + PDS_CORE_BAR0_INTR_CTRL_OFFSET;
+
+	sig = ioread32(&pdsc->info_regs->signature);
+	if (sig != PDS_CORE_DEV_INFO_SIGNATURE) {
+		dev_err(dev, "Incompatible firmware signature %x\n", sig);
+		err = -EFAULT;
+		goto err_out;
+	}
+
+	/* BAR1: doorbells */
+	bar++;
+	if (num_bars < 2) {
+		dev_err(dev, "Doorbell bar missing\n");
+		err = -EFAULT;
+		goto err_out;
+	}
+
+	pdsc->db_pages = bar->vaddr;
+	pdsc->phy_db_pages = bar->bus_addr;
+
+	return 0;
+
+err_out:
+	pdsc_unmap_bars(pdsc);
+	return err;
+}
+
+static int pdsc_init_vf(struct pdsc *vf)
+{
+	return -1;
+}
+
+static int pdsc_init_pf(struct pdsc *pdsc)
+{
+	struct devlink *dl;
+	int err;
+
+	pcie_print_link_status(pdsc->pdev);
+
+	err = pci_request_regions(pdsc->pdev, PDS_CORE_DRV_NAME);
+	if (err) {
+		dev_err(pdsc->dev, "Cannot request PCI regions: %pe\n",
+			ERR_PTR(err));
+		return err;
+	}
+
+	err = pdsc_map_bars(pdsc);
+	if (err)
+		goto err_out_release_regions;
+
+	dl = priv_to_devlink(pdsc);
+	devl_lock(dl);
+	devl_register(dl);
+	devl_unlock(dl);
+
+	return 0;
+
+err_out_release_regions:
+	pci_release_regions(pdsc->pdev);
+
+	return err;
+}
+
+static const struct devlink_ops pdsc_dl_ops = {
+};
+
+static const struct devlink_ops pdsc_dl_vf_ops = {
+};
+
+static DEFINE_IDA(pdsc_ida);
+
+static int pdsc_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	const struct devlink_ops *ops;
+	struct devlink *dl;
+	struct pdsc *pdsc;
+	bool is_pf;
+	int err;
+
+	is_pf = !pdev->is_virtfn;
+	ops = is_pf ? &pdsc_dl_ops : &pdsc_dl_vf_ops;
+	dl = devlink_alloc(ops, sizeof(struct pdsc), dev);
+	if (!dl)
+		return -ENOMEM;
+	pdsc = devlink_priv(dl);
+
+	pdsc->pdev = pdev;
+	pdsc->dev = &pdev->dev;
+	set_bit(PDSC_S_INITING_DRIVER, &pdsc->state);
+	pci_set_drvdata(pdev, pdsc);
+	pdsc_debugfs_add_dev(pdsc);
+
+	err = ida_alloc(&pdsc_ida, GFP_KERNEL);
+	if (err < 0) {
+		dev_err(pdsc->dev, "%s: id alloc failed: %pe\n",
+			__func__, ERR_PTR(err));
+		goto err_out_free_devlink;
+	}
+	pdsc->uid = err;
+
+	/* Query system for DMA addressing limitation for the device. */
+	err = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(PDS_CORE_ADDR_LEN));
+	if (err) {
+		dev_err(dev, "Unable to obtain 64-bit DMA for consistent allocations, aborting: %pe\n",
+			ERR_PTR(err));
+		goto err_out_free_ida;
+	}
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(dev, "Cannot enable PCI device: %pe\n", ERR_PTR(err));
+		goto err_out_free_ida;
+	}
+	pci_set_master(pdev);
+
+	if (is_pf)
+		err = pdsc_init_pf(pdsc);
+	else
+		err = pdsc_init_vf(pdsc);
+	if (err) {
+		dev_err(dev, "Cannot init device: %pe\n", ERR_PTR(err));
+		goto err_out_clear_master;
+	}
+
+	clear_bit(PDSC_S_INITING_DRIVER, &pdsc->state);
+	return 0;
+
+err_out_clear_master:
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+err_out_free_ida:
+	ida_free(&pdsc_ida, pdsc->uid);
+err_out_free_devlink:
+	pdsc_debugfs_del_dev(pdsc);
+	devlink_free(dl);
+
+	return err;
+}
+
+static void pdsc_remove(struct pci_dev *pdev)
+{
+	struct pdsc *pdsc = pci_get_drvdata(pdev);
+	struct devlink *dl;
+
+	/* Unhook the registrations first to be sure there
+	 * are no requests while we're stopping.
+	 */
+	dl = priv_to_devlink(pdsc);
+	devl_lock(dl);
+	devl_unregister(dl);
+	devl_unlock(dl);
+
+	pdsc_unmap_bars(pdsc);
+	pci_release_regions(pdev);
+
+	pci_clear_master(pdev);
+	pci_disable_device(pdev);
+
+	ida_free(&pdsc_ida, pdsc->uid);
+	pdsc_debugfs_del_dev(pdsc);
+	devlink_free(dl);
+}
+
+static struct pci_driver pdsc_driver = {
+	.name = PDS_CORE_DRV_NAME,
+	.id_table = pdsc_id_table,
+	.probe = pdsc_probe,
+	.remove = pdsc_remove,
+};
+
+static int __init pdsc_init_module(void)
+{
+	if (strcmp(KBUILD_MODNAME, PDS_CORE_DRV_NAME))
+		return -EINVAL;
+
+	pdsc_debugfs_create();
+	return pci_register_driver(&pdsc_driver);
+}
+
+static void __exit pdsc_cleanup_module(void)
+{
+	pci_unregister_driver(&pdsc_driver);
+	pdsc_debugfs_destroy();
+}
+
+module_init(pdsc_init_module);
+module_exit(pdsc_cleanup_module);
diff --git a/include/linux/pds/pds_common.h b/include/linux/pds/pds_common.h
new file mode 100644
index 000000000000..bd041a5170a6
--- /dev/null
+++ b/include/linux/pds/pds_common.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
+
+#ifndef _PDS_COMMON_H_
+#define _PDS_COMMON_H_
+
+#define PDS_CORE_DRV_NAME			"pds_core"
+
+/* the device's internal addressing uses up to 52 bits */
+#define PDS_CORE_ADDR_LEN	52
+#define PDS_CORE_ADDR_MASK	(BIT_ULL(PDS_CORE_ADDR_LEN) - 1)
+#define PDS_PAGE_SIZE		4096
+
+#endif /* _PDS_COMMON_H_ */
diff --git a/include/linux/pds/pds_core_if.h b/include/linux/pds/pds_core_if.h
new file mode 100644
index 000000000000..e838a2b90440
--- /dev/null
+++ b/include/linux/pds/pds_core_if.h
@@ -0,0 +1,571 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
+
+#ifndef _PDS_CORE_IF_H_
+#define _PDS_CORE_IF_H_
+
+#define PCI_VENDOR_ID_PENSANDO			0x1dd8
+#define PCI_DEVICE_ID_PENSANDO_CORE_PF		0x100c
+#define PCI_DEVICE_ID_VIRTIO_NET_TRANS		0x1000
+#define PCI_DEVICE_ID_PENSANDO_IONIC_ETH_VF	0x1003
+#define PCI_DEVICE_ID_PENSANDO_VDPA_VF		0x100b
+#define PDS_CORE_BARS_MAX			4
+#define PDS_CORE_PCI_BAR_DBELL			1
+
+/* Bar0 */
+#define PDS_CORE_DEV_INFO_SIGNATURE		0x44455649 /* 'DEVI' */
+#define PDS_CORE_BAR0_SIZE			0x8000
+#define PDS_CORE_BAR0_DEV_INFO_REGS_OFFSET	0x0000
+#define PDS_CORE_BAR0_DEV_CMD_REGS_OFFSET	0x0800
+#define PDS_CORE_BAR0_DEV_CMD_DATA_REGS_OFFSET	0x0c00
+#define PDS_CORE_BAR0_INTR_STATUS_OFFSET	0x1000
+#define PDS_CORE_BAR0_INTR_CTRL_OFFSET		0x2000
+#define PDS_CORE_DEV_CMD_DONE			0x00000001
+
+#define PDS_CORE_DEVCMD_TIMEOUT			5
+
+#define PDS_CORE_CLIENT_ID			0
+#define PDS_CORE_ASIC_TYPE_CAPRI		0
+
+/*
+ * enum pds_core_cmd_opcode - Device commands
+ */
+enum pds_core_cmd_opcode {
+	/* Core init */
+	PDS_CORE_CMD_NOP		= 0,
+	PDS_CORE_CMD_IDENTIFY		= 1,
+	PDS_CORE_CMD_RESET		= 2,
+	PDS_CORE_CMD_INIT		= 3,
+
+	PDS_CORE_CMD_FW_DOWNLOAD	= 4,
+	PDS_CORE_CMD_FW_CONTROL		= 5,
+
+	/* SR/IOV commands */
+	PDS_CORE_CMD_VF_GETATTR		= 60,
+	PDS_CORE_CMD_VF_SETATTR		= 61,
+	PDS_CORE_CMD_VF_CTRL		= 62,
+
+	/* Add commands before this line */
+	PDS_CORE_CMD_MAX,
+	PDS_CORE_CMD_COUNT
+};
+
+/*
+ * enum pds_core_status_code - Device command return codes
+ */
+enum pds_core_status_code {
+	PDS_RC_SUCCESS	= 0,	/* Success */
+	PDS_RC_EVERSION	= 1,	/* Incorrect version for request */
+	PDS_RC_EOPCODE	= 2,	/* Invalid cmd opcode */
+	PDS_RC_EIO	= 3,	/* I/O error */
+	PDS_RC_EPERM	= 4,	/* Permission denied */
+	PDS_RC_EQID	= 5,	/* Bad qid */
+	PDS_RC_EQTYPE	= 6,	/* Bad qtype */
+	PDS_RC_ENOENT	= 7,	/* No such element */
+	PDS_RC_EINTR	= 8,	/* operation interrupted */
+	PDS_RC_EAGAIN	= 9,	/* Try again */
+	PDS_RC_ENOMEM	= 10,	/* Out of memory */
+	PDS_RC_EFAULT	= 11,	/* Bad address */
+	PDS_RC_EBUSY	= 12,	/* Device or resource busy */
+	PDS_RC_EEXIST	= 13,	/* object already exists */
+	PDS_RC_EINVAL	= 14,	/* Invalid argument */
+	PDS_RC_ENOSPC	= 15,	/* No space left or alloc failure */
+	PDS_RC_ERANGE	= 16,	/* Parameter out of range */
+	PDS_RC_BAD_ADDR	= 17,	/* Descriptor contains a bad ptr */
+	PDS_RC_DEV_CMD	= 18,	/* Device cmd attempted on AdminQ */
+	PDS_RC_ENOSUPP	= 19,	/* Operation not supported */
+	PDS_RC_ERROR	= 29,	/* Generic error */
+	PDS_RC_ERDMA	= 30,	/* Generic RDMA error */
+	PDS_RC_EVFID	= 31,	/* VF ID does not exist */
+	PDS_RC_BAD_FW	= 32,	/* FW file is invalid or corrupted */
+	PDS_RC_ECLIENT	= 33,   /* No such client id */
+};
+
+/**
+ * struct pds_core_drv_identity - Driver identity information
+ * @drv_type:         Driver type (enum pds_core_driver_type)
+ * @os_dist:          OS distribution, numeric format
+ * @os_dist_str:      OS distribution, string format
+ * @kernel_ver:       Kernel version, numeric format
+ * @kernel_ver_str:   Kernel version, string format
+ * @driver_ver_str:   Driver version, string format
+ */
+struct pds_core_drv_identity {
+	__le32 drv_type;
+	__le32 os_dist;
+	char   os_dist_str[128];
+	__le32 kernel_ver;
+	char   kernel_ver_str[32];
+	char   driver_ver_str[32];
+};
+
+#define PDS_DEV_TYPE_MAX	16
+/**
+ * struct pds_core_dev_identity - Device identity information
+ * @version:	      Version of device identify
+ * @type:	      Identify type (0 for now)
+ * @state:	      Device state
+ * @rsvd:	      Word boundary padding
+ * @nlifs:	      Number of LIFs provisioned
+ * @nintrs:	      Number of interrupts provisioned
+ * @ndbpgs_per_lif:   Number of doorbell pages per LIF
+ * @intr_coal_mult:   Interrupt coalescing multiplication factor
+ *		      Scale user-supplied interrupt coalescing
+ *		      value in usecs to device units using:
+ *		      device units = usecs * mult / div
+ * @intr_coal_div:    Interrupt coalescing division factor
+ *		      Scale user-supplied interrupt coalescing
+ *		      value in usecs to device units using:
+ *		      device units = usecs * mult / div
+ * @vif_types:        How many of each VIF device type is supported
+ */
+struct pds_core_dev_identity {
+	u8     version;
+	u8     type;
+	u8     state;
+	u8     rsvd;
+	__le32 nlifs;
+	__le32 nintrs;
+	__le32 ndbpgs_per_lif;
+	__le32 intr_coal_mult;
+	__le32 intr_coal_div;
+	__le16 vif_types[PDS_DEV_TYPE_MAX];
+};
+
+#define PDS_CORE_IDENTITY_VERSION_1	1
+
+/**
+ * struct pds_core_dev_identify_cmd - Driver/device identify command
+ * @opcode:	Opcode PDS_CORE_CMD_IDENTIFY
+ * @ver:	Highest version of identify supported by driver
+ *
+ * Expects to find driver identification info (struct pds_core_drv_identity)
+ * in cmd_regs->data.  Driver should keep the devcmd interface locked
+ * while preparing the driver info.
+ */
+struct pds_core_dev_identify_cmd {
+	u8 opcode;
+	u8 ver;
+};
+
+/**
+ * struct pds_core_dev_identify_comp - Device identify command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @ver:	Version of identify returned by device
+ *
+ * Device identification info (struct pds_core_dev_identity) can be found
+ * in cmd_regs->data.  Driver should keep the devcmd interface locked
+ * while reading the results.
+ */
+struct pds_core_dev_identify_comp {
+	u8 status;
+	u8 ver;
+};
+
+/**
+ * struct pds_core_dev_reset_cmd - Device reset command
+ * @opcode:	Opcode PDS_CORE_CMD_RESET
+ *
+ * Resets and clears all LIFs, VDevs, and VIFs on the device.
+ */
+struct pds_core_dev_reset_cmd {
+	u8 opcode;
+};
+
+/**
+ * struct pds_core_dev_reset_comp - Reset command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ */
+struct pds_core_dev_reset_comp {
+	u8 status;
+};
+
+/*
+ * struct pds_core_dev_init_data - Pointers and info needed for the Core
+ * initialization PDS_CORE_CMD_INIT command.  The in and out structs are
+ * overlays on the pds_core_dev_cmd_regs.data space for passing data down
+ * to the firmware on init, and then returning initialization results.
+ */
+struct pds_core_dev_init_data_in {
+	__le64 adminq_q_base;
+	__le64 adminq_cq_base;
+	__le64 notifyq_cq_base;
+	__le32 flags;
+	__le16 intr_index;
+	u8     adminq_ring_size;
+	u8     notifyq_ring_size;
+};
+
+struct pds_core_dev_init_data_out {
+	__le32 core_hw_index;
+	__le32 adminq_hw_index;
+	__le32 notifyq_hw_index;
+	u8     adminq_hw_type;
+	u8     notifyq_hw_type;
+};
+
+/**
+ * struct pds_core_dev_init_cmd - Core device initialize
+ * @opcode:          opcode PDS_CORE_CMD_INIT
+ *
+ * Initializes the core device and sets up the AdminQ and NotifyQ.
+ * Expects to find initialization data (struct pds_core_dev_init_data_in)
+ * in cmd_regs->data.  Driver should keep the devcmd interface locked
+ * while preparing the initialization data.
+ */
+struct pds_core_dev_init_cmd {
+	u8     opcode;
+};
+
+/**
+ * struct pds_core_dev_init_comp - Core init completion
+ * @status:     Status of the command (enum pds_core_status_code)
+ *
+ * Initialization result data (struct pds_core_dev_init_data_out)
+ * is found in cmd_regs->data.
+ */
+struct pds_core_dev_init_comp {
+	u8     status;
+};
+
+/**
+ * struct pds_core_fw_download_cmd - Firmware download command
+ * @opcode:     opcode
+ * @rsvd:	Word boundary padding
+ * @addr:       DMA address of the firmware buffer
+ * @offset:     offset of the firmware buffer within the full image
+ * @length:     number of valid bytes in the firmware buffer
+ */
+struct pds_core_fw_download_cmd {
+	u8     opcode;
+	u8     rsvd[3];
+	__le32 offset;
+	__le64 addr;
+	__le32 length;
+};
+
+/**
+ * struct pds_core_fw_download_comp - Firmware download completion
+ * @status:     Status of the command (enum pds_core_status_code)
+ */
+struct pds_core_fw_download_comp {
+	u8     status;
+};
+
+/**
+ * enum pds_core_fw_control_oper - FW control operations
+ * @PDS_CORE_FW_INSTALL_ASYNC:     Install firmware asynchronously
+ * @PDS_CORE_FW_INSTALL_STATUS:    Firmware installation status
+ * @PDS_CORE_FW_ACTIVATE_ASYNC:    Activate firmware asynchronously
+ * @PDS_CORE_FW_ACTIVATE_STATUS:   Firmware activate status
+ * @PDS_CORE_FW_UPDATE_CLEANUP:    Cleanup any firmware update leftovers
+ * @PDS_CORE_FW_GET_BOOT:          Return current active firmware slot
+ * @PDS_CORE_FW_SET_BOOT:          Set active firmware slot for next boot
+ * @PDS_CORE_FW_GET_LIST:          Return list of installed firmware images
+ */
+enum pds_core_fw_control_oper {
+	PDS_CORE_FW_INSTALL_ASYNC          = 0,
+	PDS_CORE_FW_INSTALL_STATUS         = 1,
+	PDS_CORE_FW_ACTIVATE_ASYNC         = 2,
+	PDS_CORE_FW_ACTIVATE_STATUS        = 3,
+	PDS_CORE_FW_UPDATE_CLEANUP         = 4,
+	PDS_CORE_FW_GET_BOOT               = 5,
+	PDS_CORE_FW_SET_BOOT               = 6,
+	PDS_CORE_FW_GET_LIST               = 7,
+};
+
+enum pds_core_fw_slot {
+	PDS_CORE_FW_SLOT_INVALID    = 0,
+	PDS_CORE_FW_SLOT_A	    = 1,
+	PDS_CORE_FW_SLOT_B          = 2,
+	PDS_CORE_FW_SLOT_GOLD       = 3,
+};
+
+/**
+ * struct pds_core_fw_control_cmd - Firmware control command
+ * @opcode:    opcode
+ * @rsvd:      Word boundary padding
+ * @oper:      firmware control operation (enum pds_core_fw_control_oper)
+ * @slot:      slot to operate on (enum pds_core_fw_slot)
+ */
+struct pds_core_fw_control_cmd {
+	u8  opcode;
+	u8  rsvd[3];
+	u8  oper;
+	u8  slot;
+};
+
+/**
+ * struct pds_core_fw_control_comp - Firmware control completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd:	Word alignment space
+ * @slot:	Slot number (enum pds_core_fw_slot)
+ * @rsvd1:	Struct padding
+ * @color:	Color bit
+ */
+struct pds_core_fw_control_comp {
+	u8     status;
+	u8     rsvd[3];
+	u8     slot;
+	u8     rsvd1[10];
+	u8     color;
+};
+
+struct pds_core_fw_name_info {
+#define PDS_CORE_FWSLOT_BUFLEN		8
+#define PDS_CORE_FWVERS_BUFLEN		32
+	char   slotname[PDS_CORE_FWSLOT_BUFLEN];
+	char   fw_version[PDS_CORE_FWVERS_BUFLEN];
+};
+
+struct pds_core_fw_list_info {
+#define PDS_CORE_FWVERS_LIST_LEN	16
+	u8 num_fw_slots;
+	struct pds_core_fw_name_info fw_names[PDS_CORE_FWVERS_LIST_LEN];
+} __packed;
+
+enum pds_core_vf_attr {
+	PDS_CORE_VF_ATTR_SPOOFCHK	= 1,
+	PDS_CORE_VF_ATTR_TRUST		= 2,
+	PDS_CORE_VF_ATTR_MAC		= 3,
+	PDS_CORE_VF_ATTR_LINKSTATE	= 4,
+	PDS_CORE_VF_ATTR_VLAN		= 5,
+	PDS_CORE_VF_ATTR_RATE		= 6,
+	PDS_CORE_VF_ATTR_STATSADDR	= 7,
+};
+
+/**
+ * enum pds_core_vf_link_status - Virtual Function link status
+ * @PDS_CORE_VF_LINK_STATUS_AUTO:   Use link state of the uplink
+ * @PDS_CORE_VF_LINK_STATUS_UP:     Link always up
+ * @PDS_CORE_VF_LINK_STATUS_DOWN:   Link always down
+ */
+enum pds_core_vf_link_status {
+	PDS_CORE_VF_LINK_STATUS_AUTO = 0,
+	PDS_CORE_VF_LINK_STATUS_UP   = 1,
+	PDS_CORE_VF_LINK_STATUS_DOWN = 2,
+};
+
+/**
+ * struct pds_core_vf_setattr_cmd - Set VF attributes on the NIC
+ * @opcode:     Opcode
+ * @attr:       Attribute type (enum pds_core_vf_attr)
+ * @vf_index:   VF index
+ * @macaddr:	mac address
+ * @vlanid:	vlan ID
+ * @maxrate:	max Tx rate in Mbps
+ * @spoofchk:	enable address spoof checking
+ * @trust:	enable VF trust
+ * @linkstate:	set link up or down
+ * @stats:	stats addr struct
+ * @stats.pa:	set DMA address for VF stats
+ * @stats.len:	length of VF stats space
+ * @pad:	force union to specific size
+ */
+struct pds_core_vf_setattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 vf_index;
+	union {
+		u8     macaddr[6];
+		__le16 vlanid;
+		__le32 maxrate;
+		u8     spoofchk;
+		u8     trust;
+		u8     linkstate;
+		struct {
+			__le64 pa;
+			__le32 len;
+		} stats;
+		u8     pad[60];
+	} __packed;
+};
+
+struct pds_core_vf_setattr_comp {
+	u8     status;
+	u8     attr;
+	__le16 vf_index;
+	__le16 comp_index;
+	u8     rsvd[9];
+	u8     color;
+};
+
+/**
+ * struct pds_core_vf_getattr_cmd - Get VF attributes from the NIC
+ * @opcode:     Opcode
+ * @attr:       Attribute type (enum pds_core_vf_attr)
+ * @vf_index:   VF index
+ */
+struct pds_core_vf_getattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 vf_index;
+};
+
+struct pds_core_vf_getattr_comp {
+	u8     status;
+	u8     attr;
+	__le16 vf_index;
+	union {
+		u8     macaddr[6];
+		__le16 vlanid;
+		__le32 maxrate;
+		u8     spoofchk;
+		u8     trust;
+		u8     linkstate;
+		__le64 stats_pa;
+		u8     pad[11];
+	} __packed;
+	u8     color;
+};
+
+enum pds_core_vf_ctrl_opcode {
+	PDS_CORE_VF_CTRL_START_ALL	= 0,
+	PDS_CORE_VF_CTRL_START		= 1,
+};
+
+/**
+ * struct pds_core_vf_ctrl_cmd - VF control command
+ * @opcode:         Opcode for the command
+ * @ctrl_opcode:    VF control operation type
+ * @vf_index:       VF Index. It is unused if op START_ALL is used.
+ */
+
+struct pds_core_vf_ctrl_cmd {
+	u8	opcode;
+	u8	ctrl_opcode;
+	__le16	vf_index;
+};
+
+/**
+ * struct pds_core_vf_ctrl_comp - VF_CTRL command completion.
+ * @status:     Status of the command (enum pds_core_status_code)
+ */
+struct pds_core_vf_ctrl_comp {
+	u8	status;
+};
+
+/*
+ * union pds_core_dev_cmd - Overlay of core device command structures
+ */
+union pds_core_dev_cmd {
+	u8     opcode;
+	u32    words[16];
+
+	struct pds_core_dev_identify_cmd identify;
+	struct pds_core_dev_init_cmd     init;
+	struct pds_core_dev_reset_cmd    reset;
+	struct pds_core_fw_download_cmd  fw_download;
+	struct pds_core_fw_control_cmd   fw_control;
+
+	struct pds_core_vf_setattr_cmd   vf_setattr;
+	struct pds_core_vf_getattr_cmd   vf_getattr;
+	struct pds_core_vf_ctrl_cmd      vf_ctrl;
+};
+
+/*
+ * union pds_core_dev_comp - Overlay of core device completion structures
+ */
+union pds_core_dev_comp {
+	u8                                status;
+	u8                                bytes[16];
+
+	struct pds_core_dev_identify_comp identify;
+	struct pds_core_dev_reset_comp    reset;
+	struct pds_core_dev_init_comp     init;
+	struct pds_core_fw_download_comp  fw_download;
+	struct pds_core_fw_control_comp   fw_control;
+
+	struct pds_core_vf_setattr_comp   vf_setattr;
+	struct pds_core_vf_getattr_comp   vf_getattr;
+	struct pds_core_vf_ctrl_comp      vf_ctrl;
+};
+
+/**
+ * struct pds_core_dev_hwstamp_regs - Hardware current timestamp registers
+ * @tick_low:        Low 32 bits of hardware timestamp
+ * @tick_high:       High 32 bits of hardware timestamp
+ */
+struct pds_core_dev_hwstamp_regs {
+	u32    tick_low;
+	u32    tick_high;
+};
+
+/**
+ * struct pds_core_dev_info_regs - Device info register format (read-only)
+ * @signature:       Signature value of 0x44455649 ('DEVI')
+ * @version:         Current version of info
+ * @asic_type:       Asic type
+ * @asic_rev:        Asic revision
+ * @fw_status:       Firmware status
+ *			bit 0   - 1 = fw running
+ *			bit 4-7 - 4 bit generation number, changes on fw restart
+ * @fw_heartbeat:    Firmware heartbeat counter
+ * @serial_num:      Serial number
+ * @fw_version:      Firmware version
+ * @oprom_regs:      oprom_regs to store oprom debug enable/disable and bmp
+ * @rsvd_pad1024:    Struct padding
+ * @hwstamp:         Hardware current timestamp registers
+ * @rsvd_pad2048:    Struct padding
+ */
+struct pds_core_dev_info_regs {
+#define PDS_CORE_DEVINFO_FWVERS_BUFLEN 32
+#define PDS_CORE_DEVINFO_SERIAL_BUFLEN 32
+	u32    signature;
+	u8     version;
+	u8     asic_type;
+	u8     asic_rev;
+#define PDS_CORE_FW_STS_F_STOPPED	0x00
+#define PDS_CORE_FW_STS_F_RUNNING	0x01
+#define PDS_CORE_FW_STS_F_GENERATION	0xF0
+	u8     fw_status;
+	__le32 fw_heartbeat;
+	char   fw_version[PDS_CORE_DEVINFO_FWVERS_BUFLEN];
+	char   serial_num[PDS_CORE_DEVINFO_SERIAL_BUFLEN];
+	u8     oprom_regs[32];     /* reserved */
+	u8     rsvd_pad1024[916];
+	struct pds_core_dev_hwstamp_regs hwstamp;   /* on 1k boundary */
+	u8     rsvd_pad2048[1016];
+} __packed;
+
+/**
+ * struct pds_core_dev_cmd_regs - Device command register format (read-write)
+ * @doorbell:	Device Cmd Doorbell, write-only
+ *              Write a 1 to signal device to process cmd
+ * @done:	Command completed indicator, poll for completion
+ *              bit 0 == 1 when command is complete
+ * @cmd:	Opcode-specific command bytes
+ * @comp:	Opcode-specific response bytes
+ * @rsvd:	Struct padding
+ * @data:	Opcode-specific side-data
+ */
+struct pds_core_dev_cmd_regs {
+	u32                     doorbell;
+	u32                     done;
+	union pds_core_dev_cmd  cmd;
+	union pds_core_dev_comp comp;
+	u8                      rsvd[48];
+	u32                     data[478];
+} __packed;
+
+/**
+ * struct pds_core_dev_regs - Device register format for bar 0 page 0
+ * @info:            Device info registers
+ * @devcmd:          Device command registers
+ */
+struct pds_core_dev_regs {
+	struct pds_core_dev_info_regs info;
+	struct pds_core_dev_cmd_regs  devcmd;
+} __packed;
+
+#ifndef __CHECKER__
+static_assert(sizeof(struct pds_core_drv_identity) <= 1912);
+static_assert(sizeof(struct pds_core_dev_identity) <= 1912);
+static_assert(sizeof(union pds_core_dev_cmd) == 64);
+static_assert(sizeof(union pds_core_dev_comp) == 16);
+static_assert(sizeof(struct pds_core_dev_info_regs) == 2048);
+static_assert(sizeof(struct pds_core_dev_cmd_regs) == 2048);
+static_assert(sizeof(struct pds_core_dev_regs) == 4096);
+#endif /* __CHECKER__ */
+
+#endif /* _PDS_CORE_IF_H_ */
-- 
cgit v1.2.3


From 25b450c05a49e34f2d08ac469ae19d7651abc57c Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Wed, 19 Apr 2023 10:04:17 -0700
Subject: pds_core: add devlink health facilities

Add devlink health reporting on top of our fw watchdog.

Example:
  # devlink health show pci/0000:2b:00.0 reporter fw
  pci/0000:2b:00.0:
    reporter fw
      state healthy error 0 recover 0
  # devlink health diagnose pci/0000:2b:00.0 reporter fw
   Status: healthy State: 1 Generation: 0 Recoveries: 0

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/amd/pds_core.rst       | 12 +++++++
 drivers/net/ethernet/amd/pds_core/Makefile         |  1 +
 drivers/net/ethernet/amd/pds_core/core.c           |  8 ++++-
 drivers/net/ethernet/amd/pds_core/core.h           |  6 ++++
 drivers/net/ethernet/amd/pds_core/devlink.c        | 40 ++++++++++++++++++++++
 drivers/net/ethernet/amd/pds_core/main.c           | 22 ++++++++++++
 6 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/amd/pds_core/devlink.c

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
index 99a70026f1bc..5b88173a20ff 100644
--- a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -26,6 +26,18 @@ messages such as these::
   pds_core 0000:b6:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
   pds_core 0000:b6:00.0: FW: 1.60.0-73
 
+Health Reporters
+================
+
+The driver supports a devlink health reporter for FW status::
+
+  # devlink health show pci/0000:2b:00.0 reporter fw
+  pci/0000:2b:00.0:
+    reporter fw
+      state healthy error 0 recover 0
+  # devlink health diagnose pci/0000:2b:00.0 reporter fw
+   Status: healthy State: 1 Generation: 0 Recoveries: 0
+
 Support
 =======
 
diff --git a/drivers/net/ethernet/amd/pds_core/Makefile b/drivers/net/ethernet/amd/pds_core/Makefile
index 95a6c31e92d2..eaca8557ba66 100644
--- a/drivers/net/ethernet/amd/pds_core/Makefile
+++ b/drivers/net/ethernet/amd/pds_core/Makefile
@@ -4,6 +4,7 @@
 obj-$(CONFIG_PDS_CORE) := pds_core.o
 
 pds_core-y := main.o \
+	      devlink.o \
 	      dev.o \
 	      core.o
 
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 701d27471858..ab8531386226 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -5,7 +5,7 @@
 
 int pdsc_setup(struct pdsc *pdsc, bool init)
 {
-	int err = 0;
+	int err;
 
 	if (init)
 		err = pdsc_dev_init(pdsc);
@@ -42,6 +42,8 @@ static void pdsc_fw_down(struct pdsc *pdsc)
 		return;
 	}
 
+	devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
+
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
@@ -58,6 +60,10 @@ static void pdsc_fw_up(struct pdsc *pdsc)
 	if (err)
 		goto err_out;
 
+	pdsc->fw_recoveries++;
+	devlink_health_reporter_state_update(pdsc->fw_reporter,
+					     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
+
 	return;
 
 err_out:
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 83c528a2a131..32aa38c40024 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -68,6 +68,8 @@ struct pdsc {
 	struct timer_list wdtimer;
 	unsigned int wdtimer_period;
 	struct work_struct health_work;
+	struct devlink_health_reporter *fw_reporter;
+	u32 fw_recoveries;
 
 	struct pdsc_devinfo dev_info;
 	struct pds_core_dev_identity dev_ident;
@@ -88,6 +90,10 @@ struct pdsc {
 	u64 __iomem *kern_dbpage;
 };
 
+int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+			      struct devlink_fmsg *fmsg,
+			      struct netlink_ext_ack *extack);
+
 void pdsc_debugfs_create(void);
 void pdsc_debugfs_destroy(void);
 void pdsc_debugfs_add_dev(struct pdsc *pdsc);
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
new file mode 100644
index 000000000000..3b05b1af65d1
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include "core.h"
+
+int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
+			      struct devlink_fmsg *fmsg,
+			      struct netlink_ext_ack *extack)
+{
+	struct pdsc *pdsc = devlink_health_reporter_priv(reporter);
+	int err;
+
+	mutex_lock(&pdsc->config_lock);
+
+	if (test_bit(PDSC_S_FW_DEAD, &pdsc->state))
+		err = devlink_fmsg_string_pair_put(fmsg, "Status", "dead");
+	else if (!pdsc_is_fw_good(pdsc))
+		err = devlink_fmsg_string_pair_put(fmsg, "Status", "unhealthy");
+	else
+		err = devlink_fmsg_string_pair_put(fmsg, "Status", "healthy");
+
+	mutex_unlock(&pdsc->config_lock);
+
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "State",
+					pdsc->fw_status &
+						~PDS_CORE_FW_STS_F_GENERATION);
+	if (err)
+		return err;
+
+	err = devlink_fmsg_u32_pair_put(fmsg, "Generation",
+					pdsc->fw_generation >> 4);
+	if (err)
+		return err;
+
+	return devlink_fmsg_u32_pair_put(fmsg, "Recoveries",
+					 pdsc->fw_recoveries);
+}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index c9fbf1d374a7..54f3aed7adb1 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -130,10 +130,16 @@ static int pdsc_init_vf(struct pdsc *vf)
 	return -1;
 }
 
+static const struct devlink_health_reporter_ops pdsc_fw_reporter_ops = {
+	.name = "fw",
+	.diagnose = pdsc_fw_reporter_diagnose,
+};
+
 #define PDSC_WQ_NAME_LEN 24
 
 static int pdsc_init_pf(struct pdsc *pdsc)
 {
+	struct devlink_health_reporter *hr;
 	char wq_name[PDSC_WQ_NAME_LEN];
 	struct devlink *dl;
 	int err;
@@ -172,6 +178,16 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
 	dl = priv_to_devlink(pdsc);
 	devl_lock(dl);
+
+	hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
+	if (IS_ERR(hr)) {
+		dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
+		err = PTR_ERR(hr);
+		devl_unlock(dl);
+		goto err_out_teardown;
+	}
+	pdsc->fw_reporter = hr;
+
 	devl_register(dl);
 	devl_unlock(dl);
 
@@ -180,6 +196,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
 	return 0;
 
+err_out_teardown:
+	pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
 err_out_unmap_bars:
 	mutex_unlock(&pdsc->config_lock);
 	del_timer_sync(&pdsc->wdtimer);
@@ -283,6 +301,10 @@ static void pdsc_remove(struct pci_dev *pdev)
 	dl = priv_to_devlink(pdsc);
 	devl_lock(dl);
 	devl_unregister(dl);
+	if (pdsc->fw_reporter) {
+		devl_health_reporter_destroy(pdsc->fw_reporter);
+		pdsc->fw_reporter = NULL;
+	}
 	devl_unlock(dl);
 
 	if (!pdev->is_virtfn) {
-- 
cgit v1.2.3


From 45d76f492938cdc27ddadc16e1e75103f4cfbf56 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Wed, 19 Apr 2023 10:04:18 -0700
Subject: pds_core: set up device and adminq

Set up the basic adminq and notifyq queue structures.  These are
used mostly by the client drivers for feature configuration.
These are essentially the same adminq and notifyq as in the
ionic driver.

Part of this includes querying for device identity and FW
information, so we can make that available to devlink dev info.

  $ devlink dev info pci/0000:b5:00.0
  pci/0000:b5:00.0:
    driver pds_core
    serial_number FLM18420073
    versions:
        fixed:
          asic.id 0x0
          asic.rev 0x0
        running:
          fw 1.51.0-73
        stored:
          fw.goldfw 1.15.9-C-22
          fw.mainfwa 1.60.0-73
          fw.mainfwb 1.60.0-57

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/amd/pds_core.rst       |  47 ++
 drivers/net/ethernet/amd/pds_core/core.c           | 431 +++++++++++++-
 drivers/net/ethernet/amd/pds_core/core.h           | 151 +++++
 drivers/net/ethernet/amd/pds_core/debugfs.c        |  77 +++
 drivers/net/ethernet/amd/pds_core/devlink.c        |  61 ++
 drivers/net/ethernet/amd/pds_core/main.c           |  17 +-
 include/linux/pds/pds_adminq.h                     | 638 +++++++++++++++++++++
 7 files changed, 1418 insertions(+), 4 deletions(-)
 create mode 100644 include/linux/pds/pds_adminq.h

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
index 5b88173a20ff..a48eafb3d0d3 100644
--- a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -26,6 +26,53 @@ messages such as these::
   pds_core 0000:b6:00.0: 252.048 Gb/s available PCIe bandwidth (16.0 GT/s PCIe x16 link)
   pds_core 0000:b6:00.0: FW: 1.60.0-73
 
+Driver and firmware version information can be gathered with devlink::
+
+  $ devlink dev info pci/0000:b5:00.0
+  pci/0000:b5:00.0:
+    driver pds_core
+    serial_number FLM18420073
+    versions:
+        fixed:
+          asic.id 0x0
+          asic.rev 0x0
+        running:
+          fw 1.51.0-73
+        stored:
+          fw.goldfw 1.15.9-C-22
+          fw.mainfwa 1.60.0-73
+          fw.mainfwb 1.60.0-57
+
+Info versions
+=============
+
+The ``pds_core`` driver reports the following versions:
+
+.. list-table:: devlink info versions implemented
+   :widths: 5 5 90
+
+   * - Name
+     - Type
+     - Description
+   * - ``fw``
+     - running
+     - Version of firmware running on the device
+   * - ``fw.goldfw``
+     - stored
+     - Version of firmware stored in the goldfw slot
+   * - ``fw.mainfwa``
+     - stored
+     - Version of firmware stored in the mainfwa slot
+   * - ``fw.mainfwb``
+     - stored
+     - Version of firmware stored in the mainfwb slot
+   * - ``asic.id``
+     - fixed
+     - The ASIC type for this device
+   * - ``asic.rev``
+     - fixed
+     - The revision of the ASIC for this device
+
 Health Reporters
 ================
 
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index ab8531386226..8c0dbdb5efc5 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -1,10 +1,365 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright(c) 2023 Advanced Micro Devices, Inc */
 
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
 #include "core.h"
 
+void pdsc_work_thread(struct work_struct *work)
+{
+	/* stub: placeholder work handler, currently does nothing */
+}
+
+irqreturn_t pdsc_adminq_isr(int irq, void *data)
+{
+	/* stub: no queue servicing yet, just report the IRQ as handled */
+	return IRQ_HANDLED;
+}
+
+/* Release interrupt slot @index: set its mask and clean it in the device,
+ * free the IRQ, then zero the bookkeeping entry.  An out-of-range @index
+ * triggers a WARN; a slot that holds no vector is left untouched.
+ */
+void pdsc_intr_free(struct pdsc *pdsc, int index)
+{
+	struct pds_core_intr __iomem *ctrl;
+	struct pdsc_intr_info *slot;
+
+	if (WARN(index < 0 || index >= pdsc->nintrs,
+		 "bad intr index %d\n", index))
+		return;
+
+	slot = &pdsc->intr_info[index];
+	if (slot->vector) {
+		ctrl = &pdsc->intr_ctrl[index];
+		dev_dbg(pdsc->dev, "%s: idx %d vec %d name %s\n",
+			__func__, index, slot->vector, slot->name);
+
+		/* Device side first, then drop the handler */
+		pds_core_intr_mask(ctrl, PDS_CORE_INTR_MASK_SET);
+		pds_core_intr_clean(ctrl);
+		free_irq(slot->vector, slot->data);
+
+		/* A zero vector is what marks the slot as free again */
+		memset(slot, 0, sizeof(*slot));
+	}
+}
+
+/* Claim the first unused interrupt slot, look up its OS vector and
+ * register @handler on it under @name, with @data as the handler cookie.
+ *
+ * PDS_CORE_INTR_MASK_SET is written to the device before the handler is
+ * registered; pdsc_start() is what later clears the adminq mask.
+ *
+ * Returns the slot index on success, -ENOSPC when every slot already has
+ * a vector, or the negative errno from pci_irq_vector()/request_irq().
+ */
+int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
+		    irq_handler_t handler, void *data)
+{
+	struct pdsc_intr_info *intr_info;
+	unsigned int index;
+	int err;
+
+	/* Find the first available interrupt */
+	for (index = 0; index < pdsc->nintrs; index++)
+		if (!pdsc->intr_info[index].vector)
+			break;
+	if (index >= pdsc->nintrs) {
+		dev_warn(pdsc->dev, "%s: no intr, index=%d nintrs=%d\n",
+			 __func__, index, pdsc->nintrs);
+		return -ENOSPC;
+	}
+
+	pds_core_intr_clean_flags(&pdsc->intr_ctrl[index],
+				  PDS_CORE_INTR_CRED_RESET_COALESCE);
+
+	intr_info = &pdsc->intr_info[index];
+
+	intr_info->index = index;
+	intr_info->data = data;
+	strscpy(intr_info->name, name, sizeof(intr_info->name));
+
+	/* Get the OS vector number for the interrupt */
+	err = pci_irq_vector(pdsc->pdev, index);
+	if (err < 0) {
+		dev_err(pdsc->dev, "failed to get intr vector index %d: %pe\n",
+			index, ERR_PTR(err));
+		goto err_out_clear_slot;
+	}
+	intr_info->vector = err;
+
+	/* Init the device's intr mask */
+	pds_core_intr_clean(&pdsc->intr_ctrl[index]);
+	pds_core_intr_mask_assert(&pdsc->intr_ctrl[index], 1);
+	pds_core_intr_mask(&pdsc->intr_ctrl[index], PDS_CORE_INTR_MASK_SET);
+
+	/* Register the isr with a name */
+	err = request_irq(intr_info->vector, handler, 0, intr_info->name, data);
+	if (err) {
+		dev_err(pdsc->dev, "failed to get intr irq vector %d: %pe\n",
+			intr_info->vector, ERR_PTR(err));
+		goto err_out_clear_slot;
+	}
+
+	return index;
+
+err_out_clear_slot:
+	/* Neither failure path has a handler registered, so only the
+	 * bookkeeping is undone here.  Going through pdsc_intr_free() would
+	 * call free_irq() on a vector with no matching action once ->vector
+	 * is set, and would leave name/data/index behind when it is not.
+	 * Zeroing the slot clears ->vector, which marks it available again.
+	 */
+	memset(intr_info, 0, sizeof(*intr_info));
+	return err;
+}
+
+/* Give back the interrupt slot owned by @qcq, if it has one */
+static void pdsc_qcq_intr_free(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+	bool wants_intr = qcq->flags & PDS_CORE_QCQ_F_INTR;
+
+	if (wants_intr && qcq->intx != PDS_CORE_INTR_INDEX_NOT_ASSIGNED) {
+		pdsc_intr_free(pdsc, qcq->intx);
+		qcq->intx = PDS_CORE_INTR_INDEX_NOT_ASSIGNED;
+	}
+}
+
+/* Set up the interrupt for @qcq when PDS_CORE_QCQ_F_INTR is requested;
+ * otherwise just record that no interrupt is assigned.
+ *
+ * Returns 0 on success or the negative errno from pdsc_intr_alloc().
+ */
+static int pdsc_qcq_intr_alloc(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+	char irq_name[PDSC_INTR_NAME_MAX_SZ];
+	int slot;
+
+	if (qcq->flags & PDS_CORE_QCQ_F_INTR) {
+		/* "<driver>-<pci bus number>-<queue name>" */
+		snprintf(irq_name, sizeof(irq_name), "%s-%d-%s",
+			 PDS_CORE_DRV_NAME, pdsc->pdev->bus->number,
+			 qcq->q.name);
+		slot = pdsc_intr_alloc(pdsc, irq_name, pdsc_adminq_isr, qcq);
+		if (slot < 0)
+			return slot;
+		qcq->intx = slot;
+	} else {
+		qcq->intx = PDS_CORE_INTR_INDEX_NOT_ASSIGNED;
+	}
+
+	return 0;
+}
+
+/* Tear down everything pdsc_qcq_alloc() set up for @qcq and zero it.
+ * A NULL @qcq, or one whose ->pdsc back-pointer is unset, is ignored.
+ */
+void pdsc_qcq_free(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+	struct device *dev = pdsc->dev;
+
+	if (!qcq || !qcq->pdsc)
+		return;
+
+	pdsc_debugfs_del_qcq(qcq);
+	pdsc_qcq_intr_free(pdsc, qcq);
+
+	/* DMA rings: cq_base is not set when both rings share q_base */
+	if (qcq->q_base)
+		dma_free_coherent(dev, qcq->q_size,
+				  qcq->q_base, qcq->q_base_pa);
+	if (qcq->cq_base)
+		dma_free_coherent(dev, qcq->cq_size,
+				  qcq->cq_base, qcq->cq_base_pa);
+
+	/* Per-descriptor info arrays; vfree() ignores NULL */
+	vfree(qcq->cq.info);
+	vfree(qcq->q.info);
+
+	memset(qcq, 0, sizeof(*qcq));
+}
+
+/* Record the ring location in @q and point each info entry at its
+ * descriptor, desc_size bytes apart starting at @base.
+ */
+static void pdsc_q_map(struct pdsc_queue *q, void *base, dma_addr_t base_pa)
+{
+	unsigned int idx;
+
+	q->base = base;
+	q->base_pa = base_pa;
+
+	for (idx = 0; idx < q->num_descs; idx++)
+		q->info[idx].desc = base + idx * q->desc_size;
+}
+
+/* Record the ring location in @cq and point each info entry at its
+ * completion descriptor, desc_size bytes apart starting at @base.
+ */
+static void pdsc_cq_map(struct pdsc_cq *cq, void *base, dma_addr_t base_pa)
+{
+	unsigned int idx;
+
+	cq->base = base;
+	cq->base_pa = base_pa;
+
+	for (idx = 0; idx < cq->num_descs; idx++)
+		cq->info[idx].comp = base + idx * cq->desc_size;
+}
+
+/* Allocate and wire up one queue / completion-queue pair in @qcq.
+ *
+ * @type, @index, @pid: stored in the queue as given
+ * @name:               queue name prefix; @index is appended to it
+ * @flags:              PDS_CORE_QCQ_F_* bits; INTR asks for an interrupt,
+ *                      NOTIFYQ puts q and cq in one contiguous DMA area
+ * @num_descs:          number of descriptors in each of the two rings
+ * @desc_size:          bytes per q descriptor
+ * @cq_desc_size:       bytes per cq descriptor
+ * @qcq:                caller-provided storage to fill in
+ *
+ * Returns 0 on success or a negative errno.  On failure everything that
+ * was allocated here is released, and @qcq is zeroed on every failure
+ * except the very first allocation.
+ */
+int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index,
+		   const char *name, unsigned int flags, unsigned int num_descs,
+		   unsigned int desc_size, unsigned int cq_desc_size,
+		   unsigned int pid, struct pdsc_qcq *qcq)
+{
+	struct device *dev = pdsc->dev;
+	void *q_base, *cq_base;
+	dma_addr_t cq_base_pa;
+	dma_addr_t q_base_pa;
+	int err;
+
+	/* vcalloc() checks the count * size multiplication for overflow */
+	qcq->q.info = vcalloc(num_descs, sizeof(*qcq->q.info));
+	if (!qcq->q.info) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+
+	qcq->pdsc = pdsc;
+	qcq->flags = flags;
+	INIT_WORK(&qcq->work, pdsc_work_thread);
+
+	qcq->q.type = type;
+	qcq->q.index = index;
+	qcq->q.num_descs = num_descs;
+	qcq->q.desc_size = desc_size;
+	qcq->q.tail_idx = 0;
+	qcq->q.head_idx = 0;
+	qcq->q.pid = pid;
+	snprintf(qcq->q.name, sizeof(qcq->q.name), "%s%u", name, index);
+
+	err = pdsc_qcq_intr_alloc(pdsc, qcq);
+	if (err)
+		goto err_out_free_q_info;
+
+	qcq->cq.info = vcalloc(num_descs, sizeof(*qcq->cq.info));
+	if (!qcq->cq.info) {
+		err = -ENOMEM;
+		goto err_out_free_irq;
+	}
+
+	/* NOTE(review): for queues without an interrupt, intx is
+	 * PDS_CORE_INTR_INDEX_NOT_ASSIGNED here, so bound_intr does not
+	 * point at a usable slot - confirm no user dereferences it.
+	 */
+	qcq->cq.bound_intr = &pdsc->intr_info[qcq->intx];
+	qcq->cq.num_descs = num_descs;
+	qcq->cq.desc_size = cq_desc_size;
+	qcq->cq.tail_idx = 0;
+	qcq->cq.done_color = 1;
+
+	if (flags & PDS_CORE_QCQ_F_NOTIFYQ) {
+		/* q & cq need to be contiguous in case of notifyq */
+		qcq->q_size = PDS_PAGE_SIZE +
+			      ALIGN(num_descs * desc_size, PDS_PAGE_SIZE) +
+			      ALIGN(num_descs * cq_desc_size, PDS_PAGE_SIZE);
+		/* q_size already covers both rings and is the size later
+		 * passed to dma_free_coherent(), so allocate exactly that
+		 * rather than adding cq_size, which this branch never sets.
+		 */
+		qcq->q_base = dma_alloc_coherent(dev, qcq->q_size,
+						 &qcq->q_base_pa,
+						 GFP_KERNEL);
+		if (!qcq->q_base) {
+			err = -ENOMEM;
+			goto err_out_free_cq_info;
+		}
+		q_base = PTR_ALIGN(qcq->q_base, PDS_PAGE_SIZE);
+		q_base_pa = ALIGN(qcq->q_base_pa, PDS_PAGE_SIZE);
+		pdsc_q_map(&qcq->q, q_base, q_base_pa);
+
+		/* The cq ring starts on the page boundary after the q ring */
+		cq_base = PTR_ALIGN(q_base +
+				    ALIGN(num_descs * desc_size, PDS_PAGE_SIZE),
+				    PDS_PAGE_SIZE);
+		cq_base_pa = ALIGN(qcq->q_base_pa +
+				   ALIGN(num_descs * desc_size, PDS_PAGE_SIZE),
+				   PDS_PAGE_SIZE);
+
+	} else {
+		/* q DMA descriptors */
+		qcq->q_size = PDS_PAGE_SIZE + (num_descs * desc_size);
+		qcq->q_base = dma_alloc_coherent(dev, qcq->q_size,
+						 &qcq->q_base_pa,
+						 GFP_KERNEL);
+		if (!qcq->q_base) {
+			err = -ENOMEM;
+			goto err_out_free_cq_info;
+		}
+		q_base = PTR_ALIGN(qcq->q_base, PDS_PAGE_SIZE);
+		q_base_pa = ALIGN(qcq->q_base_pa, PDS_PAGE_SIZE);
+		pdsc_q_map(&qcq->q, q_base, q_base_pa);
+
+		/* cq DMA descriptors */
+		qcq->cq_size = PDS_PAGE_SIZE + (num_descs * cq_desc_size);
+		qcq->cq_base = dma_alloc_coherent(dev, qcq->cq_size,
+						  &qcq->cq_base_pa,
+						  GFP_KERNEL);
+		if (!qcq->cq_base) {
+			err = -ENOMEM;
+			goto err_out_free_q;
+		}
+		cq_base = PTR_ALIGN(qcq->cq_base, PDS_PAGE_SIZE);
+		cq_base_pa = ALIGN(qcq->cq_base_pa, PDS_PAGE_SIZE);
+	}
+
+	pdsc_cq_map(&qcq->cq, cq_base, cq_base_pa);
+	qcq->cq.bound_q = &qcq->q;
+
+	pdsc_debugfs_add_qcq(pdsc, qcq);
+
+	return 0;
+
+err_out_free_q:
+	dma_free_coherent(dev, qcq->q_size, qcq->q_base, qcq->q_base_pa);
+err_out_free_cq_info:
+	vfree(qcq->cq.info);
+err_out_free_irq:
+	pdsc_qcq_intr_free(pdsc, qcq);
+err_out_free_q_info:
+	vfree(qcq->q.info);
+	/* Leave no stale pointers behind for a later pdsc_qcq_free() */
+	memset(qcq, 0, sizeof(*qcq));
+err_out:
+	/* index is unsigned; print it the same way the queue name does */
+	dev_err(dev, "qcq alloc of %s%u failed %d\n", name, index, err);
+	return err;
+}
+
+/* Issue the PDS_CORE_CMD_INIT device command, handing the device the
+ * adminq/notifyq ring addresses and sizes, then record the hardware
+ * queue identifiers it returns and map the kernel doorbell page.
+ *
+ * Returns 0 on success, the errno from the device command, or -ENOMEM
+ * when the doorbell page cannot be mapped.
+ */
+static int pdsc_core_init(struct pdsc *pdsc)
+{
+	union pds_core_dev_comp comp = {};
+	union pds_core_dev_cmd cmd = {
+		.init.opcode = PDS_CORE_CMD_INIT,
+	};
+	/* Both are zeroed: cidi is copied to the device wholesale, so any
+	 * member or padding not assigned below must not carry stack
+	 * contents, and cido may be filled only partially when the data
+	 * window is smaller than the struct.
+	 */
+	struct pds_core_dev_init_data_out cido = {};
+	struct pds_core_dev_init_data_in cidi = {};
+	u32 dbid_count;
+	u32 dbpage_num;
+	size_t sz;
+	int err;
+
+	cidi.adminq_q_base = cpu_to_le64(pdsc->adminqcq.q_base_pa);
+	cidi.adminq_cq_base = cpu_to_le64(pdsc->adminqcq.cq_base_pa);
+	cidi.notifyq_cq_base = cpu_to_le64(pdsc->notifyqcq.cq.base_pa);
+	cidi.flags = cpu_to_le32(PDS_CORE_QINIT_F_IRQ | PDS_CORE_QINIT_F_ENA);
+	cidi.intr_index = cpu_to_le16(pdsc->adminqcq.intx);
+	/* Ring sizes are passed as log2 of the descriptor count */
+	cidi.adminq_ring_size = ilog2(pdsc->adminqcq.q.num_descs);
+	cidi.notifyq_ring_size = ilog2(pdsc->notifyqcq.q.num_descs);
+
+	mutex_lock(&pdsc->devcmd_lock);
+
+	/* Never copy more than the device's data window can hold */
+	sz = min_t(size_t, sizeof(cidi), sizeof(pdsc->cmd_regs->data));
+	memcpy_toio(&pdsc->cmd_regs->data, &cidi, sz);
+
+	err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+	if (!err) {
+		sz = min_t(size_t, sizeof(cido), sizeof(pdsc->cmd_regs->data));
+		memcpy_fromio(&cido, &pdsc->cmd_regs->data, sz);
+	}
+
+	mutex_unlock(&pdsc->devcmd_lock);
+	if (err) {
+		dev_err(pdsc->dev, "Device init command failed: %pe\n",
+			ERR_PTR(err));
+		return err;
+	}
+
+	pdsc->hw_index = le32_to_cpu(cido.core_hw_index);
+
+	/* The doorbell page is selected by hw_index * pages-per-lif */
+	dbid_count = le32_to_cpu(pdsc->dev_ident.ndbpgs_per_lif);
+	dbpage_num = pdsc->hw_index * dbid_count;
+	pdsc->kern_dbpage = pdsc_map_dbpage(pdsc, dbpage_num);
+	if (!pdsc->kern_dbpage) {
+		dev_err(pdsc->dev, "Cannot map dbpage, aborting\n");
+		return -ENOMEM;
+	}
+
+	pdsc->adminqcq.q.hw_type = cido.adminq_hw_type;
+	pdsc->adminqcq.q.hw_index = le32_to_cpu(cido.adminq_hw_index);
+	pdsc->adminqcq.q.dbval = PDS_CORE_DBELL_QID(pdsc->adminqcq.q.hw_index);
+
+	pdsc->notifyqcq.q.hw_type = cido.notifyq_hw_type;
+	pdsc->notifyqcq.q.hw_index = le32_to_cpu(cido.notifyq_hw_index);
+	pdsc->notifyqcq.q.dbval = PDS_CORE_DBELL_QID(pdsc->notifyqcq.q.hw_index);
+
+	pdsc->last_eid = 0;
+
+	return 0;
+}
+
 int pdsc_setup(struct pdsc *pdsc, bool init)
 {
+	int numdescs;
 	int err;
 
 	if (init)
@@ -14,17 +369,60 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
 	if (err)
 		return err;
 
+	/* Scale the descriptor ring length based on number of CPUs and VFs */
+	numdescs = max_t(int, PDSC_ADMINQ_MIN_LENGTH, num_online_cpus());
+	numdescs += 2 * pci_sriov_get_totalvfs(pdsc->pdev);
+	numdescs = roundup_pow_of_two(numdescs);
+	err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_ADMINQ, 0, "adminq",
+			     PDS_CORE_QCQ_F_CORE | PDS_CORE_QCQ_F_INTR,
+			     numdescs,
+			     sizeof(union pds_core_adminq_cmd),
+			     sizeof(union pds_core_adminq_comp),
+			     0, &pdsc->adminqcq);
+	if (err)
+		goto err_out_teardown;
+
+	err = pdsc_qcq_alloc(pdsc, PDS_CORE_QTYPE_NOTIFYQ, 0, "notifyq",
+			     PDS_CORE_QCQ_F_NOTIFYQ,
+			     PDSC_NOTIFYQ_LENGTH,
+			     sizeof(struct pds_core_notifyq_cmd),
+			     sizeof(union pds_core_notifyq_comp),
+			     0, &pdsc->notifyqcq);
+	if (err)
+		goto err_out_teardown;
+
+	/* NotifyQ rides on the AdminQ interrupt */
+	pdsc->notifyqcq.intx = pdsc->adminqcq.intx;
+
+	/* Set up the Core with the AdminQ and NotifyQ info */
+	err = pdsc_core_init(pdsc);
+	if (err)
+		goto err_out_teardown;
+
 	clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
 	return 0;
+
+err_out_teardown:
+	pdsc_teardown(pdsc, init);
+	return err;
 }
 
 void pdsc_teardown(struct pdsc *pdsc, bool removing)
 {
+	int i;
+
 	pdsc_devcmd_reset(pdsc);
+	pdsc_qcq_free(pdsc, &pdsc->notifyqcq);
+	pdsc_qcq_free(pdsc, &pdsc->adminqcq);
+
+	if (pdsc->intr_info) {
+		for (i = 0; i < pdsc->nintrs; i++)
+			pdsc_intr_free(pdsc, i);
 
-	if (removing) {
-		kfree(pdsc->intr_info);
-		pdsc->intr_info = NULL;
+		if (removing) {
+			kfree(pdsc->intr_info);
+			pdsc->intr_info = NULL;
+		}
 	}
 
 	if (pdsc->kern_dbpage) {
@@ -35,6 +433,28 @@ void pdsc_teardown(struct pdsc *pdsc, bool removing)
 	set_bit(PDSC_S_FW_DEAD, &pdsc->state);
 }
 
+/* Clear the mask on the adminq's interrupt.  Always returns 0. */
+int pdsc_start(struct pdsc *pdsc)
+{
+	struct pds_core_intr __iomem *adminq_intr;
+
+	adminq_intr = &pdsc->intr_ctrl[pdsc->adminqcq.intx];
+	pds_core_intr_mask(adminq_intr, PDS_CORE_INTR_MASK_CLEAR);
+
+	return 0;
+}
+
+/* Set the mask on every interrupt slot that currently holds a vector.
+ * Does nothing when the slot array is not allocated.
+ */
+void pdsc_stop(struct pdsc *pdsc)
+{
+	int idx;
+
+	if (!pdsc->intr_info)
+		return;
+
+	for (idx = 0; idx < pdsc->nintrs; idx++) {
+		/* Slots without a vector are not in use */
+		if (!pdsc->intr_info[idx].vector)
+			continue;
+
+		pds_core_intr_mask(&pdsc->intr_ctrl[idx],
+				   PDS_CORE_INTR_MASK_SET);
+	}
+}
+
 static void pdsc_fw_down(struct pdsc *pdsc)
 {
 	if (test_and_set_bit(PDSC_S_FW_DEAD, &pdsc->state)) {
@@ -44,6 +464,7 @@ static void pdsc_fw_down(struct pdsc *pdsc)
 
 	devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);
 
+	pdsc_stop(pdsc);
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_RECOVERY);
 }
 
@@ -60,6 +481,10 @@ static void pdsc_fw_up(struct pdsc *pdsc)
 	if (err)
 		goto err_out;
 
+	err = pdsc_start(pdsc);
+	if (err)
+		goto err_out;
+
 	pdsc->fw_recoveries++;
 	devlink_health_reporter_state_update(pdsc->fw_reporter,
 					     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 32aa38c40024..7adbd10cb6d9 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -9,11 +9,15 @@
 
 #include <linux/pds/pds_common.h>
 #include <linux/pds/pds_core_if.h>
+#include <linux/pds/pds_adminq.h>
 #include <linux/pds/pds_intr.h>
 
 #define PDSC_DRV_DESCRIPTION	"AMD/Pensando Core Driver"
 
 #define PDSC_WATCHDOG_SECS	5
+#define PDSC_QUEUE_NAME_MAX_SZ  32
+#define PDSC_ADMINQ_MIN_LENGTH	16	/* must be a power of two */
+#define PDSC_NOTIFYQ_LENGTH	64	/* must be a power of two */
 #define PDSC_TEARDOWN_RECOVERY	false
 #define PDSC_TEARDOWN_REMOVING	true
 #define PDSC_SETUP_RECOVERY	false
@@ -33,6 +37,28 @@ struct pdsc_devinfo {
 	char serial_num[PDS_CORE_DEVINFO_SERIAL_BUFLEN + 1];
 };
 
+struct pdsc_queue {
+	struct pdsc_q_info *info;
+	u64 dbval;
+	u16 head_idx;
+	u16 tail_idx;
+	u8 hw_type;
+	unsigned int index;
+	unsigned int num_descs;
+	u64 dbell_count;
+	u64 features;
+	unsigned int type;
+	unsigned int hw_index;
+	union {
+		void *base;
+		struct pds_core_admin_cmd *adminq;
+	};
+	dma_addr_t base_pa;	/* must be page aligned */
+	unsigned int desc_size;
+	unsigned int pid;
+	char name[PDSC_QUEUE_NAME_MAX_SZ];
+};
+
 #define PDSC_INTR_NAME_MAX_SZ		32
 
 struct pdsc_intr_info {
@@ -42,6 +68,61 @@ struct pdsc_intr_info {
 	void *data;
 };
 
+struct pdsc_cq_info {
+	void *comp;
+};
+
+struct pdsc_buf_info {
+	struct page *page;
+	dma_addr_t dma_addr;
+	u32 page_offset;
+	u32 len;
+};
+
+struct pdsc_q_info {
+	union {
+		void *desc;
+		struct pdsc_admin_cmd *adminq_desc;
+	};
+	unsigned int bytes;
+	unsigned int nbufs;
+	struct pdsc_buf_info bufs[PDS_CORE_MAX_FRAGS];
+	struct pdsc_wait_context *wc;
+	void *dest;
+};
+
+struct pdsc_cq {
+	struct pdsc_cq_info *info;
+	struct pdsc_queue *bound_q;
+	struct pdsc_intr_info *bound_intr;
+	u16 tail_idx;
+	bool done_color;
+	unsigned int num_descs;
+	unsigned int desc_size;
+	void *base;
+	dma_addr_t base_pa;	/* must be page aligned */
+} ____cacheline_aligned_in_smp;
+
+struct pdsc_qcq {
+	struct pdsc *pdsc;
+	void *q_base;
+	dma_addr_t q_base_pa;	/* might not be page aligned */
+	void *cq_base;
+	dma_addr_t cq_base_pa;	/* might not be page aligned */
+	u32 q_size;
+	u32 cq_size;
+	bool armed;
+	unsigned int flags;
+
+	struct work_struct work;
+	struct pdsc_queue q;
+	struct pdsc_cq cq;
+	int intx;
+
+	u32 accum_work;
+	struct dentry *dentry;
+};
+
 /* No state flags set means we are in a steady running state */
 enum pdsc_state_flags {
 	PDSC_S_FW_DEAD,		    /* stopped, wait on startup or recovery */
@@ -81,6 +162,7 @@ struct pdsc {
 	unsigned int devcmd_timeout;
 	struct mutex devcmd_lock;	/* lock for dev_cmd operations */
 	struct mutex config_lock;	/* lock for configuration operations */
+	spinlock_t adminq_lock;		/* lock for adminq operations */
 	struct pds_core_dev_info_regs __iomem *info_regs;
 	struct pds_core_dev_cmd_regs __iomem *cmd_regs;
 	struct pds_core_intr __iomem *intr_ctrl;
@@ -88,11 +170,64 @@ struct pdsc {
 	u64 __iomem *db_pages;
 	dma_addr_t phy_db_pages;
 	u64 __iomem *kern_dbpage;
+
+	struct pdsc_qcq adminqcq;
+	struct pdsc_qcq notifyqcq;
+	u64 last_eid;
 };
 
+/**
+ * enum pds_core_dbell_bits - bitwise composition of dbell values.
+ *
+ * @PDS_CORE_DBELL_QID_MASK:	unshifted mask of valid queue id bits.
+ * @PDS_CORE_DBELL_QID_SHIFT:	queue id shift amount in dbell value.
+ * @PDS_CORE_DBELL_QID:		macro to build QID component of dbell value.
+ *
+ * @PDS_CORE_DBELL_RING_MASK:	unshifted mask of valid ring bits.
+ * @PDS_CORE_DBELL_RING_SHIFT:	ring shift amount in dbell value.
+ * @PDS_CORE_DBELL_RING:	macro to build ring component of dbell value.
+ *
+ * @PDS_CORE_DBELL_RING_0:	ring zero dbell component value.
+ * @PDS_CORE_DBELL_RING_1:	ring one dbell component value.
+ * @PDS_CORE_DBELL_RING_2:	ring two dbell component value.
+ * @PDS_CORE_DBELL_RING_3:	ring three dbell component value.
+ *
+ * @PDS_CORE_DBELL_INDEX_MASK:	bit mask of valid index bits, no shift needed.
+ */
+enum pds_core_dbell_bits {
+	PDS_CORE_DBELL_QID_MASK		= 0xffffff,
+	PDS_CORE_DBELL_QID_SHIFT		= 24,
+
+#define PDS_CORE_DBELL_QID(n) \
+	(((u64)(n) & PDS_CORE_DBELL_QID_MASK) << PDS_CORE_DBELL_QID_SHIFT)
+
+	PDS_CORE_DBELL_RING_MASK		= 0x7,
+	PDS_CORE_DBELL_RING_SHIFT		= 16,
+
+#define PDS_CORE_DBELL_RING(n) \
+	(((u64)(n) & PDS_CORE_DBELL_RING_MASK) << PDS_CORE_DBELL_RING_SHIFT)
+
+	PDS_CORE_DBELL_RING_0		= 0,
+	PDS_CORE_DBELL_RING_1		= PDS_CORE_DBELL_RING(1),
+	PDS_CORE_DBELL_RING_2		= PDS_CORE_DBELL_RING(2),
+	PDS_CORE_DBELL_RING_3		= PDS_CORE_DBELL_RING(3),
+
+	PDS_CORE_DBELL_INDEX_MASK		= 0xffff,
+};
+
+/* Write the 64-bit doorbell value @val to the entry of @db_page that is
+ * indexed by @qtype.
+ */
+static inline void pds_core_dbell_ring(u64 __iomem *db_page,
+				       enum pds_core_logical_qtype qtype,
+				       u64 val)
+{
+	writeq(val, &db_page[qtype]);
+}
+
 int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 			      struct devlink_fmsg *fmsg,
 			      struct netlink_ext_ack *extack);
+int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+		     struct netlink_ext_ack *extack);
+
+void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num);
 
 void pdsc_debugfs_create(void);
 void pdsc_debugfs_destroy(void);
@@ -100,6 +235,8 @@ void pdsc_debugfs_add_dev(struct pdsc *pdsc);
 void pdsc_debugfs_del_dev(struct pdsc *pdsc);
 void pdsc_debugfs_add_ident(struct pdsc *pdsc);
 void pdsc_debugfs_add_irqs(struct pdsc *pdsc);
+void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq);
+void pdsc_debugfs_del_qcq(struct pdsc_qcq *qcq);
 
 int pdsc_err_to_errno(enum pds_core_status_code code);
 bool pdsc_is_fw_running(struct pdsc *pdsc);
@@ -113,8 +250,22 @@ int pdsc_devcmd_reset(struct pdsc *pdsc);
 int pdsc_dev_reinit(struct pdsc *pdsc);
 int pdsc_dev_init(struct pdsc *pdsc);
 
+int pdsc_intr_alloc(struct pdsc *pdsc, char *name,
+		    irq_handler_t handler, void *data);
+void pdsc_intr_free(struct pdsc *pdsc, int index);
+void pdsc_qcq_free(struct pdsc *pdsc, struct pdsc_qcq *qcq);
+int pdsc_qcq_alloc(struct pdsc *pdsc, unsigned int type, unsigned int index,
+		   const char *name, unsigned int flags, unsigned int num_descs,
+		   unsigned int desc_size, unsigned int cq_desc_size,
+		   unsigned int pid, struct pdsc_qcq *qcq);
 int pdsc_setup(struct pdsc *pdsc, bool init);
 void pdsc_teardown(struct pdsc *pdsc, bool removing);
+int pdsc_start(struct pdsc *pdsc);
+void pdsc_stop(struct pdsc *pdsc);
 void pdsc_health_thread(struct work_struct *work);
 
+void pdsc_process_adminq(struct pdsc_qcq *qcq);
+void pdsc_work_thread(struct work_struct *work);
+irqreturn_t pdsc_adminq_isr(int irq, void *data);
+
 #endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/debugfs.c b/drivers/net/ethernet/amd/pds_core/debugfs.c
index 601431b41abb..b83e5016644b 100644
--- a/drivers/net/ethernet/amd/pds_core/debugfs.c
+++ b/drivers/net/ethernet/amd/pds_core/debugfs.c
@@ -67,3 +67,80 @@ void pdsc_debugfs_add_ident(struct pdsc *pdsc)
 	debugfs_create_file("identity", 0400, pdsc->dentry,
 			    pdsc, &identity_fops);
 }
+
+static const struct debugfs_reg32 intr_ctrl_regs[] = {
+	{ .name = "coal_init", .offset = 0, },
+	{ .name = "mask", .offset = 4, },
+	{ .name = "credits", .offset = 8, },
+	{ .name = "mask_on_assert", .offset = 12, },
+	{ .name = "coal_timer", .offset = 16, },
+};
+
+/* Create the debugfs directory for @qcq under the device's directory and
+ * fill it with read-only views of the qcq, its q and its cq.  Queues with
+ * PDS_CORE_QCQ_F_INTR set also get an "intr" directory with the slot
+ * index, the vector and a dump of the interrupt control registers.
+ *
+ * Failures are not reported; whatever could not be created is missing.
+ */
+void pdsc_debugfs_add_qcq(struct pdsc *pdsc, struct pdsc_qcq *qcq)
+{
+	struct dentry *qcq_dentry, *q_dentry, *cq_dentry;
+	struct debugfs_regset32 *intr_ctrl_regset;
+	struct pdsc_queue *q = &qcq->q;
+	struct pdsc_cq *cq = &qcq->cq;
+	struct pdsc_intr_info *intr;
+	struct dentry *intr_dentry;
+
+	qcq_dentry = debugfs_create_dir(q->name, pdsc->dentry);
+	if (IS_ERR_OR_NULL(qcq_dentry))
+		return;
+	qcq->dentry = qcq_dentry;
+
+	debugfs_create_x64("q_base_pa", 0400, qcq_dentry, &qcq->q_base_pa);
+	debugfs_create_x32("q_size", 0400, qcq_dentry, &qcq->q_size);
+	debugfs_create_x64("cq_base_pa", 0400, qcq_dentry, &qcq->cq_base_pa);
+	debugfs_create_x32("cq_size", 0400, qcq_dentry, &qcq->cq_size);
+	debugfs_create_x32("accum_work", 0400, qcq_dentry, &qcq->accum_work);
+
+	q_dentry = debugfs_create_dir("q", qcq->dentry);
+	if (IS_ERR_OR_NULL(q_dentry))
+		return;
+
+	debugfs_create_u32("index", 0400, q_dentry, &q->index);
+	debugfs_create_u32("num_descs", 0400, q_dentry, &q->num_descs);
+	debugfs_create_u32("desc_size", 0400, q_dentry, &q->desc_size);
+	debugfs_create_u32("pid", 0400, q_dentry, &q->pid);
+
+	debugfs_create_u16("tail", 0400, q_dentry, &q->tail_idx);
+	debugfs_create_u16("head", 0400, q_dentry, &q->head_idx);
+
+	cq_dentry = debugfs_create_dir("cq", qcq->dentry);
+	if (IS_ERR_OR_NULL(cq_dentry))
+		return;
+
+	debugfs_create_x64("base_pa", 0400, cq_dentry, &cq->base_pa);
+	debugfs_create_u32("num_descs", 0400, cq_dentry, &cq->num_descs);
+	debugfs_create_u32("desc_size", 0400, cq_dentry, &cq->desc_size);
+	debugfs_create_bool("done_color", 0400, cq_dentry, &cq->done_color);
+	debugfs_create_u16("tail", 0400, cq_dentry, &cq->tail_idx);
+
+	if (!(qcq->flags & PDS_CORE_QCQ_F_INTR))
+		return;
+
+	/* Only index intr_info[] once we know this qcq owns a slot */
+	intr = &pdsc->intr_info[qcq->intx];
+
+	intr_dentry = debugfs_create_dir("intr", qcq->dentry);
+	if (IS_ERR_OR_NULL(intr_dentry))
+		return;
+
+	debugfs_create_u32("index", 0400, intr_dentry, &intr->index);
+	debugfs_create_u32("vector", 0400, intr_dentry, &intr->vector);
+
+	/* The regset must stay valid for as long as the debugfs file
+	 * exists, and nothing in this file frees it, so tie its lifetime
+	 * to the device rather than leaking a kzalloc() per call.
+	 */
+	intr_ctrl_regset = devm_kzalloc(pdsc->dev, sizeof(*intr_ctrl_regset),
+					GFP_KERNEL);
+	if (!intr_ctrl_regset)
+		return;
+	intr_ctrl_regset->regs = intr_ctrl_regs;
+	intr_ctrl_regset->nregs = ARRAY_SIZE(intr_ctrl_regs);
+	intr_ctrl_regset->base = &pdsc->intr_ctrl[intr->index];
+
+	debugfs_create_regset32("intr_ctrl", 0400, intr_dentry,
+				intr_ctrl_regset);
+}
+
+/* Remove the debugfs directory of @qcq, with everything beneath it, and
+ * forget the dentry.
+ */
+void pdsc_debugfs_del_qcq(struct pdsc_qcq *qcq)
+{
+	debugfs_remove_recursive(qcq->dentry);
+	qcq->dentry = NULL;
+}
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index 3b05b1af65d1..89f23d620d1d 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -3,6 +3,67 @@
 
 #include "core.h"
 
+/* devlink version names for the firmware slots, indexed by slot number */
+static const char * const fw_slotnames[] = {
+	"fw.goldfw",
+	"fw.mainfwa",
+	"fw.mainfwb",
+};
+
+/* devlink .info_get callback: report the stored firmware version of each
+ * slot (fetched with the FW_CONTROL/GET_LIST device command), the running
+ * firmware version, the ASIC id and revision, and the serial number.
+ *
+ * Returns 0 on success or the first negative errno encountered.
+ */
+int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
+		     struct netlink_ext_ack *extack)
+{
+	union pds_core_dev_cmd cmd = {
+		.fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+		.fw_control.oper = PDS_CORE_FW_GET_LIST,
+	};
+	struct pds_core_fw_list_info fw_list;
+	struct pdsc *pdsc = devlink_priv(dl);
+	union pds_core_dev_comp comp;
+	char buf[16];
+	int listlen;
+	int err;
+	int i;
+
+	mutex_lock(&pdsc->devcmd_lock);
+	err = pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout * 2);
+	memcpy_fromio(&fw_list, pdsc->cmd_regs->data, sizeof(fw_list));
+	mutex_unlock(&pdsc->devcmd_lock);
+	/* NOTE(review): -EIO is deliberately let through here; the reason
+	 * is not visible in this file - confirm against the devcmd code.
+	 */
+	if (err && err != -EIO)
+		return err;
+
+	/* The slot count comes from the device; never index past the array */
+	listlen = min_t(int, fw_list.num_fw_slots,
+			ARRAY_SIZE(fw_list.fw_names));
+	for (i = 0; i < listlen; i++) {
+		/* Known slots get their name, any others a generic one */
+		if (i < ARRAY_SIZE(fw_slotnames))
+			strscpy(buf, fw_slotnames[i], sizeof(buf));
+		else
+			snprintf(buf, sizeof(buf), "fw.slot_%d", i);
+		err = devlink_info_version_stored_put(req, buf,
+						      fw_list.fw_names[i].fw_version);
+		if (err)
+			return err;
+	}
+
+	err = devlink_info_version_running_put(req,
+					       DEVLINK_INFO_VERSION_GENERIC_FW,
+					       pdsc->dev_info.fw_version);
+	if (err)
+		return err;
+
+	snprintf(buf, sizeof(buf), "0x%x", pdsc->dev_info.asic_type);
+	err = devlink_info_version_fixed_put(req,
+					     DEVLINK_INFO_VERSION_GENERIC_ASIC_ID,
+					     buf);
+	if (err)
+		return err;
+
+	snprintf(buf, sizeof(buf), "0x%x", pdsc->dev_info.asic_rev);
+	err = devlink_info_version_fixed_put(req,
+					     DEVLINK_INFO_VERSION_GENERIC_ASIC_REV,
+					     buf);
+	if (err)
+		return err;
+
+	return devlink_info_serial_number_put(req, pdsc->dev_info.serial_num);
+}
+
 int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 			      struct devlink_fmsg *fmsg,
 			      struct netlink_ext_ack *extack)
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index 54f3aed7adb1..eaff311d4a10 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -125,6 +125,13 @@ err_out:
 	return err;
 }
 
+/* Map PAGE_SIZE bytes of the doorbell BAR starting at page @page_num and
+ * return what pci_iomap_range() hands back.
+ */
+void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num)
+{
+	int bar = pdsc->bars[PDS_CORE_PCI_BAR_DBELL].res_index;
+	u64 offset = (u64)page_num << PAGE_SHIFT;
+
+	return pci_iomap_range(pdsc->pdev, bar, offset, PAGE_SIZE);
+}
+
 static int pdsc_init_vf(struct pdsc *vf)
 {
 	return -1;
@@ -166,6 +173,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
 	mutex_init(&pdsc->devcmd_lock);
 	mutex_init(&pdsc->config_lock);
+	spin_lock_init(&pdsc->adminq_lock);
 
 	mutex_lock(&pdsc->config_lock);
 	set_bit(PDSC_S_FW_DEAD, &pdsc->state);
@@ -173,6 +181,9 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 	err = pdsc_setup(pdsc, PDSC_SETUP_INIT);
 	if (err)
 		goto err_out_unmap_bars;
+	err = pdsc_start(pdsc);
+	if (err)
+		goto err_out_teardown;
 
 	mutex_unlock(&pdsc->config_lock);
 
@@ -184,7 +195,7 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 		dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
 		err = PTR_ERR(hr);
 		devl_unlock(dl);
-		goto err_out_teardown;
+		goto err_out_stop;
 	}
 	pdsc->fw_reporter = hr;
 
@@ -196,6 +207,8 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
 	return 0;
 
+err_out_stop:
+	pdsc_stop(pdsc);
 err_out_teardown:
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
 err_out_unmap_bars:
@@ -214,6 +227,7 @@ err_out_release_regions:
 }
 
 static const struct devlink_ops pdsc_dl_ops = {
+	.info_get	= pdsc_dl_info_get,
 };
 
 static const struct devlink_ops pdsc_dl_vf_ops = {
@@ -315,6 +329,7 @@ static void pdsc_remove(struct pci_dev *pdev)
 		mutex_lock(&pdsc->config_lock);
 		set_bit(PDSC_S_STOPPING_DRIVER, &pdsc->state);
 
+		pdsc_stop(pdsc);
 		pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
 		mutex_unlock(&pdsc->config_lock);
 		mutex_destroy(&pdsc->config_lock);
diff --git a/include/linux/pds/pds_adminq.h b/include/linux/pds/pds_adminq.h
new file mode 100644
index 000000000000..dd5fbe3ee141
--- /dev/null
+++ b/include/linux/pds/pds_adminq.h
@@ -0,0 +1,638 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#ifndef _PDS_CORE_ADMINQ_H_
+#define _PDS_CORE_ADMINQ_H_
+
+enum pds_core_adminq_flags {
+	PDS_AQ_FLAG_FASTPOLL	= BIT(1),	/* completion poll at 1ms */
+};
+
+/*
+ * enum pds_core_adminq_opcode - AdminQ command opcodes
+ * These commands are only processed on AdminQ, not available in devcmd
+ */
+enum pds_core_adminq_opcode {
+	PDS_AQ_CMD_NOP			= 0,
+
+	/* Client control */
+	PDS_AQ_CMD_CLIENT_REG		= 6,
+	PDS_AQ_CMD_CLIENT_UNREG		= 7,
+	PDS_AQ_CMD_CLIENT_CMD		= 8,
+
+	/* LIF commands */
+	PDS_AQ_CMD_LIF_IDENTIFY		= 20,
+	PDS_AQ_CMD_LIF_INIT		= 21,
+	PDS_AQ_CMD_LIF_RESET		= 22,
+	PDS_AQ_CMD_LIF_GETATTR		= 23,
+	PDS_AQ_CMD_LIF_SETATTR		= 24,
+	PDS_AQ_CMD_LIF_SETPHC		= 25,
+
+	PDS_AQ_CMD_RX_MODE_SET		= 30,
+	PDS_AQ_CMD_RX_FILTER_ADD	= 31,
+	PDS_AQ_CMD_RX_FILTER_DEL	= 32,
+
+	/* Queue commands */
+	PDS_AQ_CMD_Q_IDENTIFY		= 39,
+	PDS_AQ_CMD_Q_INIT		= 40,
+	PDS_AQ_CMD_Q_CONTROL		= 41,
+
+	/* SR/IOV commands */
+	PDS_AQ_CMD_VF_GETATTR		= 60,
+	PDS_AQ_CMD_VF_SETATTR		= 61,
+};
+
+/*
+ * enum pds_core_notifyq_opcode - NotifyQ event codes
+ */
+enum pds_core_notifyq_opcode {
+	PDS_EVENT_LINK_CHANGE		= 1,
+	PDS_EVENT_RESET			= 2,
+	PDS_EVENT_XCVR			= 5,
+	PDS_EVENT_CLIENT		= 6,
+};
+
+#define PDS_COMP_COLOR_MASK  0x80
+
+/**
+ * struct pds_core_notifyq_event - Generic event reporting structure
+ * @eid:   event number
+ * @ecode: event code
+ *
+ * This is the generic event report struct from which the other
+ * actual events will be formed.
+ */
+struct pds_core_notifyq_event {
+	__le64 eid;
+	__le16 ecode;
+};
+
+/**
+ * struct pds_core_link_change_event - Link change event notification
+ * @eid:		event number
+ * @ecode:		event code = PDS_EVENT_LINK_CHANGE
+ * @link_status:	link up/down, with error bits
+ * @link_speed:		speed of the network link
+ *
+ * Sent when the network link state changes between UP and DOWN
+ */
+struct pds_core_link_change_event {
+	__le64 eid;
+	__le16 ecode;
+	__le16 link_status;
+	__le32 link_speed;	/* units of 1Mbps: e.g. 10000 = 10Gbps */
+};
+
+/**
+ * struct pds_core_reset_event - Reset event notification
+ * @eid:		event number
+ * @ecode:		event code = PDS_EVENT_RESET
+ * @reset_code:		reset type
+ * @state:		0=pending, 1=complete, 2=error
+ *
+ * Sent when the NIC or some subsystem is going to be or
+ * has been reset.
+ */
+struct pds_core_reset_event {
+	__le64 eid;
+	__le16 ecode;
+	u8     reset_code;
+	u8     state;
+};
+
+/**
+ * struct pds_core_client_event - Client event notification
+ * @eid:		event number
+ * @ecode:		event code = PDS_EVENT_CLIENT
+ * @client_id:          client to send event to
+ * @client_event:       wrapped event struct for the client
+ *
+ * Sent when an event needs to be passed on to a client
+ */
+struct pds_core_client_event {
+	__le64 eid;
+	__le16 ecode;
+	__le16 client_id;
+	u8     client_event[54];
+};
+
+/**
+ * struct pds_core_notifyq_cmd - Placeholder for building qcq
+ * @data:      anonymous field for building the qcq
+ */
+struct pds_core_notifyq_cmd {
+	__le32 data;	/* Not used but needed for qcq structure */
+};
+
+/*
+ * union pds_core_notifyq_comp - Overlay of notifyq event structures
+ */
+union pds_core_notifyq_comp {
+	struct {
+		__le64 eid;
+		__le16 ecode;
+	};
+	struct pds_core_notifyq_event     event;
+	struct pds_core_link_change_event link_change;
+	struct pds_core_reset_event       reset;
+	u8     data[64];
+};
+
+#define PDS_DEVNAME_LEN		32
+/**
+ * struct pds_core_client_reg_cmd - Register a new client with DSC
+ * @opcode:         opcode PDS_AQ_CMD_CLIENT_REG
+ * @rsvd:           word boundary padding
+ * @devname:        text name of client device
+ * @vif_type:       what type of device (enum pds_core_vif_types)
+ *
+ * Tell the DSC of the new client, and receive a client_id from DSC.
+ */
+struct pds_core_client_reg_cmd {
+	u8     opcode;
+	u8     rsvd[3];
+	char   devname[PDS_DEVNAME_LEN];
+	u8     vif_type;
+};
+
+/**
+ * struct pds_core_client_reg_comp - Client registration completion
+ * @status:     Status of the command (enum pds_core_status_code)
+ * @rsvd:       Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @client_id:  New id assigned by DSC
+ * @rsvd1:      Word boundary padding
+ * @color:      Color bit
+ */
+struct pds_core_client_reg_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	__le16 client_id;
+	u8     rsvd1[9];
+	u8     color;
+};
+
+/**
+ * struct pds_core_client_unreg_cmd - Unregister a client from DSC
+ * @opcode:     opcode PDS_AQ_CMD_CLIENT_UNREG
+ * @rsvd:       word boundary padding
+ * @client_id:  id of client being removed
+ *
+ * Tell the DSC this client is going away and remove its context
+ * This uses the generic completion.
+ */
+struct pds_core_client_unreg_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 client_id;
+};
+
+/**
+ * struct pds_core_client_request_cmd - Pass along a wrapped client AdminQ cmd
+ * @opcode:     opcode PDS_AQ_CMD_CLIENT_CMD
+ * @rsvd:       word boundary padding
+ * @client_id:  id of client the wrapped command is for
+ * @client_cmd: the wrapped client command
+ *
+ * Proxy post an adminq command for the client.
+ * This uses the generic completion.
+ */
+struct pds_core_client_request_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 client_id;
+	u8     client_cmd[60];
+};
+
+#define PDS_CORE_MAX_FRAGS		16
+
+#define PDS_CORE_QCQ_F_INITED		BIT(0)
+#define PDS_CORE_QCQ_F_SG		BIT(1)
+#define PDS_CORE_QCQ_F_INTR		BIT(2)
+#define PDS_CORE_QCQ_F_TX_STATS		BIT(3)
+#define PDS_CORE_QCQ_F_RX_STATS		BIT(4)
+#define PDS_CORE_QCQ_F_NOTIFYQ		BIT(5)
+#define PDS_CORE_QCQ_F_CMB_RINGS	BIT(6)
+#define PDS_CORE_QCQ_F_CORE		BIT(7)
+
+enum pds_core_lif_type {
+	PDS_CORE_LIF_TYPE_DEFAULT = 0,
+};
+
+/**
+ * union pds_core_lif_config - LIF configuration
+ * @state:	    LIF state (enum pds_core_lif_state)
+ * @rsvd:           Word boundary padding
+ * @name:	    LIF name
+ * @rsvd2:          Word boundary padding
+ * @features:	    LIF features active (enum pds_core_hw_features)
+ * @queue_count:    Queue counts per queue-type
+ * @words:          Full union buffer size
+ */
+union pds_core_lif_config {
+	struct {
+		u8     state;
+		u8     rsvd[3];
+		char   name[PDS_CORE_IFNAMSIZ];
+		u8     rsvd2[12];
+		__le64 features;
+		__le32 queue_count[PDS_CORE_QTYPE_MAX];
+	} __packed;
+	__le32 words[64];
+};
+
+/**
+ * struct pds_core_lif_status - LIF status register
+ * @eid:	     most recent NotifyQ event id
+ * @rsvd:            full struct size
+ */
+struct pds_core_lif_status {
+	__le64 eid;
+	u8     rsvd[56];
+};
+
+/**
+ * struct pds_core_lif_info - LIF info structure
+ * @config:	LIF configuration structure
+ * @status:	LIF status structure
+ */
+struct pds_core_lif_info {
+	union pds_core_lif_config config;
+	struct pds_core_lif_status status;
+};
+
+/**
+ * struct pds_core_lif_identity - LIF identity information (type-specific)
+ * @features:		LIF features (see enum pds_core_hw_features)
+ * @version:		Identify structure version
+ * @hw_index:		LIF hardware index
+ * @rsvd:		Word boundary padding
+ * @max_nb_sessions:	Maximum number of sessions supported
+ * @rsvd2:		buffer padding
+ * @config:		LIF config struct with features, q counts
+ */
+struct pds_core_lif_identity {
+	__le64 features;
+	u8     version;
+	u8     hw_index;
+	u8     rsvd[2];
+	__le32 max_nb_sessions;
+	u8     rsvd2[120];
+	union pds_core_lif_config config;
+};
+
+/**
+ * struct pds_core_lif_identify_cmd - Get LIF identity info command
+ * @opcode:	Opcode PDS_AQ_CMD_LIF_IDENTIFY
+ * @type:	LIF type (enum pds_core_lif_type)
+ * @client_id:	Client identifier
+ * @ver:	Version of identify returned by device
+ * @rsvd:       Word boundary padding
+ * @ident_pa:	DMA address to receive identity info
+ *
+ * Firmware will copy LIF identity data (struct pds_core_lif_identity)
+ * into the buffer address given.
+ */
+struct pds_core_lif_identify_cmd {
+	u8     opcode;
+	u8     type;
+	__le16 client_id;
+	u8     ver;
+	u8     rsvd[3];
+	__le64 ident_pa;
+};
+
+/**
+ * struct pds_core_lif_identify_comp - LIF identify command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @ver:	Version of identify returned by device
+ * @bytes:	Bytes copied into the buffer
+ * @rsvd:       Word boundary padding
+ * @color:      Color bit
+ */
+struct pds_core_lif_identify_comp {
+	u8     status;
+	u8     ver;
+	__le16 bytes;
+	u8     rsvd[11];
+	u8     color;
+};
+
+/**
+ * struct pds_core_lif_init_cmd - LIF init command
+ * @opcode:	Opcode PDS_AQ_CMD_LIF_INIT
+ * @type:	LIF type (enum pds_core_lif_type)
+ * @client_id:	Client identifier
+ * @rsvd:       Word boundary padding
+ * @info_pa:	Destination address for LIF info (struct pds_core_lif_info)
+ */
+struct pds_core_lif_init_cmd {
+	u8     opcode;
+	u8     type;
+	__le16 client_id;
+	__le32 rsvd;
+	__le64 info_pa;
+};
+
+/**
+ * struct pds_core_lif_init_comp - LIF init command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd:       Word boundary padding
+ * @hw_index:	Hardware index of the initialized LIF
+ * @rsvd1:      Word boundary padding
+ * @color:      Color bit
+ */
+struct pds_core_lif_init_comp {
+	u8 status;
+	u8 rsvd;
+	__le16 hw_index;
+	u8     rsvd1[11];
+	u8     color;
+};
+
+/**
+ * struct pds_core_lif_reset_cmd - LIF reset command
+ * Will reset only the specified LIF.
+ * @opcode:	Opcode PDS_AQ_CMD_LIF_RESET
+ * @rsvd:       Word boundary padding
+ * @client_id:	Client identifier
+ */
+struct pds_core_lif_reset_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 client_id;
+};
+
+/**
+ * enum pds_core_lif_attr - List of LIF attributes
+ * @PDS_CORE_LIF_ATTR_STATE:		LIF state attribute
+ * @PDS_CORE_LIF_ATTR_NAME:		LIF name attribute
+ * @PDS_CORE_LIF_ATTR_FEATURES:		LIF features attribute
+ * @PDS_CORE_LIF_ATTR_STATS_CTRL:	LIF statistics control attribute
+ */
+enum pds_core_lif_attr {
+	PDS_CORE_LIF_ATTR_STATE		= 0,
+	PDS_CORE_LIF_ATTR_NAME		= 1,
+	PDS_CORE_LIF_ATTR_FEATURES	= 4,
+	PDS_CORE_LIF_ATTR_STATS_CTRL	= 6,
+};
+
+/**
+ * struct pds_core_lif_setattr_cmd - Set LIF attributes on the NIC
+ * @opcode:	Opcode PDS_AQ_CMD_LIF_SETATTR
+ * @attr:	Attribute type (enum pds_core_lif_attr)
+ * @client_id:	Client identifier
+ * @state:	LIF state (enum pds_core_lif_state)
+ * @name:	The name string, 0 terminated
+ * @features:	Features (enum pds_core_hw_features)
+ * @stats_ctl:	Stats control commands (enum pds_core_stats_ctl_cmd)
+ * @rsvd:       Command Buffer padding
+ */
+struct pds_core_lif_setattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 client_id;
+	union {
+		u8      state;
+		char    name[PDS_CORE_IFNAMSIZ];
+		__le64  features;
+		u8      stats_ctl;
+		u8      rsvd[60];
+	} __packed;
+};
+
+/**
+ * struct pds_core_lif_setattr_comp - LIF set attr command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd:       Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @features:	Features (enum pds_core_hw_features)
+ * @rsvd2:      Word boundary padding
+ * @color:	Color bit
+ */
+struct pds_core_lif_setattr_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	union {
+		__le64  features;
+		u8      rsvd2[11];
+	} __packed;
+	u8     color;
+};
+
+/**
+ * struct pds_core_lif_getattr_cmd - Get LIF attributes from the NIC
+ * @opcode:	Opcode PDS_AQ_CMD_LIF_GETATTR
+ * @attr:	Attribute type (enum pds_core_lif_attr)
+ * @client_id:	Client identifier
+ */
+struct pds_core_lif_getattr_cmd {
+	u8     opcode;
+	u8     attr;
+	__le16 client_id;
+};
+
+/**
+ * struct pds_core_lif_getattr_comp - LIF get attr command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd:       Word boundary padding
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @state:	LIF state (enum pds_core_lif_state)
+ * @name:	LIF name string, 0 terminated
+ * @features:	Features (enum pds_core_hw_features)
+ * @rsvd2:      Word boundary padding
+ * @color:	Color bit
+ */
+struct pds_core_lif_getattr_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	union {
+		u8      state;
+		__le64  features;
+		u8      rsvd2[11];
+	} __packed;
+	u8     color;
+};
+
+/**
+ * struct pds_core_q_identity - Queue identity information
+ * @version:	Queue type version that can be used with FW
+ * @supported:	Bitfield of queue versions, first bit = ver 0
+ * @rsvd:       Word boundary padding
+ * @features:	Queue features
+ * @desc_sz:	Descriptor size
+ * @comp_sz:	Completion descriptor size
+ * @rsvd2:      Word boundary padding
+ */
+struct pds_core_q_identity {
+	u8      version;
+	u8      supported;
+	u8      rsvd[6];
+#define PDS_CORE_QIDENT_F_CQ	0x01	/* queue has completion ring */
+	__le64  features;
+	__le16  desc_sz;
+	__le16  comp_sz;
+	u8      rsvd2[6];
+};
+
+/**
+ * struct pds_core_q_identify_cmd - queue identify command
+ * @opcode:	Opcode PDS_AQ_CMD_Q_IDENTIFY
+ * @type:	Logical queue type (enum pds_core_logical_qtype)
+ * @client_id:	Client identifier
+ * @ver:	Highest queue type version that the driver supports
+ * @rsvd:       Word boundary padding
+ * @ident_pa:   DMA address to receive the data (struct pds_core_q_identity)
+ */
+struct pds_core_q_identify_cmd {
+	u8     opcode;
+	u8     type;
+	__le16 client_id;
+	u8     ver;
+	u8     rsvd[3];
+	__le64 ident_pa;
+};
+
+/**
+ * struct pds_core_q_identify_comp - queue identify command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd:       Word boundary padding
+ * @comp_index:	Index in the descriptor ring for which this is the completion
+ * @ver:	Queue type version that can be used with FW
+ * @rsvd1:      Word boundary padding
+ * @color:      Color bit
+ */
+struct pds_core_q_identify_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	u8     ver;
+	u8     rsvd1[10];
+	u8     color;
+};
+
+/**
+ * struct pds_core_q_init_cmd - Queue init command
+ * @opcode:	  Opcode PDS_AQ_CMD_Q_INIT
+ * @type:	  Logical queue type
+ * @client_id:	  Client identifier
+ * @ver:	  Queue type version
+ * @rsvd:         Word boundary padding
+ * @index:	  (LIF, qtype) relative admin queue index
+ * @intr_index:	  Interrupt control register index, or Event queue index
+ * @pid:	  Process ID
+ * @flags:
+ *    IRQ:	  Interrupt requested on completion
+ *    ENA:	  Enable the queue.  If ENA=0 the queue is initialized
+ *		  but remains disabled, to be later enabled with the
+ *		  Queue Enable command. If ENA=1, then queue is
+ *		  initialized and then enabled.
+ * @cos:	  Class of service for this queue
+ * @ring_size:	  Queue ring size, encoded as a log2(size), in
+ *		  number of descriptors.  The actual ring size is
+ *		  (1 << ring_size).  For example, to select a ring size
+ *		  of 64 descriptors write ring_size = 6. The minimum
+ *		  ring_size value is 2 for a ring of 4 descriptors.
+ *		  The maximum ring_size value is 12 for a ring of 4k
+ *		  descriptors. Values of ring_size <2 and >12 are
+ *		  reserved.
+ * @ring_base:	  Queue ring base address
+ * @cq_ring_base: Completion queue ring base address
+ */
+struct pds_core_q_init_cmd {
+	u8     opcode;
+	u8     type;
+	__le16 client_id;
+	u8     ver;
+	u8     rsvd[3];
+	__le32 index;
+	__le16 pid;
+	__le16 intr_index;
+	__le16 flags;
+#define PDS_CORE_QINIT_F_IRQ	0x01	/* Request interrupt on completion */
+#define PDS_CORE_QINIT_F_ENA	0x02	/* Enable the queue */
+	u8     cos;
+#define PDS_CORE_QSIZE_MIN_LG2	2
+#define PDS_CORE_QSIZE_MAX_LG2	12
+	u8     ring_size;
+	__le64 ring_base;
+	__le64 cq_ring_base;
+} __packed;
+
+/**
+ * struct pds_core_q_init_comp - Queue init command completion
+ * @status:	Status of the command (enum pds_core_status_code)
+ * @rsvd:       Word boundary padding
+ * @comp_index:	Index in the descriptor ring for which this is the completion
+ * @hw_index:	Hardware Queue ID
+ * @hw_type:	Hardware Queue type
+ * @rsvd2:      Word boundary padding
+ * @color:	Color
+ */
+struct pds_core_q_init_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	__le32 hw_index;
+	u8     hw_type;
+	u8     rsvd2[6];
+	u8     color;
+};
+
+union pds_core_adminq_cmd {
+	u8     opcode;
+	u8     bytes[64];
+
+	struct pds_core_client_reg_cmd     client_reg;
+	struct pds_core_client_unreg_cmd   client_unreg;
+	struct pds_core_client_request_cmd client_request;
+
+	struct pds_core_lif_identify_cmd  lif_ident;
+	struct pds_core_lif_init_cmd      lif_init;
+	struct pds_core_lif_reset_cmd     lif_reset;
+	struct pds_core_lif_setattr_cmd   lif_setattr;
+	struct pds_core_lif_getattr_cmd   lif_getattr;
+
+	struct pds_core_q_identify_cmd    q_ident;
+	struct pds_core_q_init_cmd        q_init;
+};
+
+union pds_core_adminq_comp {
+	struct {
+		u8     status;
+		u8     rsvd;
+		__le16 comp_index;
+		u8     rsvd2[11];
+		u8     color;
+	};
+	u32    words[4];
+
+	struct pds_core_client_reg_comp   client_reg;
+
+	struct pds_core_lif_identify_comp lif_ident;
+	struct pds_core_lif_init_comp     lif_init;
+	struct pds_core_lif_setattr_comp  lif_setattr;
+	struct pds_core_lif_getattr_comp  lif_getattr;
+
+	struct pds_core_q_identify_comp   q_ident;
+	struct pds_core_q_init_comp       q_init;
+};
+
+#ifndef __CHECKER__
+static_assert(sizeof(union pds_core_adminq_cmd) == 64);
+static_assert(sizeof(union pds_core_adminq_comp) == 16);
+static_assert(sizeof(union pds_core_notifyq_comp) == 64);
+#endif /* __CHECKER__ */
+
+/* The color bit is a 'done' bit for the completion descriptors
+ * where the meaning alternates between '1' and '0' for alternating
+ * passes through the completion descriptor ring.
+ */
+static inline u8 pdsc_color_match(u8 color, u8 done_color)
+{
+	return (!!(color & PDS_COMP_COLOR_MASK)) == done_color;
+}
+#endif /* _PDS_CORE_ADMINQ_H_ */
-- 
cgit v1.2.3


From 49ce92fbee0b6bb8066dddf37489483b3b6b5c25 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Wed, 19 Apr 2023 10:04:20 -0700
Subject: pds_core: add FW update feature to devlink

Add in the support for doing firmware updates.  Of the two
main banks available, a and b, this updates the one not in
use and then selects it for the next boot.

Example:
    devlink dev flash pci/0000:b2:00.0 \
	    file pensando/dsc_fw_1.63.0-22.tar

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/amd/pds_core.rst       |  10 ++
 drivers/net/ethernet/amd/pds_core/Makefile         |   3 +-
 drivers/net/ethernet/amd/pds_core/core.h           |   5 +
 drivers/net/ethernet/amd/pds_core/devlink.c        |   9 +
 drivers/net/ethernet/amd/pds_core/fw.c             | 194 +++++++++++++++++++++
 drivers/net/ethernet/amd/pds_core/main.c           |   1 +
 6 files changed, 221 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/amd/pds_core/fw.c

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
index a48eafb3d0d3..932ac03a3359 100644
--- a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -73,6 +73,16 @@ The ``pds_core`` driver reports the following versions
      - fixed
      - The revision of the ASIC for this device
 
+Firmware Management
+===================
+
+The ``flash`` command can update the DSC firmware.  The downloaded firmware
+will be saved into either of firmware bank 1 or bank 2, whichever is not
+currently in use, and that bank will be used for the next boot::
+
+  # devlink dev flash pci/0000:b5:00.0 \
+            file pensando/dsc_fw_1.63.0-22.tar
+
 Health Reporters
 ================
 
diff --git a/drivers/net/ethernet/amd/pds_core/Makefile b/drivers/net/ethernet/amd/pds_core/Makefile
index ef76dcd7fccd..6d1d6c58a1fa 100644
--- a/drivers/net/ethernet/amd/pds_core/Makefile
+++ b/drivers/net/ethernet/amd/pds_core/Makefile
@@ -7,6 +7,7 @@ pds_core-y := main.o \
 	      devlink.o \
 	      dev.o \
 	      adminq.o \
-	      core.o
+	      core.o \
+	      fw.o
 
 pds_core-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 7adbd10cb6d9..7eb02b359f3a 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -226,6 +226,9 @@ int pdsc_fw_reporter_diagnose(struct devlink_health_reporter *reporter,
 			      struct netlink_ext_ack *extack);
 int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 		     struct netlink_ext_ack *extack);
+int pdsc_dl_flash_update(struct devlink *dl,
+			 struct devlink_flash_update_params *params,
+			 struct netlink_ext_ack *extack);
 
 void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num);
 
@@ -268,4 +271,6 @@ void pdsc_process_adminq(struct pdsc_qcq *qcq);
 void pdsc_work_thread(struct work_struct *work);
 irqreturn_t pdsc_adminq_isr(int irq, void *data);
 
+int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
+			 struct netlink_ext_ack *extack);
 #endif /* _PDSC_H_ */
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index 89f23d620d1d..f91d65cc78b5 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -3,6 +3,15 @@
 
 #include "core.h"
 
+int pdsc_dl_flash_update(struct devlink *dl,
+			 struct devlink_flash_update_params *params,
+			 struct netlink_ext_ack *extack)
+{
+	struct pdsc *pdsc = devlink_priv(dl);
+
+	return pdsc_firmware_update(pdsc, params->fw, extack);
+}
+
 static char *fw_slotnames[] = {
 	"fw.goldfw",
 	"fw.mainfwa",
diff --git a/drivers/net/ethernet/amd/pds_core/fw.c b/drivers/net/ethernet/amd/pds_core/fw.c
new file mode 100644
index 000000000000..90a811f3878a
--- /dev/null
+++ b/drivers/net/ethernet/amd/pds_core/fw.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2023 Advanced Micro Devices, Inc */
+
+#include "core.h"
+
+/* The worst case wait for the install activity is about 25 minutes when
+ * installing a new CPLD, which is very seldom.  Normal is about 30-35
+ * seconds.  Since the driver can't tell if a CPLD update will happen we
+ * set the timeout for the ugly case.
+ */
+#define PDSC_FW_INSTALL_TIMEOUT	(25 * 60)
+#define PDSC_FW_SELECT_TIMEOUT	30
+
+/* Number of periodic log updates during fw file download */
+#define PDSC_FW_INTERVAL_FRACTION	32
+
+static int pdsc_devcmd_fw_download_locked(struct pdsc *pdsc, u64 addr,
+					  u32 offset, u32 length)
+{
+	union pds_core_dev_cmd cmd = {
+		.fw_download.opcode = PDS_CORE_CMD_FW_DOWNLOAD,
+		.fw_download.offset = cpu_to_le32(offset),
+		.fw_download.addr = cpu_to_le64(addr),
+		.fw_download.length = cpu_to_le32(length),
+	};
+	union pds_core_dev_comp comp;
+
+	return pdsc_devcmd_locked(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+static int pdsc_devcmd_fw_install(struct pdsc *pdsc)
+{
+	union pds_core_dev_cmd cmd = {
+		.fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+		.fw_control.oper = PDS_CORE_FW_INSTALL_ASYNC
+	};
+	union pds_core_dev_comp comp;
+	int err;
+
+	err = pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+	if (err < 0)
+		return err;
+
+	return comp.fw_control.slot;
+}
+
+static int pdsc_devcmd_fw_activate(struct pdsc *pdsc,
+				   enum pds_core_fw_slot slot)
+{
+	union pds_core_dev_cmd cmd = {
+		.fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+		.fw_control.oper = PDS_CORE_FW_ACTIVATE_ASYNC,
+		.fw_control.slot = slot
+	};
+	union pds_core_dev_comp comp;
+
+	return pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+}
+
+static int pdsc_fw_status_long_wait(struct pdsc *pdsc,
+				    const char *label,
+				    unsigned long timeout,
+				    u8 fw_cmd,
+				    struct netlink_ext_ack *extack)
+{
+	union pds_core_dev_cmd cmd = {
+		.fw_control.opcode = PDS_CORE_CMD_FW_CONTROL,
+		.fw_control.oper = fw_cmd,
+	};
+	union pds_core_dev_comp comp;
+	unsigned long start_time;
+	unsigned long end_time;
+	int err;
+
+	/* Ping on the status of the long running async install
+	 * command.  We get EAGAIN while the command is still
+	 * running, else we get the final command status.
+	 */
+	start_time = jiffies;
+	end_time = start_time + (timeout * HZ);
+	do {
+		err = pdsc_devcmd(pdsc, &cmd, &comp, pdsc->devcmd_timeout);
+		msleep(20);
+	} while (time_before(jiffies, end_time) &&
+		 (err == -EAGAIN || err == -ETIMEDOUT));
+
+	if (err == -EAGAIN || err == -ETIMEDOUT) {
+		NL_SET_ERR_MSG_MOD(extack, "Firmware wait timed out");
+		dev_err(pdsc->dev, "DEV_CMD firmware wait %s timed out\n",
+			label);
+	} else if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Firmware wait failed");
+	}
+
+	return err;
+}
+
+int pdsc_firmware_update(struct pdsc *pdsc, const struct firmware *fw,
+			 struct netlink_ext_ack *extack)
+{
+	u32 buf_sz, copy_sz, offset;
+	struct devlink *dl;
+	int next_interval;
+	u64 data_addr;
+	int err = 0;
+	int fw_slot;
+
+	dev_info(pdsc->dev, "Installing firmware\n");
+
+	dl = priv_to_devlink(pdsc);
+	devlink_flash_update_status_notify(dl, "Preparing to flash",
+					   NULL, 0, 0);
+
+	buf_sz = sizeof(pdsc->cmd_regs->data);
+
+	dev_dbg(pdsc->dev,
+		"downloading firmware - size %d part_sz %d nparts %lu\n",
+		(int)fw->size, buf_sz, DIV_ROUND_UP(fw->size, buf_sz));
+
+	offset = 0;
+	next_interval = 0;
+	data_addr = offsetof(struct pds_core_dev_cmd_regs, data);
+	while (offset < fw->size) {
+		if (offset >= next_interval) {
+			devlink_flash_update_status_notify(dl, "Downloading",
+							   NULL, offset,
+							   fw->size);
+			next_interval = offset +
+					(fw->size / PDSC_FW_INTERVAL_FRACTION);
+		}
+
+		copy_sz = min_t(unsigned int, buf_sz, fw->size - offset);
+		mutex_lock(&pdsc->devcmd_lock);
+		memcpy_toio(&pdsc->cmd_regs->data, fw->data + offset, copy_sz);
+		err = pdsc_devcmd_fw_download_locked(pdsc, data_addr,
+						     offset, copy_sz);
+		mutex_unlock(&pdsc->devcmd_lock);
+		if (err) {
+			dev_err(pdsc->dev,
+				"download failed offset 0x%x addr 0x%llx len 0x%x: %pe\n",
+				offset, data_addr, copy_sz, ERR_PTR(err));
+			NL_SET_ERR_MSG_MOD(extack, "Segment download failed");
+			goto err_out;
+		}
+		offset += copy_sz;
+	}
+	devlink_flash_update_status_notify(dl, "Downloading", NULL,
+					   fw->size, fw->size);
+
+	devlink_flash_update_timeout_notify(dl, "Installing", NULL,
+					    PDSC_FW_INSTALL_TIMEOUT);
+
+	fw_slot = pdsc_devcmd_fw_install(pdsc);
+	if (fw_slot < 0) {
+		err = fw_slot;
+		dev_err(pdsc->dev, "install failed: %pe\n", ERR_PTR(err));
+		NL_SET_ERR_MSG_MOD(extack, "Failed to start firmware install");
+		goto err_out;
+	}
+
+	err = pdsc_fw_status_long_wait(pdsc, "Installing",
+				       PDSC_FW_INSTALL_TIMEOUT,
+				       PDS_CORE_FW_INSTALL_STATUS,
+				       extack);
+	if (err)
+		goto err_out;
+
+	devlink_flash_update_timeout_notify(dl, "Selecting", NULL,
+					    PDSC_FW_SELECT_TIMEOUT);
+
+	err = pdsc_devcmd_fw_activate(pdsc, fw_slot);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to start firmware select");
+		goto err_out;
+	}
+
+	err = pdsc_fw_status_long_wait(pdsc, "Selecting",
+				       PDSC_FW_SELECT_TIMEOUT,
+				       PDS_CORE_FW_ACTIVATE_STATUS,
+				       extack);
+	if (err)
+		goto err_out;
+
+	dev_info(pdsc->dev, "Firmware update completed, slot %d\n", fw_slot);
+
+err_out:
+	if (err)
+		devlink_flash_update_status_notify(dl, "Flash failed",
+						   NULL, 0, 0);
+	else
+		devlink_flash_update_status_notify(dl, "Flash done",
+						   NULL, 0, 0);
+	return err;
+}
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index eaff311d4a10..54aaf213679f 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -228,6 +228,7 @@ err_out_release_regions:
 
 static const struct devlink_ops pdsc_dl_ops = {
 	.info_get	= pdsc_dl_info_get,
+	.flash_update	= pdsc_dl_flash_update,
 };
 
 static const struct devlink_ops pdsc_dl_vf_ops = {
-- 
cgit v1.2.3


From 40ced89445364baa8620e92fba5b3fff8d9742b9 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Wed, 19 Apr 2023 10:04:24 -0700
Subject: pds_core: devlink params for enabling VIF support

Add the devlink parameter switches so the user can enable
the features supported by the VFs.  The only feature supported
at the moment is vDPA.

Example:
    devlink dev param set pci/0000:2b:00.0 \
	    name enable_vnet cmode runtime value true

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/ethernet/amd/pds_core.rst       | 19 ++++++
 drivers/net/ethernet/amd/pds_core/core.h           |  7 +++
 drivers/net/ethernet/amd/pds_core/devlink.c        | 73 ++++++++++++++++++++++
 drivers/net/ethernet/amd/pds_core/main.c           | 34 ++++++++--
 4 files changed, 127 insertions(+), 6 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
index 932ac03a3359..b9f310de862e 100644
--- a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -73,6 +73,25 @@ The ``pds_core`` driver reports the following versions
      - fixed
      - The revision of the ASIC for this device
 
+Parameters
+==========
+
+The ``pds_core`` driver implements the following generic
+parameters for controlling the functionality to be made available
+as auxiliary_bus devices.
+
+.. list-table:: Generic parameters implemented
+   :widths: 5 5 8 82
+
+   * - Name
+     - Mode
+     - Type
+     - Description
+   * - ``enable_vnet``
+     - runtime
+     - Boolean
+     - Enables vDPA functionality through an auxiliary_bus device
+
 Firmware Management
 ===================
 
diff --git a/drivers/net/ethernet/amd/pds_core/core.h b/drivers/net/ethernet/amd/pds_core/core.h
index 36099d3ac3dd..9e01a9ee6868 100644
--- a/drivers/net/ethernet/amd/pds_core/core.h
+++ b/drivers/net/ethernet/amd/pds_core/core.h
@@ -251,6 +251,13 @@ int pdsc_dl_info_get(struct devlink *dl, struct devlink_info_req *req,
 int pdsc_dl_flash_update(struct devlink *dl,
 			 struct devlink_flash_update_params *params,
 			 struct netlink_ext_ack *extack);
+int pdsc_dl_enable_get(struct devlink *dl, u32 id,
+		       struct devlink_param_gset_ctx *ctx);
+int pdsc_dl_enable_set(struct devlink *dl, u32 id,
+		       struct devlink_param_gset_ctx *ctx);
+int pdsc_dl_enable_validate(struct devlink *dl, u32 id,
+			    union devlink_param_value val,
+			    struct netlink_ext_ack *extack);
 
 void __iomem *pdsc_map_dbpage(struct pdsc *pdsc, int page_num);
 
diff --git a/drivers/net/ethernet/amd/pds_core/devlink.c b/drivers/net/ethernet/amd/pds_core/devlink.c
index f91d65cc78b5..9c6b3653c1c7 100644
--- a/drivers/net/ethernet/amd/pds_core/devlink.c
+++ b/drivers/net/ethernet/amd/pds_core/devlink.c
@@ -2,6 +2,79 @@
 /* Copyright(c) 2023 Advanced Micro Devices, Inc */
 
 #include "core.h"
+#include <linux/pds/pds_auxbus.h>
+
+static struct
+pdsc_viftype *pdsc_dl_find_viftype_by_id(struct pdsc *pdsc,
+					 enum devlink_param_type dl_id)
+{
+	int vt;
+
+	for (vt = 0; vt < PDS_DEV_TYPE_MAX; vt++) {
+		if (pdsc->viftype_status[vt].dl_id == dl_id)
+			return &pdsc->viftype_status[vt];
+	}
+
+	return NULL;
+}
+
+int pdsc_dl_enable_get(struct devlink *dl, u32 id,
+		       struct devlink_param_gset_ctx *ctx)
+{
+	struct pdsc *pdsc = devlink_priv(dl);
+	struct pdsc_viftype *vt_entry;
+
+	vt_entry = pdsc_dl_find_viftype_by_id(pdsc, id);
+	if (!vt_entry)
+		return -ENOENT;
+
+	ctx->val.vbool = vt_entry->enabled;
+
+	return 0;
+}
+
+int pdsc_dl_enable_set(struct devlink *dl, u32 id,
+		       struct devlink_param_gset_ctx *ctx)
+{
+	struct pdsc *pdsc = devlink_priv(dl);
+	struct pdsc_viftype *vt_entry;
+	int err = 0;
+	int vf_id;
+
+	vt_entry = pdsc_dl_find_viftype_by_id(pdsc, id);
+	if (!vt_entry || !vt_entry->supported)
+		return -EOPNOTSUPP;
+
+	if (vt_entry->enabled == ctx->val.vbool)
+		return 0;
+
+	vt_entry->enabled = ctx->val.vbool;
+	for (vf_id = 0; vf_id < pdsc->num_vfs; vf_id++) {
+		struct pdsc *vf = pdsc->vfs[vf_id].vf;
+
+		err = ctx->val.vbool ? pdsc_auxbus_dev_add(vf, pdsc) :
+				       pdsc_auxbus_dev_del(vf, pdsc);
+	}
+
+	return err;
+}
+
+int pdsc_dl_enable_validate(struct devlink *dl, u32 id,
+			    union devlink_param_value val,
+			    struct netlink_ext_ack *extack)
+{
+	struct pdsc *pdsc = devlink_priv(dl);
+	struct pdsc_viftype *vt_entry;
+
+	vt_entry = pdsc_dl_find_viftype_by_id(pdsc, id);
+	if (!vt_entry || !vt_entry->supported)
+		return -EOPNOTSUPP;
+
+	if (!pdsc->viftype_status[vt_entry->vif_id].supported)
+		return -ENODEV;
+
+	return 0;
+}
 
 int pdsc_dl_flash_update(struct devlink *dl,
 			 struct devlink_flash_update_params *params,
diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c
index b848f3360fe2..e2d14b1ca471 100644
--- a/drivers/net/ethernet/amd/pds_core/main.c
+++ b/drivers/net/ethernet/amd/pds_core/main.c
@@ -199,6 +199,14 @@ static const struct devlink_health_reporter_ops pdsc_fw_reporter_ops = {
 	.diagnose = pdsc_fw_reporter_diagnose,
 };
 
+static const struct devlink_param pdsc_dl_params[] = {
+	DEVLINK_PARAM_GENERIC(ENABLE_VNET,
+			      BIT(DEVLINK_PARAM_CMODE_RUNTIME),
+			      pdsc_dl_enable_get,
+			      pdsc_dl_enable_set,
+			      pdsc_dl_enable_validate),
+};
+
 #define PDSC_WQ_NAME_LEN 24
 
 static int pdsc_init_pf(struct pdsc *pdsc)
@@ -246,13 +254,19 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
 	dl = priv_to_devlink(pdsc);
 	devl_lock(dl);
+	err = devl_params_register(dl, pdsc_dl_params,
+				   ARRAY_SIZE(pdsc_dl_params));
+	if (err) {
+		dev_warn(pdsc->dev, "Failed to register devlink params: %pe\n",
+			 ERR_PTR(err));
+		goto err_out_unlock_dl;
+	}
 
 	hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc);
 	if (IS_ERR(hr)) {
 		dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr);
 		err = PTR_ERR(hr);
-		devl_unlock(dl);
-		goto err_out_stop;
+		goto err_out_unreg_params;
 	}
 	pdsc->fw_reporter = hr;
 
@@ -264,7 +278,11 @@ static int pdsc_init_pf(struct pdsc *pdsc)
 
 	return 0;
 
-err_out_stop:
+err_out_unreg_params:
+	devl_params_unregister(dl, pdsc_dl_params,
+			       ARRAY_SIZE(pdsc_dl_params));
+err_out_unlock_dl:
+	devl_unlock(dl);
 	pdsc_stop(pdsc);
 err_out_teardown:
 	pdsc_teardown(pdsc, PDSC_TEARDOWN_REMOVING);
@@ -373,9 +391,13 @@ static void pdsc_remove(struct pci_dev *pdev)
 	dl = priv_to_devlink(pdsc);
 	devl_lock(dl);
 	devl_unregister(dl);
-	if (pdsc->fw_reporter) {
-		devl_health_reporter_destroy(pdsc->fw_reporter);
-		pdsc->fw_reporter = NULL;
+	if (!pdev->is_virtfn) {
+		if (pdsc->fw_reporter) {
+			devl_health_reporter_destroy(pdsc->fw_reporter);
+			pdsc->fw_reporter = NULL;
+		}
+		devl_params_unregister(dl, pdsc_dl_params,
+				       ARRAY_SIZE(pdsc_dl_params));
 	}
 	devl_unlock(dl);
 
-- 
cgit v1.2.3


From ddbcb22055d136f58841c73ca2226dab79eb6101 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <shannon.nelson@amd.com>
Date: Wed, 19 Apr 2023 10:04:27 -0700
Subject: pds_core: Kconfig and pds_core.rst

Remaining documentation and Kconfig hook for building the driver.

Signed-off-by: Shannon Nelson <shannon.nelson@amd.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/ethernet/amd/pds_core.rst  | 16 ++++++++++++++++
 MAINTAINERS                                              |  9 +++++++++
 drivers/net/ethernet/amd/Kconfig                         | 12 ++++++++++++
 drivers/net/ethernet/amd/Makefile                        |  1 +
 4 files changed, 38 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
index b9f310de862e..9e8a16c44102 100644
--- a/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+++ b/Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
@@ -114,6 +114,22 @@ The driver supports a devlink health reporter for FW status::
   # devlink health diagnose pci/0000:2b:00.0 reporter fw
    Status: healthy State: 1 Generation: 0 Recoveries: 0
 
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command::
+
+  make oldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at::
+
+  -> Device Drivers
+    -> Network device support (NETDEVICES [=y])
+      -> Ethernet driver support
+        -> AMD devices
+          -> AMD/Pensando Data Systems Core Device Support
+
 Support
 =======
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 85571d5d8209..9420a1599a58 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1041,6 +1041,15 @@ F:	drivers/gpu/drm/amd/include/vi_structs.h
 F:	include/uapi/linux/kfd_ioctl.h
 F:	include/uapi/linux/kfd_sysfs.h
 
+AMD PDS CORE DRIVER
+M:	Shannon Nelson <shannon.nelson@amd.com>
+M:	Brett Creeley <brett.creeley@amd.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	Documentation/networking/device_drivers/ethernet/amd/pds_core.rst
+F:	drivers/net/ethernet/amd/pds_core/
+F:	include/linux/pds/
+
 AMD SPI DRIVER
 M:	Sanjay R Mehta <sanju.mehta@amd.com>
 S:	Maintained
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index ab42f75b9413..235fcacef5c5 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -186,4 +186,16 @@ config AMD_XGBE_HAVE_ECC
 	bool
 	default n
 
+config PDS_CORE
+	tristate "AMD/Pensando Data Systems Core Device Support"
+	depends on 64BIT && PCI
+	help
+	  This enables the support for the AMD/Pensando Core device family of
+	  adapters.  More specific information on this driver can be
+	  found in
+	  <file:Documentation/networking/device_drivers/ethernet/amd/pds_core.rst>.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called pds_core.
+
 endif # NET_VENDOR_AMD
diff --git a/drivers/net/ethernet/amd/Makefile b/drivers/net/ethernet/amd/Makefile
index 42742afe9115..2dcfb84731e1 100644
--- a/drivers/net/ethernet/amd/Makefile
+++ b/drivers/net/ethernet/amd/Makefile
@@ -17,3 +17,4 @@ obj-$(CONFIG_PCNET32) += pcnet32.o
 obj-$(CONFIG_SUN3LANCE) += sun3lance.o
 obj-$(CONFIG_SUNLANCE) += sunlance.o
 obj-$(CONFIG_AMD_XGBE) += xgbe/
+obj-$(CONFIG_PDS_CORE) += pds_core/
-- 
cgit v1.2.3


From cf88231d973909dc8d578138509973e062aba3d7 Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Thu, 20 Apr 2023 17:04:23 +0100
Subject: dt-bindings: net: mediatek: add WED RX binding for MT7981 eth driver

Add compatible string for mediatek,mt7981-wed as MT7981 also supports
RX WED just like MT7986, but needs a different firmware file.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml | 1 +
 1 file changed, 1 insertion(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
index 5c223cb063d4..f7d578a171a4 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,mt7622-wed.yaml
@@ -20,6 +20,7 @@ properties:
     items:
       - enum:
           - mediatek,mt7622-wed
+          - mediatek,mt7981-wed
           - mediatek,mt7986-wed
       - const: syscon
 
-- 
cgit v1.2.3


From e0416e7d33361d2ad0bf9f007428346579ac854a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 21 Apr 2023 23:03:46 +0100
Subject: rxrpc: Fix potential race in error handling in afs_make_call()

If the rxrpc call set up by afs_make_call() receives an error whilst it is
transmitting the request, there's the possibility that it may get to the
point the rxrpc call is ended (after the error_kill_call label) just as the
call is queued for async processing.

This could manifest itself as call->rxcall being seen as NULL in
afs_deliver_to_call() when it tries to lock the call.

Fix this by splitting rxrpc_kernel_end_call() into a function to shut down
an rxrpc call and a function to release the caller's reference and calling
the latter only when we get to afs_put_call().

Reported-by: Jeffrey Altman <jaltman@auristor.com>
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: kafs-testing+fedora36_64checkkafs-build-306@auristor.com
cc: Marc Dionne <marc.dionne@auristor.com>
cc: "David S. Miller" <davem@davemloft.net>
cc: Eric Dumazet <edumazet@google.com>
cc: Jakub Kicinski <kuba@kernel.org>
cc: Paolo Abeni <pabeni@redhat.com>
cc: linux-afs@lists.infradead.org
cc: netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.rst | 17 ++++++++++++-----
 fs/afs/rxrpc.c                     |  9 ++++-----
 include/net/af_rxrpc.h             |  3 ++-
 net/rxrpc/af_rxrpc.c               | 37 +++++++++++++++++++++++++------------
 net/rxrpc/rxperf.c                 |  3 ++-
 5 files changed, 45 insertions(+), 24 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index ec1323d92c96..e807e18ba32a 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -848,14 +848,21 @@ The kernel interface functions are as follows:
      returned.  The caller now holds a reference on this and it must be
      properly ended.
 
- (#) End a client call::
+ (#) Shut down a client call::
 
-	void rxrpc_kernel_end_call(struct socket *sock,
+	void rxrpc_kernel_shutdown_call(struct socket *sock,
+					struct rxrpc_call *call);
+
+     This is used to shut down a previously begun call.  The user_call_ID is
+     expunged from AF_RXRPC's knowledge and will not be seen again in
+     association with the specified call.
+
+ (#) Release the ref on a client call::
+
+	void rxrpc_kernel_put_call(struct socket *sock,
 				   struct rxrpc_call *call);
 
-     This is used to end a previously begun call.  The user_call_ID is expunged
-     from AF_RXRPC's knowledge and will not be seen again in association with
-     the specified call.
+     This is used to release the caller's ref on an rxrpc call.
 
  (#) Send data through a call::
 
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 7817e2b860e5..e08b850c3e6d 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -179,7 +179,8 @@ void afs_put_call(struct afs_call *call)
 		ASSERT(call->type->name != NULL);
 
 		if (call->rxcall) {
-			rxrpc_kernel_end_call(net->socket, call->rxcall);
+			rxrpc_kernel_shutdown_call(net->socket, call->rxcall);
+			rxrpc_kernel_put_call(net->socket, call->rxcall);
 			call->rxcall = NULL;
 		}
 		if (call->type->destructor)
@@ -420,10 +421,8 @@ error_kill_call:
 	 * The call, however, might be queued on afs_async_calls and we need to
 	 * make sure we don't get any more notifications that might requeue it.
 	 */
-	if (call->rxcall) {
-		rxrpc_kernel_end_call(call->net->socket, call->rxcall);
-		call->rxcall = NULL;
-	}
+	if (call->rxcall)
+		rxrpc_kernel_shutdown_call(call->net->socket, call->rxcall);
 	if (call->async) {
 		if (cancel_work_sync(&call->async_work))
 			afs_put_call(call);
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ba717eac0229..01a35e113ab9 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -57,7 +57,8 @@ int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
 			   struct iov_iter *, size_t *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
 			     u32, int, enum rxrpc_abort_reason);
-void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
+void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call);
+void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call);
 void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
 			   struct sockaddr_rxrpc *);
 bool rxrpc_kernel_get_srtt(struct socket *, struct rxrpc_call *, u32 *);
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 102f5cbff91a..c32b164206f9 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -342,31 +342,44 @@ static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall,
 }
 
 /**
- * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * rxrpc_kernel_shutdown_call - Allow a kernel service to shut down a call it was using
  * @sock: The socket the call is on
  * @call: The call to end
  *
- * Allow a kernel service to end a call it was using.  The call must be
+ * Allow a kernel service to shut down a call it was using.  The call must be
  * complete before this is called (the call should be aborted if necessary).
  */
-void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
+void rxrpc_kernel_shutdown_call(struct socket *sock, struct rxrpc_call *call)
 {
 	_enter("%d{%d}", call->debug_id, refcount_read(&call->ref));
 
 	mutex_lock(&call->user_mutex);
-	rxrpc_release_call(rxrpc_sk(sock->sk), call);
-
-	/* Make sure we're not going to call back into a kernel service */
-	if (call->notify_rx) {
-		spin_lock(&call->notify_lock);
-		call->notify_rx = rxrpc_dummy_notify_rx;
-		spin_unlock(&call->notify_lock);
+	if (!test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
+		rxrpc_release_call(rxrpc_sk(sock->sk), call);
+
+		/* Make sure we're not going to call back into a kernel service */
+		if (call->notify_rx) {
+			spin_lock(&call->notify_lock);
+			call->notify_rx = rxrpc_dummy_notify_rx;
+			spin_unlock(&call->notify_lock);
+		}
 	}
-
 	mutex_unlock(&call->user_mutex);
+}
+EXPORT_SYMBOL(rxrpc_kernel_shutdown_call);
+
+/**
+ * rxrpc_kernel_put_call - Release a reference to a call
+ * @sock: The socket the call is on
+ * @call: The call to put
+ *
+ * Drop the application's ref on an rxrpc call.
+ */
+void rxrpc_kernel_put_call(struct socket *sock, struct rxrpc_call *call)
+{
 	rxrpc_put_call(call, rxrpc_call_put_kernel);
 }
-EXPORT_SYMBOL(rxrpc_kernel_end_call);
+EXPORT_SYMBOL(rxrpc_kernel_put_call);
 
 /**
  * rxrpc_kernel_check_life - Check to see whether a call is still alive
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index 4a2e90015ca7..085e7892d310 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -342,7 +342,8 @@ static void rxperf_deliver_to_call(struct work_struct *work)
 call_complete:
 	rxperf_set_call_complete(call, ret, remote_abort);
 	/* The call may have been requeued */
-	rxrpc_kernel_end_call(rxperf_socket, call->rxcall);
+	rxrpc_kernel_shutdown_call(rxperf_socket, call->rxcall);
+	rxrpc_kernel_put_call(rxperf_socket, call->rxcall);
 	cancel_work(&call->work);
 	kfree(call);
 }
-- 
cgit v1.2.3


From c43132abfbe9d2f3befdbddfc3097d30905dc3d1 Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Date: Mon, 13 Feb 2023 13:09:22 +0100
Subject: dt-bindings: bluetooth: marvell: add 88W8997

Update the documentation with the device tree binding for the Marvell
88W8997 bluetooth device.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 Documentation/devicetree/bindings/net/marvell-bluetooth.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml b/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
index 6aa7a078faa2..65e09476b048 100644
--- a/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
@@ -15,7 +15,9 @@ maintainers:
 
 properties:
   compatible:
-    const: mrvl,88w8897
+    enum:
+      - mrvl,88w8897
+      - mrvl,88w8997
 
 required:
   - compatible
-- 
cgit v1.2.3


From 552705fd891b6ed6dda340567ce3ffff943f1f3b Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Date: Mon, 13 Feb 2023 13:09:23 +0100
Subject: dt-bindings: bluetooth: marvell: add max-speed property

The 88W8997 bluetooth module supports setting the max-speed property.

Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Signed-off-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 .../devicetree/bindings/net/marvell-bluetooth.yaml       | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml b/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
index 65e09476b048..188a42ca6ceb 100644
--- a/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/marvell-bluetooth.yaml
@@ -19,9 +19,25 @@ properties:
       - mrvl,88w8897
       - mrvl,88w8997
 
+  max-speed:
+    description: see Documentation/devicetree/bindings/serial/serial.yaml
+
 required:
   - compatible
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: mrvl,88w8997
+    then:
+      properties:
+        max-speed: true
+    else:
+      properties:
+        max-speed: false
+
 additionalProperties: false
 
 examples:
-- 
cgit v1.2.3


From 95ee3a93239e172b592dc4613c403de2410e4a82 Mon Sep 17 00:00:00 2001
From: Chris Morgan <macromorgan@hotmail.com>
Date: Tue, 28 Feb 2023 09:22:03 -0600
Subject: dt-bindings: net: realtek-bluetooth: Add RTL8821CS

Add compatible string for RTL8821CS for existing Realtek Bluetooth
driver.

Signed-off-by: Chris Morgan <macromorgan@hotmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Alistair Francis <alistair@alistair23.me>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 .../devicetree/bindings/net/realtek-bluetooth.yaml | 24 ++++++++++++++--------
 1 file changed, 15 insertions(+), 9 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
index 143b5667abad..8cc2b9924680 100644
--- a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
@@ -4,24 +4,30 @@
 $id: http://devicetree.org/schemas/net/realtek-bluetooth.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: RTL8723BS/RTL8723CS/RTL8822CS Bluetooth
+title: RTL8723BS/RTL8723CS/RTL8821CS/RTL8822CS Bluetooth
 
 maintainers:
   - Vasily Khoruzhick <anarsoul@gmail.com>
   - Alistair Francis <alistair@alistair23.me>
 
 description:
-  RTL8723CS/RTL8723CS/RTL8822CS is WiFi + BT chip. WiFi part is connected over
-  SDIO, while BT is connected over serial. It speaks H5 protocol with few
-  extra commands to upload firmware and change module speed.
+  RTL8723BS/RTL8723CS/RTL8821CS/RTL8822CS is a WiFi + BT chip. The WiFi part
+  is connected over SDIO, while BT is connected over serial. It speaks the
+  H5 protocol with a few extra commands to upload firmware and change
+  module speed.
 
 properties:
   compatible:
-    enum:
-      - realtek,rtl8723bs-bt
-      - realtek,rtl8723cs-bt
-      - realtek,rtl8723ds-bt
-      - realtek,rtl8822cs-bt
+    oneOf:
+      - enum:
+          - realtek,rtl8723bs-bt
+          - realtek,rtl8723cs-bt
+          - realtek,rtl8723ds-bt
+          - realtek,rtl8822cs-bt
+      - items:
+          - enum:
+              - realtek,rtl8821cs-bt
+          - const: realtek,rtl8822cs-bt
 
   device-wake-gpios:
     maxItems: 1
-- 
cgit v1.2.3


From ab3a769b4dccec2cf60f0a0700b140991bf9afc8 Mon Sep 17 00:00:00 2001
From: Neeraj Sanjay Kale <neeraj.sanjaykale@nxp.com>
Date: Thu, 16 Mar 2023 22:52:13 +0530
Subject: dt-bindings: net: bluetooth: Add NXP bluetooth support

Add binding document for NXP bluetooth chipsets attached over UART.

Signed-off-by: Neeraj Sanjay Kale <neeraj.sanjaykale@nxp.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 .../bindings/net/bluetooth/nxp,88w8987-bt.yaml     | 45 ++++++++++++++++++++++
 MAINTAINERS                                        |  6 +++
 2 files changed, 51 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml b/Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml
new file mode 100644
index 000000000000..57e4c87cb00b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml
@@ -0,0 +1,45 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/bluetooth/nxp,88w8987-bt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP Bluetooth chips
+
+description:
+  This binding describes UART-attached NXP Bluetooth chips. These chips
+  are dual-radio chips supporting WiFi and Bluetooth. The Bluetooth part
+  uses the standard H4 protocol over a 4-wire UART. The RTS and CTS lines
+  are used during FW download. To enable power save mode, the host
+  asserts a break signal on the UART-TX line to put the chip into the power
+  save state. De-asserting the break wakes up the BT chip.
+
+maintainers:
+  - Neeraj Sanjay Kale <neeraj.sanjaykale@nxp.com>
+
+properties:
+  compatible:
+    enum:
+      - nxp,88w8987-bt
+      - nxp,88w8997-bt
+
+  fw-init-baudrate:
+    description:
+      Chip baudrate after FW is downloaded and initialized.
+      This property depends on the module vendor's
+      configuration. If this property is not specified,
+      115200 is set as default.
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+    serial {
+        bluetooth {
+            compatible = "nxp,88w8987-bt";
+            fw-init-baudrate = <3000000>;
+        };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 6ac562e0381e..ee743e25b096 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23237,6 +23237,12 @@ L:	linux-mm@kvack.org
 S:	Maintained
 F:	mm/zswap.c
 
+NXP BLUETOOTH WIRELESS DRIVERS
+M:	Amitkumar Karwar <amitkumar.karwar@nxp.com>
+M:	Neeraj Kale <neeraj.sanjaykale@nxp.com>
+S:	Maintained
+F:	Documentation/devicetree/bindings/net/bluetooth/nxp,88w8987-bt.yaml
+
 THE REST
 M:	Linus Torvalds <torvalds@linux-foundation.org>
 L:	linux-kernel@vger.kernel.org
-- 
cgit v1.2.3


From b1d00baaa0298a5e033bc00a92522e4fd07900a8 Mon Sep 17 00:00:00 2001
From: Steev Klimaszewski <steev@kali.org>
Date: Sun, 26 Mar 2023 18:38:09 -0500
Subject: dt-bindings: net: Add WCN6855 Bluetooth

Add bindings for the QTI WCN6855 chipset.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Steev Klimaszewski <steev@kali.org>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
---
 .../bindings/net/bluetooth/qualcomm-bluetooth.yaml      | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'Documentation')

diff --git a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
index a6a6b0e4df7a..68f78b90d23a 100644
--- a/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/qualcomm-bluetooth.yaml
@@ -23,6 +23,7 @@ properties:
       - qcom,wcn3998-bt
       - qcom,qca6390-bt
       - qcom,wcn6750-bt
+      - qcom,wcn6855-bt
 
   enable-gpios:
     maxItems: 1
@@ -133,6 +134,22 @@ allOf:
         - vddrfa1p7-supply
         - vddrfa1p2-supply
         - vddasd-supply
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,wcn6855-bt
+    then:
+      required:
+        - enable-gpios
+        - swctrl-gpios
+        - vddio-supply
+        - vddbtcxmx-supply
+        - vddrfacmn-supply
+        - vddrfa0p8-supply
+        - vddrfa1p2-supply
+        - vddrfa1p7-supply
 
 examples:
   - |
-- 
cgit v1.2.3


From 00d0f31a1ec8eb3e4d692bdf2455daf0fb865bb3 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 20 Apr 2023 16:33:02 -0700
Subject: net: ethtool: coalesce: try to make user settings stick twice

SET_COALESCE may change operation mode and parameters in one call.
Changing operation mode may cause the driver to reset the parameter
values to what is a reasonable default for new operation mode.

Since driver does not know which parameters come from user and which
are echoed back from ->get, driver may ignore the parameters when
switching operation modes.

This used to be inevitable for ioctl() but in netlink we know which
parameters are actually specified by the user.

We could inform which parameters were set by the user but this would
lead to a lot of code duplication in the drivers. Instead try to call
the drivers twice if both mode and params are changed. The set method
already checks if any params need updating so in case the driver did
the right thing the first time around - there will be no second call
to its ->set method (only an extra call to ->get()).

For mlx5 for example before this patch we'd see:

 # ethtool -C eth0 adaptive-rx on  adaptive-tx on
 # ethtool -C eth0 adaptive-rx off adaptive-tx off \
		   tx-usecs 123 rx-usecs 123
 Adaptive RX: off  TX: off
 rx-usecs: 3
 rx-frames: 32
 tx-usecs: 16
 tx-frames: 32
 [...]

After the change:

 # ethtool -C eth0 adaptive-rx on  adaptive-tx on
 # ethtool -C eth0 adaptive-rx off adaptive-tx off \
		   tx-usecs 123 rx-usecs 123
 Adaptive RX: off  TX: off
 rx-usecs: 123
 rx-frames: 32
 tx-usecs: 123
 tx-frames: 32
 [...]

This only works for netlink, so it's a small discrepancy between
netlink and ioctl(). Since we anticipate most users to move to
netlink I believe it's worth making their lives easier.

Link: https://lore.kernel.org/r/20230420233302.944382-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst |  4 +++
 net/ethtool/coalesce.c                       | 54 ++++++++++++++++++++++------
 2 files changed, 47 insertions(+), 11 deletions(-)

(limited to 'Documentation')

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index cd0973d4ba01..2540c70952ff 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1099,6 +1099,10 @@ such that the corresponding bit in ``ethtool_ops::supported_coalesce_params``
 is not set), regardless of their values. Driver may impose additional
 constraints on coalescing parameters and their values.
 
+Compared to requests issued via ``ioctl()``, the netlink version of this request
+will try harder to make sure that values specified by the user have been applied
+and may call the driver twice.
+
 
 PAUSE_GET
 =========
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 443e7e642c96..01a59ce211c8 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -254,13 +254,14 @@ ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
 }
 
 static int
-ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
+__ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info,
+		     bool *dual_change)
 {
 	struct kernel_ethtool_coalesce kernel_coalesce = {};
 	struct net_device *dev = req_info->dev;
 	struct ethtool_coalesce coalesce = {};
+	bool mod_mode = false, mod = false;
 	struct nlattr **tb = info->attrs;
-	bool mod = false;
 	int ret;
 
 	ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
@@ -268,6 +269,7 @@ ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
 	if (ret < 0)
 		return ret;
 
+	/* Update values */
 	ethnl_update_u32(&coalesce.rx_coalesce_usecs,
 			 tb[ETHTOOL_A_COALESCE_RX_USECS], &mod);
 	ethnl_update_u32(&coalesce.rx_max_coalesced_frames,
@@ -286,10 +288,6 @@ ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
 			 tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_IRQ], &mod);
 	ethnl_update_u32(&coalesce.stats_block_coalesce_usecs,
 			 tb[ETHTOOL_A_COALESCE_STATS_BLOCK_USECS], &mod);
-	ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce,
-			    tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod);
-	ethnl_update_bool32(&coalesce.use_adaptive_tx_coalesce,
-			    tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX], &mod);
 	ethnl_update_u32(&coalesce.pkt_rate_low,
 			 tb[ETHTOOL_A_COALESCE_PKT_RATE_LOW], &mod);
 	ethnl_update_u32(&coalesce.rx_coalesce_usecs_low,
@@ -312,17 +310,25 @@ ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
 			 tb[ETHTOOL_A_COALESCE_TX_MAX_FRAMES_HIGH], &mod);
 	ethnl_update_u32(&coalesce.rate_sample_interval,
 			 tb[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL], &mod);
-	ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx,
-			tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod);
-	ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
-			tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod);
 	ethnl_update_u32(&kernel_coalesce.tx_aggr_max_bytes,
 			 tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES], &mod);
 	ethnl_update_u32(&kernel_coalesce.tx_aggr_max_frames,
 			 tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod);
 	ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
 			 tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
-	if (!mod)
+
+	/* Update operation modes */
+	ethnl_update_bool32(&coalesce.use_adaptive_rx_coalesce,
+			    tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_RX], &mod_mode);
+	ethnl_update_bool32(&coalesce.use_adaptive_tx_coalesce,
+			    tb[ETHTOOL_A_COALESCE_USE_ADAPTIVE_TX], &mod_mode);
+	ethnl_update_u8(&kernel_coalesce.use_cqe_mode_tx,
+			tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod_mode);
+	ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
+			tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod_mode);
+
+	*dual_change = mod && mod_mode;
+	if (!mod && !mod_mode)
 		return 0;
 
 	ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
@@ -330,6 +336,32 @@ ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
 	return ret < 0 ? ret : 1;
 }
 
+static int
+ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+	bool dual_change;
+	int err, ret;
+
+	/* SET_COALESCE may change operation mode and parameters in one call.
+	 * Changing operation mode may cause the driver to reset the parameter
+	 * values, and therefore ignore user input (driver does not know which
+	 * parameters come from user and which are echoed back from ->get).
+	 * To not complicate the drivers if user tries to change both the mode
+	 * and parameters at once - call the driver twice.
+	 */
+	err = __ethnl_set_coalesce(req_info, info, &dual_change);
+	if (err < 0)
+		return err;
+	ret = err;
+
+	if (ret && dual_change) {
+		err = __ethnl_set_coalesce(req_info, info, &dual_change);
+		if (err < 0)
+			return err;
+	}
+	return ret;
+}
+
 const struct ethnl_request_ops ethnl_coalesce_request_ops = {
 	.request_cmd		= ETHTOOL_MSG_COALESCE_GET,
 	.reply_cmd		= ETHTOOL_MSG_COALESCE_GET_REPLY,
-- 
cgit v1.2.3