summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author L. E. Segovia <amy@centricular.com> 2024-08-06 16:29:53 -0300
committer Backport Bot <gitlab-backport-bot@gstreamer-foundation.org> 2024-08-09 19:45:24 +0100
commit ad354dd8f9796662e891ae7d2e63caaaee840865 (patch)
tree 6fd9fe91570fef42680cfaf69fdba00d77adf2a5
parent 7b1620c92e3b458b2738afd2e8b07b43fa1bc890 (diff)
libvpx: Update to 1.14.1
Part-of: <https://gitlab.freedesktop.org/gstreamer/cerbero/-/merge_requests/1543>
-rw-r--r--recipes/libvpx.recipe6
-rw-r--r--recipes/libvpx/0001-Fix-implicit-argument-conversion-on-UWP.patch27
-rw-r--r--recipes/libvpx/0002-vp9_diamond_search_sad_neon-use-DECLARE_ALIGNED.patch32
-rw-r--r--recipes/libvpx/0003-Add-Meson-build.patch2693
4 files changed, 2435 insertions, 323 deletions
diff --git a/recipes/libvpx.recipe b/recipes/libvpx.recipe
index de808cc2..4598eaf9 100644
--- a/recipes/libvpx.recipe
+++ b/recipes/libvpx.recipe
@@ -7,11 +7,11 @@ import shutil
class Recipe(recipe.Recipe):
name = 'libvpx'
- version = 'v1.13.1'
+ version = 'v1.14.1'
stype = SourceType.TARBALL
url = 'https://github.com/webmproject/libvpx/archive/%(version)s.tar.gz'
tarball_dirname = 'libvpx-' + version[1:]
- tarball_checksum = '00dae80465567272abd077f59355f95ac91d7809a2d3006f9ace2637dd429d14'
+ tarball_checksum = '901747254d80a7937c933d03bd7c5d41e8e6c883e0665fadcb172542167c7977'
licenses = [{License.BSD: ['LICENSE'], License.Misc: ['PATENTS']}]
btype = BuildType.MESON
@@ -30,8 +30,6 @@ class Recipe(recipe.Recipe):
}
patches = [
- 'libvpx/0001-Fix-implicit-argument-conversion-on-UWP.patch',
- 'libvpx/0002-vp9_diamond_search_sad_neon-use-DECLARE_ALIGNED.patch',
'libvpx/0003-Add-Meson-build.patch',
]
diff --git a/recipes/libvpx/0001-Fix-implicit-argument-conversion-on-UWP.patch b/recipes/libvpx/0001-Fix-implicit-argument-conversion-on-UWP.patch
deleted file mode 100644
index 6ff8027f..00000000
--- a/recipes/libvpx/0001-Fix-implicit-argument-conversion-on-UWP.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From 0339bdc0a49c415f55f728ac9f069ff426898722 Mon Sep 17 00:00:00 2001
-From: Nirbheek Chauhan <nirbheek@centricular.com>
-Date: Wed, 11 Jan 2023 02:31:02 +0530
-Subject: [PATCH] Fix implicit argument conversion on UWP
-
-vpx_util\vpx_thread.h(79,13): error C2664: 'HANDLE CreateThread(LPSECURITY_ATTRIBUTES,SIZE_T,LPTHREAD_START_ROUTINE,LPVOID,DWORD,LPDWORD)': cannot convert argument 3 from 'unsigned int (__cdecl *)(void *)' to 'LPTHREAD_START_ROUTINE'
----
- vpx_util/vpx_thread.h | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/vpx_util/vpx_thread.h b/vpx_util/vpx_thread.h
-index 6d308e9..c84e96a 100644
---- a/vpx_util/vpx_thread.h
-+++ b/vpx_util/vpx_thread.h
-@@ -78,7 +78,8 @@ static INLINE int pthread_create(pthread_t *const thread, const void *attr,
- #ifdef USE_CREATE_THREAD
- *thread = CreateThread(NULL, /* lpThreadAttributes */
- 0, /* dwStackSize */
-- start, arg, 0, /* dwStackSize */
-+ (LPTHREAD_START_ROUTINE) start, arg,
-+ 0, /* dwStackSize */
- NULL); /* lpThreadId */
- #else
- *thread = (pthread_t)_beginthreadex(NULL, /* void *security */
---
-2.37.1 (Apple Git-137.1)
-
diff --git a/recipes/libvpx/0002-vp9_diamond_search_sad_neon-use-DECLARE_ALIGNED.patch b/recipes/libvpx/0002-vp9_diamond_search_sad_neon-use-DECLARE_ALIGNED.patch
deleted file mode 100644
index 8ec541b2..00000000
--- a/recipes/libvpx/0002-vp9_diamond_search_sad_neon-use-DECLARE_ALIGNED.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 7c854566c8dbbf047a90139a928a6ac9196546af Mon Sep 17 00:00:00 2001
-Message-ID: <7c854566c8dbbf047a90139a928a6ac9196546af.1692093230.git.amy@centricular.com>
-From: James Zern <jzern@google.com>
-Date: Wed, 1 Feb 2023 13:27:06 -0800
-Subject: [PATCH 2/3] vp9_diamond_search_sad_neon: use DECLARE_ALIGNED
-
-rather than the gcc specific __attribute__((aligned())); fixes build
-targeting ARM64 windows.
-
-Bug: webm:1788
-Change-Id: I2210fc215f44d90c1ce9dee9b54888eb1b78c99e
-(cherry picked from commit 858a8c611f4c965078485860a6820e2135e6611b)
----
- vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c b/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
-index 33753f77b..997775a66 100644
---- a/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
-+++ b/vp9/encoder/arm/neon/vp9_diamond_search_sad_neon.c
-@@ -220,7 +220,7 @@ int vp9_diamond_search_sad_neon(const MACROBLOCK *x,
- // Look up the component cost of the residual motion vector
- {
- uint32_t cost[4];
-- int16_t __attribute__((aligned(16))) rowcol[8];
-+ DECLARE_ALIGNED(16, int16_t, rowcol[8]);
- vst1q_s16(rowcol, v_diff_mv_w);
-
- // Note: This is a use case for gather instruction
---
-2.41.0
-
diff --git a/recipes/libvpx/0003-Add-Meson-build.patch b/recipes/libvpx/0003-Add-Meson-build.patch
index 0f2e3cb3..608c3018 100644
--- a/recipes/libvpx/0003-Add-Meson-build.patch
+++ b/recipes/libvpx/0003-Add-Meson-build.patch
@@ -4,94 +4,6 @@ Date: Tue, 18 Apr 2023 22:16:49 -0300
Subject: [PATCH] Add Meson build
For more information, please see https://github.com/dragonCodecs/libvpx/tree/old-meson
----
- LICENSE.meson.md | 51 +
- README.meson.md | 98 +
- examples/meson.build | 452 +++++
- examples/samples.dox.in | 14 +
- meson.build | 1900 +++++++++++++++++++
- meson/arm_neon.h | 17 +
- meson/capture_build_options.py | 18 +
- meson/check_test_data.py | 45 +
- meson/gen_def.py | 100 +
- meson/generate_component_dox.py | 25 +
- meson/generate_doxy.py | 28 +
- meson/generate_doxyfile.py | 44 +
- meson/parse_options.py | 171 ++
- meson/parse_sources.py | 219 +++
- meson/patch-configure.diff | 60 +
- meson/stdinout_wrapper.py | 19 +
- meson/transform_config.py | 53 +
- meson/transform_config_asm.py | 37 +
- meson/vpx_config.c | 10 +
- meson_options.txt | 49 +
- subprojects/.gitignore | 3 +
- subprojects/nasm-mac.wrap | 9 +
- subprojects/nasm-win.wrap | 9 +
- subprojects/packagefiles/nasm/meson.build | 13 +
- subprojects/packagefiles/nasm/patch_nasm.py | 15 +
- subprojects/packagefiles/perl/meson.build | 8 +
- subprojects/perl-win.wrap | 10 +
- test/meson.build | 1593 ++++++++++++++++
- third_party/googletest/meson.build | 33 +
- third_party/libwebm/meson.build | 52 +
- third_party/libyuv/meson.build | 49 +
- third_party/meson.build | 9 +
- tools/meson.build | 67 +
- tools/tools.dox.in | 5 +
- vp8/meson.build | 554 ++++++
- vp9/meson.build | 595 ++++++
- vpx/meson.build | 136 ++
- vpx_dsp/arm/meson.build | 12 +
- vpx_dsp/meson.build | 1206 ++++++++++++
- vpx_mem/meson.build | 41 +
- vpx_ports/meson.build | 157 ++
- vpx_scale/meson.build | 71 +
- vpx_util/meson.build | 61 +
- 43 files changed, 8118 insertions(+)
- create mode 100644 LICENSE.meson.md
- create mode 100644 README.meson.md
- create mode 100644 examples/meson.build
- create mode 100644 examples/samples.dox.in
- create mode 100644 meson.build
- create mode 100644 meson/arm_neon.h
- create mode 100644 meson/capture_build_options.py
- create mode 100644 meson/check_test_data.py
- create mode 100755 meson/gen_def.py
- create mode 100644 meson/generate_component_dox.py
- create mode 100644 meson/generate_doxy.py
- create mode 100644 meson/generate_doxyfile.py
- create mode 100644 meson/parse_options.py
- create mode 100644 meson/parse_sources.py
- create mode 100644 meson/patch-configure.diff
- create mode 100644 meson/stdinout_wrapper.py
- create mode 100644 meson/transform_config.py
- create mode 100644 meson/transform_config_asm.py
- create mode 100644 meson/vpx_config.c
- create mode 100644 meson_options.txt
- create mode 100644 subprojects/.gitignore
- create mode 100644 subprojects/nasm-mac.wrap
- create mode 100644 subprojects/nasm-win.wrap
- create mode 100644 subprojects/packagefiles/nasm/meson.build
- create mode 100644 subprojects/packagefiles/nasm/patch_nasm.py
- create mode 100644 subprojects/packagefiles/perl/meson.build
- create mode 100644 subprojects/perl-win.wrap
- create mode 100644 test/meson.build
- create mode 100644 third_party/googletest/meson.build
- create mode 100644 third_party/libwebm/meson.build
- create mode 100644 third_party/libyuv/meson.build
- create mode 100644 third_party/meson.build
- create mode 100644 tools/meson.build
- create mode 100644 tools/tools.dox.in
- create mode 100644 vp8/meson.build
- create mode 100644 vp9/meson.build
- create mode 100644 vpx/meson.build
- create mode 100644 vpx_dsp/arm/meson.build
- create mode 100644 vpx_dsp/meson.build
- create mode 100644 vpx_mem/meson.build
- create mode 100644 vpx_ports/meson.build
- create mode 100644 vpx_scale/meson.build
- create mode 100644 vpx_util/meson.build
diff --git a/LICENSE.meson.md b/LICENSE.meson.md
new file mode 100644
@@ -8478,12 +8390,6 @@ From: "L. E. Segovia" <amy@amyspark.me>
Date: Tue, 18 Apr 2023 22:59:00 -0300
Subject: [PATCH] Add GitLab CI
----
- .gitlab-ci.yml | 335 +++++++++++++++++++++++++++++++++++++
- ci/vs-arm64-cross-file.txt | 14 ++
- 2 files changed, 349 insertions(+)
- create mode 100644 .gitlab-ci.yml
- create mode 100644 ci/vs-arm64-cross-file.txt
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
@@ -8856,11 +8762,6 @@ Date: Wed, 17 May 2023 20:08:48 -0300
Subject: [PATCH] meson: Protect build system handled features with combo
choices
----
- meson.build | 34 +++++++++++++--------------
- meson/parse_options.py | 29 ++++++++++++++++++++---
- meson_options.txt | 52 +++++++++++++++++++++---------------------
- 3 files changed, 68 insertions(+), 47 deletions(-)
diff --git a/meson.build b/meson.build
index 78578a1f5..a2eeacf17 100644
@@ -9080,9 +8981,6 @@ Date: Tue, 4 Jul 2023 22:05:03 -0300
Subject: [PATCH] meson: Fix dependency override's library name
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/4>
----
- meson.build | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index a2eeacf17..29a14dc86 100644
@@ -9107,10 +9005,6 @@ Date: Wed, 5 Jul 2023 14:16:50 -0300
Subject: [PATCH] meson: Port thumb to Python
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- build/make/thumb.py | 52 +++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 52 insertions(+)
- create mode 100755 build/make/thumb.py
diff --git a/build/make/thumb.py b/build/make/thumb.py
new file mode 100755
@@ -9180,11 +9074,6 @@ Date: Wed, 5 Jul 2023 14:15:44 -0300
Subject: [PATCH] meson: Port ads2armasm_ms to Python
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- build/make/ads2armasm_ms.py | 33 +++++++++++++++++++++++++++++++++
- meson.build | 2 +-
- 2 files changed, 34 insertions(+), 1 deletion(-)
- create mode 100755 build/make/ads2armasm_ms.py
diff --git a/build/make/ads2armasm_ms.py b/build/make/ads2armasm_ms.py
new file mode 100755
@@ -9248,11 +9137,6 @@ Date: Wed, 5 Jul 2023 14:15:55 -0300
Subject: [PATCH] meson: Port ads2gas_apple to Python
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- build/make/ads2gas_apple.py | 98 +++++++++++++++++++++++++++++++++++++
- meson.build | 2 +-
- 2 files changed, 99 insertions(+), 1 deletion(-)
- create mode 100755 build/make/ads2gas_apple.py
diff --git a/build/make/ads2gas_apple.py b/build/make/ads2gas_apple.py
new file mode 100755
@@ -9381,11 +9265,6 @@ Date: Wed, 5 Jul 2023 14:16:44 -0300
Subject: [PATCH] meson: Port ads2gas to Python
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- build/make/ads2gas.py | 140 ++++++++++++++++++++++++++++++++++++++++++
- meson.build | 2 +-
- 2 files changed, 141 insertions(+), 1 deletion(-)
- create mode 100755 build/make/ads2gas.py
diff --git a/build/make/ads2gas.py b/build/make/ads2gas.py
new file mode 100755
@@ -9556,19 +9435,6 @@ Date: Fri, 7 Jul 2023 22:10:48 -0300
Subject: [PATCH] meson: Port rtcd.pl to Python
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- build/make/rtcd.py | 436 +++++++++
- meson.build | 14 +-
- vp8/common/rtcd_defs.py | 248 +++++
- vp9/common/vp9_rtcd_defs.py | 214 +++++
- vpx_dsp/vpx_dsp_rtcd_defs.py | 1655 ++++++++++++++++++++++++++++++++++
- vpx_scale/vpx_scale_rtcd.py | 45 +
- 6 files changed, 2605 insertions(+), 7 deletions(-)
- create mode 100755 build/make/rtcd.py
- create mode 100755 vp8/common/rtcd_defs.py
- create mode 100755 vp9/common/vp9_rtcd_defs.py
- create mode 100755 vpx_dsp/vpx_dsp_rtcd_defs.py
- create mode 100755 vpx_scale/vpx_scale_rtcd.py
diff --git a/build/make/rtcd.py b/build/make/rtcd.py
new file mode 100755
@@ -12277,18 +12143,6 @@ Subject: [PATCH] Remove Perl as a dependency
Fixes #1
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- .gitlab-ci.yml | 7 ++-----
- meson.build | 10 +---------
- subprojects/packagefiles/perl/meson.build | 8 --------
- subprojects/perl-win.wrap | 10 ----------
- vp8/meson.build | 2 +-
- vp9/meson.build | 2 +-
- vpx_dsp/meson.build | 2 +-
- vpx_scale/meson.build | 2 +-
- 8 files changed, 7 insertions(+), 36 deletions(-)
- delete mode 100644 subprojects/packagefiles/perl/meson.build
- delete mode 100644 subprojects/perl-win.wrap
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 251278731..872c5dfaf 100644
@@ -12456,13 +12310,6 @@ Date: Sat, 8 Jul 2023 19:39:55 +0000
Subject: [PATCH] meson: Add test for rtcd generation parity
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/5>
----
- meson.build | 34 +++++++++++++++++++++++++++++++---
- vp8/meson.build | 8 +++-----
- vp9/meson.build | 9 ++++-----
- vpx_dsp/meson.build | 8 +++-----
- vpx_scale/meson.build | 8 +++-----
- 5 files changed, 44 insertions(+), 23 deletions(-)
diff --git a/meson.build b/meson.build
index 9d7a8a9a2..2d64b1184 100644
@@ -12620,30 +12467,6 @@ Date: Tue, 18 Jul 2023 10:57:25 -0300
Subject: [PATCH] Backport NASM to GStreamer's manual execution and install
Closes !3
----
- meson.build | 60 ++++++++++------
- subprojects/.gitignore | 2 -
- subprojects/nasm-mac.wrap | 9 ---
- subprojects/nasm-win.wrap | 9 ---
- subprojects/nasm/.gitignore | 2 +
- subprojects/nasm/download-binary.py | 76 +++++++++++++++++++++
- subprojects/nasm/meson.build | 41 +++++++++++
- subprojects/nasm/patch_nasm.py | 15 ++++
- subprojects/packagefiles/nasm/meson.build | 13 ----
- subprojects/packagefiles/nasm/patch_nasm.py | 15 ----
- vp8/meson.build | 38 ++++++++---
- vp9/meson.build | 38 ++++++++---
- vpx_dsp/meson.build | 18 +++--
- vpx_ports/meson.build | 22 +++---
- 14 files changed, 256 insertions(+), 102 deletions(-)
- delete mode 100644 subprojects/nasm-mac.wrap
- delete mode 100644 subprojects/nasm-win.wrap
- create mode 100644 subprojects/nasm/.gitignore
- create mode 100755 subprojects/nasm/download-binary.py
- create mode 100644 subprojects/nasm/meson.build
- create mode 100755 subprojects/nasm/patch_nasm.py
- delete mode 100644 subprojects/packagefiles/nasm/meson.build
- delete mode 100644 subprojects/packagefiles/nasm/patch_nasm.py
diff --git a/meson.build b/meson.build
index 2d64b1184..4f9cde607 100644
@@ -13212,9 +13035,6 @@ Subject: [PATCH] meson: Work around Meson bug when using @SOURCE_DIR@ in a
subproject
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/7>
----
- meson.build | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index 4f9cde607..5365af788 100644
@@ -13248,11 +13068,6 @@ Subject: [PATCH] meson: Fix several typos discovered when testing for Cerbero
integration
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/8>
----
- meson.build | 4 ++--
- vp9/meson.build | 4 ++--
- vpx_dsp/meson.build | 12 ++++++------
- 3 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/meson.build b/meson.build
index 5365af788..111c44cee 100644
@@ -13364,9 +13179,6 @@ See:
https://stackoverflow.com/questions/43152633/invalid-register-for-seh-savexmm-in-cygwin
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/8>
----
- meson.build | 8 ++++++++
- 1 file changed, 8 insertions(+)
diff --git a/meson.build b/meson.build
index 111c44cee..a237c3edf 100644
@@ -13397,9 +13209,6 @@ Date: Sun, 30 Jul 2023 16:03:48 +0100
Subject: [PATCH] ci: bump macos and ios images
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/9>
----
- .gitlab-ci.yml | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 872c5dfaf..c4f27d483 100644
@@ -13431,9 +13240,6 @@ Date: Fri, 4 Aug 2023 23:09:33 +0000
Subject: [PATCH] meson: Implement header installation
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/10>
----
- meson.build | 35 +++++++++++++++++++++++++++++++++++
- 1 file changed, 35 insertions(+)
diff --git a/meson.build b/meson.build
index a237c3edf..d34c07045 100644
@@ -13491,14 +13297,6 @@ Date: Fri, 4 Aug 2023 21:35:07 -0300
Subject: [PATCH] meson: Fix internal flags not superseding toolchain's
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/10>
----
- examples/meson.build | 30 +++++++
- meson.build | 131 ++++++++++++++---------------
- third_party/googletest/meson.build | 5 +-
- third_party/libwebm/meson.build | 5 +-
- third_party/libyuv/meson.build | 3 +-
- tools/meson.build | 3 +-
- 6 files changed, 104 insertions(+), 73 deletions(-)
diff --git a/examples/meson.build b/examples/meson.build
index 9c3df73ab..9b511686e 100644
@@ -14048,9 +13846,6 @@ Date: Fri, 4 Aug 2023 21:48:05 -0300
Subject: [PATCH] meson: Fix another typo discovered by Cerbero's
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/10>
----
- vpx_dsp/meson.build | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/vpx_dsp/meson.build b/vpx_dsp/meson.build
index 04ec4b5d7..d8c83297c 100644
@@ -14080,11 +13875,6 @@ Content-Transfer-Encoding: 8bit
With credits to Tim-Philipp Müller for the cpu-features lookup.
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/10>
----
- .gitlab-ci.yml | 3 ++-
- meson.build | 40 +++++++++++++++++++++++++++++++++++++++-
- meson_options.txt | 5 +++++
- 3 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index c4f27d483..11fc1536a 100644
@@ -14199,9 +13989,6 @@ Date: Sat, 5 Aug 2023 00:12:48 -0300
Subject: [PATCH] meson: Fix missing PIC detection
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/10>
----
- meson.build | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index 3105f67f2..2dd5730ca 100644
@@ -14234,9 +14021,6 @@ Date: Sat, 5 Aug 2023 00:10:23 -0300
Subject: [PATCH] meson: Fix iOS Simulator build
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/10>
----
- meson.build | 49 +++++++++++++++++++++++++++----------------------
- 1 file changed, 27 insertions(+), 22 deletions(-)
diff --git a/meson.build b/meson.build
index 2dd5730ca..3a72512cf 100644
@@ -14355,9 +14139,6 @@ Date: Mon, 7 Aug 2023 19:58:03 -0300
Subject: [PATCH] meson: Allow introspection on feature flags
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/11>
----
- meson.build | 8 +++++++-
- 1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index 3a72512cf..f177428f7 100644
@@ -14399,9 +14180,6 @@ Date: Thu, 10 Aug 2023 22:59:26 -0300
Subject: [PATCH] ci: Update Android jobs to R22b
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/12>
----
- .gitlab-ci.yml | 13 ++++++-------
- 1 file changed, 6 insertions(+), 7 deletions(-)
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 11fc1536a..54145dc9f 100644
@@ -14454,10 +14232,6 @@ This is yet another instance of Meson calling Ninja from the root
project's build directory, which mucks with the output of custom_target.
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/14>
----
- meson.build | 1 +
- meson/generate_doxyfile.py | 3 ++-
- 2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index f177428f7..33618f9c7 100644
@@ -14505,9 +14279,6 @@ Subject: [PATCH] meson: Fix pthreads detection failure not disabling
See gstreamer/gstreamer#2905
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/13>
----
- meson.build | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/meson.build b/meson.build
index 33618f9c7..8ae4f7dee 100644
@@ -14547,28 +14318,6 @@ running the script through the Python executable.
See gstreamer/cerbero#450
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/15>
----
- meson/capture_build_options.py | 0
- meson/check_test_data.py | 0
- meson/generate_component_dox.py | 0
- meson/generate_doxy.py | 0
- meson/generate_doxyfile.py | 0
- meson/parse_options.py | 0
- meson/parse_sources.py | 0
- meson/stdinout_wrapper.py | 0
- meson/transform_config.py | 0
- meson/transform_config_asm.py | 0
- 10 files changed, 0 insertions(+), 0 deletions(-)
- mode change 100644 => 100755 meson/capture_build_options.py
- mode change 100644 => 100755 meson/check_test_data.py
- mode change 100644 => 100755 meson/generate_component_dox.py
- mode change 100644 => 100755 meson/generate_doxy.py
- mode change 100644 => 100755 meson/generate_doxyfile.py
- mode change 100644 => 100755 meson/parse_options.py
- mode change 100644 => 100755 meson/parse_sources.py
- mode change 100644 => 100755 meson/stdinout_wrapper.py
- mode change 100644 => 100755 meson/transform_config.py
- mode change 100644 => 100755 meson/transform_config_asm.py
diff --git a/meson/capture_build_options.py b/meson/capture_build_options.py
old mode 100644
@@ -14615,9 +14364,6 @@ should also produce a valid executable.
See gstreamer/cerbero#450
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/16>
----
- meson.build | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index 8ae4f7dee..79ebb514c 100644
@@ -14641,9 +14387,6 @@ From: =?UTF-8?q?Tim-Philipp=20M=C3=BCller?= <tim@centricular.com>
Date: Mon, 9 Oct 2023 18:43:36 +0100
Subject: [PATCH] meson: update to 1.13.1
----
- meson.build | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/meson.build b/meson.build
index 79ebb514c..09b9859f1 100644
@@ -14672,9 +14415,6 @@ Aesthetic change
Fixes #3
Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/19>
----
- build/make/ads2gas.py | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/build/make/ads2gas.py b/build/make/ads2gas.py
index c8025a971..0574135dd 100755
@@ -14759,3 +14499,2436 @@ index 1df26ca88..765d032b8 100644
--
2.44.0.windows.1
+
+From a334f583d65a4b416c8557b2242ca110994503a2 Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Wed, 10 Jul 2024 12:36:17 -0300
+Subject: [PATCH] meson: Prohibit shared libraries outside of Linux and Darwin
+ platforms
+
+Windows in particular needs special consideration because of `__declspec(dllimport)` being absent from the data exports.
+
+diff --git a/meson.build b/meson.build
+index 765d032b8..7366c61a7 100644
+--- a/meson.build
++++ b/meson.build
+@@ -1563,6 +1563,14 @@ vpx_config_c = configure_file(
+
+ # libs.mk
+
++if features.get('shared', false) and not ['linux', 'darwin', 'ios', 'iphonesimulator'].contains(tgt_os)
++ if features.enabled('gnu', false)
++ warn('Shared libraries are only supported on ELF; assuming this is OK')
++ else
++ error('Shared libraries are only supported on ELF, OS/2, and Darwin for now')
++ endif
++endif
++
+ rtcd_exe = find_program('build/make/rtcd.py', required: true)
+
+ extra_libs += c.find_library('m', required: false)
+@@ -1820,33 +1828,18 @@ foreach i: codec_arch_libs
+ objs += i.extract_all_objects(recursive: true)
+ endforeach
+
+-if features.get('static', false) and features.get('shared', false)
+- libvpx = both_libraries(
+- 'vpx',
+- codec_srcs,
+- c_args: project_c_args,
+- link_args: project_c_link_args + link_args,
+- extra_files: codec_headers,
+- dependencies: extra_libs,
+- objects: objs,
+- version: soversion,
+- vs_module_defs: vpx_def,
+- install: features.get('install_libs'),
+- )
+-else
+- libvpx = library(
+- 'vpx',
+- codec_srcs,
+- c_args: project_c_args,
+- link_args: project_c_link_args + link_args,
+- extra_files: codec_headers,
+- dependencies: extra_libs,
+- objects: objs,
+- version: soversion,
+- vs_module_defs: vpx_def,
+- install: features.get('install_libs'),
+- )
+-endif
++libvpx = library(
++ 'vpx',
++ codec_srcs,
++ c_args: project_c_args,
++ link_args: project_c_link_args + link_args,
++ extra_files: codec_headers,
++ dependencies: extra_libs,
++ objects: objs,
++ version: soversion,
++ vs_module_defs: vpx_def,
++ install: features.get('install_libs'),
++)
+
+ headers = files(
+ 'vpx/vpx_codec.h',
+--
+2.44.0.windows.1
+
+
+From 1fdc1f7de86ccf1b165fd10305c905745e326fe7 Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Wed, 10 Jul 2024 12:42:38 -0300
+Subject: [PATCH] meson: Fix typo in the tiny_ssim executable clause
+
+Using combos as booleans is not valid, the features hash was intended here.
+
+diff --git a/tools/meson.build b/tools/meson.build
+index 47c9ee831..7e6f16360 100644
+--- a/tools/meson.build
++++ b/tools/meson.build
+@@ -28,7 +28,7 @@ tiny_ssim = executable(
+ link_args: project_c_link_args + link_args,
+ extra_files: tiny_ssim_headers,
+ dependencies: libvpx_dep,
+- install: get_option('install_bins'),
++ install: features.get('install_bins', false),
+ )
+
+ tools_subpages = {
+--
+2.44.0.windows.1
+
+
+From eecbef0754c7b36bb55c6b0ec1aca0717bc69ea9 Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Sat, 20 Jul 2024 21:07:28 +0000
+Subject: [PATCH] ci: Switch Apple jobs to the Arm64 runner
+
+Fixes #2
+
+Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/22>
+
+diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
+index 54145dc9f..cc7c27de1 100644
+--- a/.gitlab-ci.yml
++++ b/.gitlab-ci.yml
+@@ -198,6 +198,9 @@ vs2019 arm64 cross:
+
+ .build darwin:
+ stage: 'build'
++ image: "registry.freedesktop.org/gstreamer/gstreamer/macos-arm64/14-sonoma:2023-10-25.0"
++ tags:
++ - gst-mac-arm
+ artifacts:
+ name: "${CI_JOB_NAME}_${CI_COMMIT_SHA}"
+ expire_in: '5 days'
+@@ -215,35 +218,35 @@ vs2019 arm64 cross:
+ - pip3 install --upgrade pip
+ # Make sure meson is up to date
+ - pip3 install -U meson
+- # Need to install certificates for python
+- - pip3 install --upgrade certifi
+- # Another way to install certificates
+- - open /Applications/Python\ 3.8/Install\ Certificates.command
+ # Get ninja
+ - pip3 install -U ninja
+ script:
+- # HACK to sneak Nasm and Perl under Meson's nose.
+- - export PATH="$(pwd)/subprojects/nasm-2.16.01:$PATH"
+- - CERT_PATH=$(python3 -m certifi) && export SSL_CERT_FILE=${CERT_PATH} && export REQUESTS_CA_BUNDLE=${CERT_PATH} && meson setup mesonbuild
++ - meson setup mesonbuild
+ - meson compile -C mesonbuild
+ - meson test -C mesonbuild
+
++macOS arm64:
++ extends: '.build darwin'
++
+ macOS x86_64:
+ extends: '.build darwin'
+- stage: 'build'
+- tags:
+- - gst-macos-13
++ script:
++ # HACK to sneak Nasm and Perl under Meson's nose.
++ - export PATH="$(pwd)/subprojects/nasm-2.16.01:$PATH"
++ - meson setup mesonbuild
++ - meson compile -C mesonbuild
++ - meson test -C mesonbuild
+
+ iOS arm64 cross:
+ extends: '.build darwin'
+- stage: 'build'
+- tags:
+- - gst-ios-16
++ # Same-architecture cross-builds are broken on 1.4.1
++ # 1.5.0 was meant to fix this, but instead broke detection wholesale
++ # Once 1.5.1 is out, try restoring `system = 'darwin'`
+ script:
+ - |
+ cat > ios-cross-file.txt <<EOF
+ [host_machine]
+- system = 'darwin'
++ system = 'ios'
+ cpu_family = 'aarch64'
+ cpu = 'aarch64'
+ endian = 'little'
+@@ -262,7 +265,7 @@ iOS arm64 cross:
+ strip = '$(xcrun --find --sdk iphoneos strip)'
+ pkgconfig = 'false'
+ EOF
+- - CERT_PATH=$(python3 -m certifi) && export SSL_CERT_FILE=${CERT_PATH} && export REQUESTS_CA_BUNDLE=${CERT_PATH} && meson setup mesonbuild --cross-file=ios-cross-file.txt
++ - meson setup mesonbuild --cross-file=ios-cross-file.txt
+ - meson compile -C mesonbuild
+ - meson test -C mesonbuild
+
+--
+2.44.0.windows.1
+
+
+From 38847a0321bcb1d04ff09ccd410f0d6288b92c39 Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Wed, 10 Jul 2024 18:53:54 -0300
+Subject: [PATCH] meson: Update to 1.14.1
+
+Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/21>
+
+diff --git a/build/make/rtcd.py b/build/make/rtcd.py
+index 3d4121c57..cdff3dcc1 100755
+--- a/build/make/rtcd.py
++++ b/build/make/rtcd.py
+@@ -422,7 +422,7 @@ if __name__ == '__main__':
+ ALL_ARCHS = filter(*qw('neon_asm neon'))
+ arm()
+ elif opts['arch'] == 'armv8' or opts['arch'] == 'arm64':
+- ALL_ARCHS = filter(*qw('neon'))
++ ALL_ARCHS = filter(*qw('neon neon_dotprod neon_i8mm sve'))
+ REQUIRES = filter(*qw('neon'))
+ require(*REQUIRES)
+ arm()
+diff --git a/examples/meson.build b/examples/meson.build
+index 9b511686e..09e5fab2c 100644
+--- a/examples/meson.build
++++ b/examples/meson.build
+@@ -232,6 +232,8 @@ if features.get('decoders', false)
+ vpxdec = executable(
+ 'vpxdec',
+ vpxdec_srcs,
++ c_args: project_c_args,
++ link_args: project_c_link_args,
+ extra_files: vpxdec_headers,
+ dependencies: vpxdec_deps,
+ install: features.get('install_bins', false),
+diff --git a/meson.build b/meson.build
+index 7366c61a7..198165e61 100644
+--- a/meson.build
++++ b/meson.build
+@@ -4,13 +4,11 @@
+ project(
+ 'libvpx',
+ 'c', 'cpp',
+- version: '1.13.1',
++ version: '1.14.1',
+ default_options: [
+- # Enforce c89 for c files. Don't be too strict about it though. Allow
+- # gnu extensions like "//" for comments.
+- 'c_std=gnu89',
+- # Uncomment when Meson can handle this one for MSVC.
+- # 'cpp_std=gnu++11',
++ # Upstream expects C99, but C11 is selected because MSVC does not support C99. GNU extensions are re-enabled via add_project_arguments.
++ 'c_std=c11', # Because MSVC
++ 'cpp_std=c++11',
+ 'warning_level=2',
+ 'default_library=static',
+ 'buildtype=debugoptimized',
+@@ -85,6 +83,10 @@ features.set('spatial_resampling', true)
+ features.set('multithread', true)
+ features.set('os_support', true)
+ features.set('temporal_denoising', true)
++
++if get_option('b_pie') and not get_option('b_staticpic')
++ error('PIC is required when building position independent executables')
++endif
+ features.set('pic', get_option('b_staticpic'))
+
+ CODECS = [
+@@ -101,6 +103,7 @@ CODEC_FAMILIES = [
+
+ ARCH_LIST = [
+ 'arm',
++ 'aarch64',
+ 'mips',
+ 'x86',
+ 'x86_64',
+@@ -108,6 +111,13 @@ ARCH_LIST = [
+ 'loongarch',
+ ]
+
++ARCH_EXT_LIST_AARCH64 = [
++ 'neon',
++ 'neon_dotprod',
++ 'neon_i8mm',
++ 'sve',
++]
++
+ ARCH_EXT_LIST_X86 = [
+ 'mmx',
+ 'sse',
+@@ -127,9 +137,8 @@ ARCH_EXT_LIST_LOONGSON = [
+ ]
+
+ ARCH_EXT_LIST = [
+- 'neon',
+- 'neon_asm',
+-
++ 'neon_asm'
++] + ARCH_EXT_LIST_AARCH64 + [
+ 'mips32',
+ 'dspr2',
+ 'msa',
+@@ -199,7 +208,6 @@ CONFIG_LIST = [
+ 'multi_res_encoding',
+ 'temporal_denoising',
+ 'vp9_temporal_denoising',
+- 'consistent_recode',
+ 'coefficient_range_checking',
+ 'vp9_highbitdepth',
+ 'better_hw_compatibility',
+@@ -265,7 +273,6 @@ CMDLINE_SELECT = [
+ 'multi_res_encoding',
+ 'temporal_denoising',
+ 'vp9_temporal_denoising',
+- 'consistent_recode',
+ 'coefficient_range_checking',
+ 'better_hw_compatibility',
+ 'vp9_highbitdepth',
+@@ -525,6 +532,11 @@ c = meson.get_compiler('c')
+ cpp = meson.get_compiler('cpp')
+ asm = meson.get_compiler('c') # asm == GCC
+
++# Since MSVC does not support C99, but the official configure expects it,
++# the Meson default takes C11. This sets the GNU extensions back online.
++add_project_arguments(c.get_supported_arguments('-std=gnu11'), language: 'c')
++add_project_arguments(cpp.get_supported_arguments('-std=gnu++11'), language: 'cpp')
++
+ tgt_isa = host_machine.cpu_family()
+ tgt_os = host_machine.system()
+ tgt_cc = c.get_id()
+@@ -612,7 +624,10 @@ endif
+ features.set(tgt_cc, true)
+
+ # Enable the architecture family
+-if tgt_isa.startswith('arm')
++if tgt_isa == 'arm64'
++ features.set('arm', true)
++ features.set('aarch64', true)
++elif tgt_isa.startswith('arm')
+ features.set('arm', true)
+ elif tgt_isa.startswith('mips')
+ features.set('mips', true)
+@@ -637,7 +652,7 @@ if tgt_os == 'solaris'
+ endif
+
+ if tgt_isa.startswith('arm')
+- feature = 'neon'
++ feature = 'runtime_cpu_detect'
+ if features.get(feature, true)
+ if not features.has(feature)
+ message('\tenabling @0@'.format(feature))
+@@ -646,6 +661,13 @@ if tgt_isa.startswith('arm')
+ endif
+
+ if tgt_isa == 'armv7'
++ feature = 'neon'
++ if features.get(feature, true)
++ if not features.has(feature)
++ message('\tenabling @0@'.format(feature))
++ endif
++ features.set(feature, true)
++ endif
+ # Only enable neon_asm when neon is also enabled.
+ if features.get('neon', false)
+ feature = 'neon_asm'
+@@ -772,6 +794,93 @@ if tgt_isa.startswith('arm')
+ elif tgt_os == 'linux'
+ features.set('linux', true)
+ endif
++ if tgt_isa == 'arm64'
++ aarch64_arch_flag_neon = 'arch=armv8-a'
++ aarch64_arch_flag_neon_dotprod = 'arch=armv8.2-a+dotprod'
++ aarch64_arch_flag_neon_i8mm = 'arch=armv8.2-a+dotprod+i8mm'
++ aarch64_arch_flag_sve = 'arch=armv8.2-a+dotprod+i8mm+sve'
++
++ # Unlike the original, we can test cflags here :)
++ disable_exts = false
++ foreach feature : ARCH_EXT_LIST_AARCH64
++ if disable_exts
++ rtcd_options += ['--disable', feature]
++ if not features.get(feature, false) # ! enabled
++ if not features.has(feature) # disabled
++ message('\tdisabling @0@'.format(feature))
++ endif
++ features.set(feature, false)
++ endif
++ else
++ # Check the compiler supports the -march flag for the extension.
++ # This needs to happen after toolchain/OS inspection so we handle
++ # $CROSS etc correctly when checking for flags, else these will
++ # always fail.
++ flag = get_variable('aarch64_arch_flag_@0@'.format(feature))
++ if c.has_argument('-m@0@'.format(flag))
++ if features.get(feature, true)
++ if not features.has(feature)
++ message('\tenabling @0@'.format(feature))
++ endif
++ features.set(feature, true)
++ endif
++ else
++ rtcd_options += ['--disable', feature]
++ if not features.get(feature, false) # ! enabled
++ if not features.has(feature) # disabled
++ message('\tdisabling @0@'.format(feature))
++ endif
++ features.set(feature, false)
++ endif
++ endif
++ if not features.get(feature, false)
++ # Disable higher order extensions to simplify dependencies.
++ disable_exts = true
++ rtcd_options += ['--disable', feature]
++ if not features.get(feature, false) # ! enabled
++ if not features.has(feature) # disabled
++ message('\tdisabling @0@'.format(feature))
++ endif
++ features.set(feature, false)
++ endif
++ endif
++ endif
++ endforeach
++ if features.get('sve', false)
++ supports_armv8_etc = c.compiles('''
++ #ifndef __ARM_NEON_SVE_BRIDGE
++ #error 1
++ #endif
++ #include <arm_sve.h>
++ #include <arm_neon_sve_bridge.h>
++ ''',
++ name: 'supports Armv8.2-a, dotprod, i8mm, and SVE',
++ args: '-march=armv8.2-a+dotprod+i8mm+sve'
++ )
++ if supports_armv8_etc
++ # Check whether the compiler can compile SVE functions that require
++ # backup/restore of SVE registers according to AAPCS. Clang for Windows
++ # used to fail this, see
++ # https://github.com/llvm/llvm-project/issues/80009.
++ supports_armv8_etc = c.compiles('''
++ #include <arm_sve.h>
++ void other(void);
++ svfloat32_t func(svfloat32_t a) {
++ other();
++ return a;
++ }
++ ''',
++ name: 'has usable arm_sve.h',
++ args: '-march=armv8.2-a+dotprod+i8mm+sve'
++ )
++ endif
++ if not supports_armv8_etc
++ warning('disabling SVE: arm_neon_sve_bridge.h not supported by compiler')
++ features.set('sve', false)
++ rtcd_options += ['--disable', 'sve']
++ endif
++ endif
++ endif
+ elif tgt_isa.startswith('mips')
+ tune_cflags = '-mtune=@0@'
+
+@@ -1223,7 +1332,6 @@ endif
+
+ if features.get('gcc', false)
+ gcc_flags = [
+- '-Wdeclaration-after-statement',
+ '-Wdisabled-optimization',
+ '-Wextra-semi',
+ '-Wextra-semi-stmt',
+@@ -1237,8 +1345,9 @@ if features.get('gcc', false)
+ '-Wimplicit-function-declaration',
+ '-Wmissing-declarations',
+ '-Wmissing-prototypes',
++ '-Wshadow',
+ '-Wuninitialized',
+- '-Wunreachable-code-loop-increment',
++ '-Wunreachable-code-aggressive',
+ '-Wunused',
+ ]
+
+@@ -1270,12 +1379,14 @@ if features.get('gcc', false)
+ '-Wc++14-extensions',
+ '-Wc++17-extensions',
+ '-Wc++20-extensions',
++ '-Wnon-virtual-dtor',
+ )
+- # disable some warnings specific to libyuv.
++ # disable some warnings specific to libyuv / libwebm.
+ libyuv_cpp_args += cpp.get_supported_arguments(
+ '-Wno-missing-declarations',
+ '-Wno-missing-prototypes',
+ '-Wno-pass-failed',
++ '-Wno-shadow',
+ '-Wno-unused-parameter',
+ )
+ endif
+@@ -1361,7 +1472,6 @@ if tgt_cc == 'msvs'
+ elif tgt_os == 'android'
+ enable_features = ['libyuv']
+ if cpp.has_argument('-std=gnu++11')
+- project_cpp_args += '-std=gnu++11'
+ enable_features += ['webm_io']
+ endif
+ foreach feature: enable_features
+@@ -1380,7 +1490,6 @@ elif tgt_os == 'darwin'
+ elif tgt_os == 'iphonesimulator'
+ enable_features = ['libyuv']
+ if cpp.has_argument('-std=gnu++11')
+- project_cpp_args += '-std=gnu++11'
+ enable_features += ['webm_io']
+ endif
+ foreach feature: enable_features
+@@ -1397,7 +1506,6 @@ elif target_machine.system() == 'windows'
+ # would be disabled for the same reason.
+ enable_features = []
+ if cpp.has_argument('-std=gnu++11')
+- project_cpp_args += '-std=gnu++11'
+ enable_features += ['unit_tests', 'webm_io']
+ endif
+ if cpp.compiles('int z;')
+@@ -1414,7 +1522,6 @@ elif target_machine.system() == 'windows'
+ else
+ enable_features = []
+ if cpp.has_argument('-std=gnu++11')
+- project_cpp_args += '-std=gnu++11'
+ if features.get('pthread_h', false)
+ enable_features += ['unit_tests']
+ endif
+@@ -1553,7 +1660,7 @@ else
+ endif
+
+ vpx_config_data = configuration_data()
+-vpx_config_data.set_quoted('CONFIGURE_ARGS', configure_args)
++vpx_config_data.set_quoted('CONFIGURE_ARGS', configure_args.replace('\\', '\\\\'))
+
+ vpx_config_c = configure_file(
+ input: 'meson/vpx_config.c',
+@@ -1564,7 +1671,7 @@ vpx_config_c = configure_file(
+ # libs.mk
+
+ if features.get('shared', false) and not ['linux', 'darwin', 'ios', 'iphonesimulator'].contains(tgt_os)
+- if features.enabled('gnu', false)
++ if features.get('gcc', false) # Looks like a typo on upstream
+ warn('Shared libraries are only supported on ELF; assuming this is OK')
+ else
+ error('Shared libraries are only supported on ELF, OS/2, and Darwin for now')
+@@ -1611,6 +1718,11 @@ else
+ 'avx2': ['-mavx2'],
+ 'avx512': ['-mavx512f', '-mavx512cd', '-mavx512bw', '-mavx512dq', '-mavx512vl'],
+
++ # AARCH64
++ 'neon_dotprod': ['-march=armv8.2-a+dotprod'],
++ 'neon_i8mm': ['-march=armv8.2-a+dotprod+i8mm'],
++ 'sve': ['-march=armv8.2-a+dotprod+i8mm+sve'],
++
+ # POWER
+ 'vsx' : ['-maltivec', '-mvsx'],
+
+@@ -1809,7 +1921,7 @@ else
+ link_args += ['-Wl,--version-script,@0@'.format(meson.current_build_dir() / vpx_def_name)]
+ endif
+
+-soversion = '8.0.0'
++soversion = '9.0.1'
+
+ foreach arch, srcs : codec_arch_srcs
+ codec_arch_libs += static_library(
+diff --git a/meson/parse_sources.py b/meson/parse_sources.py
+index 5693aaddd..3b38eb2a5 100755
+--- a/meson/parse_sources.py
++++ b/meson/parse_sources.py
+@@ -80,8 +80,10 @@ def make_to_meson(target: str, paths: list[str]):
+ else:
+ continue
+
+- if not source_type or not component or not label:
++ if not component:
+ raise RuntimeError('Unspecified input file data was found')
++ elif not source_type:
++ print(f'Skipping {ofiles}')
+
+ accumulate = ofiles.endswith('\\')
+ ofiles = ofiles.strip('\\')
+@@ -99,7 +101,7 @@ def make_to_meson(target: str, paths: list[str]):
+ component_sources[label] = component_sources.setdefault(label, list()) + accum + ifiles
+ accum = []
+
+- if not label:
++ if not label and accum:
+ raise RuntimeError('Unspecified component type')
+
+ # Makefiles can end with '\' and this is just a porting script ;)
+@@ -145,7 +147,7 @@ def make_to_meson(target: str, paths: list[str]):
+ print ('Warning: skipping %s' % source)
+ continue
+ f.write(f"\t'{source}',\n")
+- f.write('{file_closing}\n\n')
++ f.write(f'{file_closing}\n\n')
+
+ f.write(f'{component}_{source_type}optional_sources = {{\n')
+ for label in sorted (component_sources):
+diff --git a/test/meson.build b/test/meson.build
+index 26a55d980..0c7319619 100644
+--- a/test/meson.build
++++ b/test/meson.build
+@@ -7,10 +7,10 @@
+
+ libvpx_test_sources = files(
+ 'bench.cc',
++ 'init_vpx_test.cc',
+ 'test_libvpx.cc',
+- '../md5_utils.c', # MANUAL
+- '../y4minput.c', # MANUAL
+ 'test_vectors.cc',
++ 'vpx_image_test.cc',
+ 'decode_test_driver.cc',
+ # 'decode_perf_test.cc', # MANUAL
+ # 'encode_perf_test.cc', # MANUAL
+@@ -41,10 +41,6 @@ libvpx_test_optional_sources = {
+ ),
+ 'encoders' : files(
+ 'altref_test.cc',
+- 'aq_segment_test.cc',
+- 'alt_ref_aq_segment_test.cc',
+- 'vp8_datarate_test.cc',
+- 'vp9_datarate_test.cc',
+ 'encode_api_test.cc',
+ 'error_resilience_test.cc',
+ 'realtime_test.cc',
+@@ -74,6 +70,7 @@ libvpx_test_optional_sources = {
+ 'config_test.cc',
+ 'cq_test.cc',
+ 'keyframe_test.cc',
++ 'vp8_datarate_test.cc',
+ # 'quantize_test.cc', # MANUAL
+ # 'set_roi.cc', # MANUAL
+ # 'variance_test.cc', # MANUAL
+@@ -91,6 +88,8 @@ libvpx_test_optional_sources = {
+ 'vp9_encoder' : files(
+ 'active_map_refresh_test.cc',
+ 'active_map_test.cc',
++ 'alt_ref_aq_segment_test.cc',
++ 'aq_segment_test.cc',
+ 'borders_test.cc',
+ 'cpu_speed_test.cc',
+ 'frame_size_tests.cc',
+@@ -104,6 +103,7 @@ libvpx_test_optional_sources = {
+ 'svc_test.cc',
+ 'svc_end_to_end_test.cc',
+ 'timestamp_test.cc',
++ 'vp9_datarate_test.cc',
+ 'vp9_ext_ratectrl_test.cc',
+ # 'avg_test.cc', # MANUAL
+ # 'comp_avg_pred_test.cc', # MANUAL
+@@ -143,6 +143,7 @@ simple_encode_test_optional_sources = {
+
+ test_intra_pred_speed_sources = files(
+ # 'test_intra_pred_speed.cc', # MANUAL
++ 'init_vpx_test.cc',
+ )
+
+ test_intra_pred_speed_optional_sources = {
+@@ -173,6 +174,7 @@ libvpx_test_headers_sources = files(
+ 'codec_factory.h',
+ 'md5_helper.h',
+ 'register_state_check.h',
++ 'init_vpx_test.h',
+ 'test_vectors.h',
+ 'util.h',
+ 'video_source.h',
+@@ -210,6 +212,13 @@ libwebm_parser_headers_sources = files(
+ libwebm_parser_headers_optional_sources = {
+ }
+
++test_intra_pred_speed_headers_sources = files(
++ 'init_vpx_test.h',
++)
++
++test_intra_pred_speed_headers_optional_sources = {
++}
++
+ rc_interface_test_headers_sources = files(
+ # 'decode_test_driver.h', # MANUAL
+ # 'codec_factory.h', # MANUAL
+@@ -1095,6 +1104,7 @@ libvpx_test_data_data_optional_sources = {
+ 'screendata.y4m',
+ 'niklas_640_480_30.yuv',
+ 'bus_352x288_420_f20_b8.yuv',
++ 'crowd_run_360p_10_150f.y4m',
+ 'niklas_1280_720_30.yuv',
+ 'desktop_640_360_30.yuv',
+ 'kirland_640_480_30.yuv',
+@@ -1107,6 +1117,9 @@ libvpx_test_data_data_optional_sources = {
+ ],
+ }
+
++#### --- END GENERATED --- ####
++
++
+ if features.get('webm_io', false) # MANUAL
+ libvpx_test_optional_sources += {
+ 'decoders' : libvpx_test_optional_sources['decoders'] + files(
+@@ -1432,8 +1445,6 @@ if features.get('encode_perf_tests', false)
+ }
+ endif
+
+-#### --- END GENERATED --- ####
+-
+ optional_sources = get_variable('libvpx_test_optional_sources', {})
+ foreach comp_name, comp_sources : optional_sources
+ if features.get(comp_name, false)
+diff --git a/third_party/libyuv/meson.build b/third_party/libyuv/meson.build
+index 78af1b775..f9d07f901 100644
+--- a/third_party/libyuv/meson.build
++++ b/third_party/libyuv/meson.build
+@@ -2,6 +2,7 @@
+ # SPDX-License-Identifier: BSD-3-Clause
+
+ libyuv_includes = files(
++ 'include/libyuv/basic_types.h',
+ 'include/libyuv/convert.h',
+ 'include/libyuv/convert_argb.h',
+ 'include/libyuv/convert_from.h',
+diff --git a/vp8/common/rtcd_defs.py b/vp8/common/rtcd_defs.py
+index 4edaea4cd..0af078974 100755
+--- a/vp8/common/rtcd_defs.py
++++ b/vp8/common/rtcd_defs.py
+@@ -130,12 +130,6 @@ specialize(*qw('vp8_copy_mem8x4 mmx neon dspr2 msa mmi'))
+ #
+ if vpx_config("CONFIG_POSTPROC") == "yes":
+
+- add_proto(*qw('void vp8_blend_mb_inner'), "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride")
+-
+- add_proto(*qw('void vp8_blend_mb_outer'), "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride")
+-
+- add_proto(*qw('void vp8_blend_b'), "unsigned char *y, unsigned char *u, unsigned char *v, int y_1, int u_1, int v_1, int alpha, int stride")
+-
+ add_proto(*qw('void vp8_filter_by_weight16x16'), "unsigned char *src, int src_stride, unsigned char *dst, int dst_stride, int src_weight")
+ specialize(*qw('vp8_filter_by_weight16x16 sse2 msa'))
+
+@@ -220,14 +214,14 @@ if vpx_config("CONFIG_VP8_ENCODER") == "yes":
+ #
+ add_proto(*qw('int vp8_refining_search_sad'), "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, int error_per_bit, int search_range, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv")
+ specialize(*qw('vp8_refining_search_sad sse2 msa'))
+- vp8_refining_search_sad_sse2='vp8_refining_search_sadx4';
+- vp8_refining_search_sad_msa='vp8_refining_search_sadx4';
++ vp8_refining_search_sad_sse2='vp8_refining_search_sadx4'
++ vp8_refining_search_sad_msa='vp8_refining_search_sadx4'
+
+ add_proto(*qw('int vp8_diamond_search_sad'), "struct macroblock *x, struct block *b, struct blockd *d, union int_mv *ref_mv, union int_mv *best_mv, int search_param, int sad_per_bit, int *num00, struct variance_vtable *fn_ptr, int *mvcost[2], union int_mv *center_mv")
+ specialize(*qw('vp8_diamond_search_sad sse2 msa lsx'))
+- vp8_diamond_search_sad_sse2='vp8_diamond_search_sadx4';
+- vp8_diamond_search_sad_msa='vp8_diamond_search_sadx4';
+- vp8_diamond_search_sad_lsx='vp8_diamond_search_sadx4';
++ vp8_diamond_search_sad_sse2='vp8_diamond_search_sadx4'
++ vp8_diamond_search_sad_msa='vp8_diamond_search_sadx4'
++ vp8_diamond_search_sad_lsx='vp8_diamond_search_sadx4'
+
+ #
+ # Alt-ref Noise Reduction (ARNR)
+diff --git a/vp8/meson.build b/vp8/meson.build
+index 647460ae4..a9790e613 100644
+--- a/vp8/meson.build
++++ b/vp8/meson.build
+@@ -327,6 +327,9 @@ vp8_dx_headers_optional_sources = {
+ ),
+ }
+
++vp8_common_asm_sources = files(
++)
++
+ vp8_common_asm_optional_sources = {
+ 'mmx' : files(
+ 'common/x86/dequantize_mmx.asm',
+@@ -348,6 +351,9 @@ vp8_common_asm_optional_sources = {
+ ),
+ }
+
++vp8_cx_asm_sources = files(
++)
++
+ if features.get('postproc', false) # MANUAL
+ vp8_common_asm_optional_sources += {
+ 'sse2': vp8_common_asm_optional_sources['sse2'] + files(
+diff --git a/vp9/common/vp9_rtcd_defs.py b/vp9/common/vp9_rtcd_defs.py
+index 41fdd120b..ad65ae649 100755
+--- a/vp9/common/vp9_rtcd_defs.py
++++ b/vp9/common/vp9_rtcd_defs.py
+@@ -27,7 +27,9 @@ struct macroblockd;
+
+ /* Encoder forward decls */
+ struct macroblock;
+-struct vp9_variance_vtable;
++struct macroblock_plane;
++struct vp9_sad_table;
++struct ScanOrder;
+ struct search_site_config;
+ struct mv;
+ union int_mv;
+@@ -121,24 +123,21 @@ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+ add_proto(*qw('int64_t vp9_block_error'), "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz")
+
+ add_proto(*qw('int64_t vp9_block_error_fp'), "const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size")
++ specialize(*qw('vp9_block_error_fp neon avx2 sse2'))
+
+- add_proto(*qw('void vp9_quantize_fp'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vp9_quantize_fp'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vp9_quantize_fp neon sse2 ssse3 avx2 vsx'))
+
+- add_proto(*qw('void vp9_quantize_fp_32x32'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vp9_quantize_fp_32x32'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vp9_quantize_fp_32x32 neon ssse3 avx2 vsx'))
+
+ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+- specialize(*qw('vp9_block_error avx2 sse2'))
+-
+- specialize(*qw('vp9_block_error_fp avx2 sse2'))
++ specialize(*qw('vp9_block_error neon avx2 sse2'))
+
+ add_proto(*qw('int64_t vp9_highbd_block_error'), "const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd")
+- specialize(*qw('vp9_highbd_block_error sse2'))
++ specialize(*qw('vp9_highbd_block_error neon sse2'))
+ else:
+- specialize(*qw('vp9_block_error avx2 msa sse2'))
+-
+- specialize(*qw('vp9_block_error_fp neon avx2 sse2'))
++ specialize(*qw('vp9_block_error neon avx2 msa sse2'))
+
+ # fdct functions
+
+@@ -166,15 +165,15 @@ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+ #
+ # Motion search
+ #
+- add_proto(*qw('int vp9_diamond_search_sad'), "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv")
+- specialize(*qw('vp9_diamond_search_sad avx neon'))
++ add_proto(*qw('int vp9_diamond_search_sad'), "const struct macroblock *x, const struct search_site_config *cfg, struct mv *ref_mv, uint32_t start_mv_sad, struct mv *best_mv, int search_param, int sad_per_bit, int *num00, const struct vp9_sad_table *sad_fn_ptr, const struct mv *center_mv")
++ specialize(*qw('vp9_diamond_search_sad neon'))
+
+ #
+ # Apply temporal filter
+ #
+ if vpx_config("CONFIG_REALTIME_ONLY") != "yes":
+ add_proto(*qw('void vp9_apply_temporal_filter'), "const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count")
+- specialize(*qw('vp9_apply_temporal_filter sse4_1'))
++ specialize(*qw('vp9_apply_temporal_filter sse4_1 neon'))
+
+ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+ add_proto(*qw('void vp9_highbd_apply_temporal_filter'), "const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count")
+@@ -184,10 +183,10 @@ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+ # ENCODEMB INVOKE
+
+- add_proto(*qw('void vp9_highbd_quantize_fp'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vp9_highbd_quantize_fp'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vp9_highbd_quantize_fp avx2 neon'))
+
+- add_proto(*qw('void vp9_highbd_quantize_fp_32x32'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan" )
++ add_proto(*qw('void vp9_highbd_quantize_fp_32x32'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vp9_highbd_quantize_fp_32x32 avx2 neon'))
+
+ # fdct functions
+@@ -195,8 +194,10 @@ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+ specialize(*qw('vp9_highbd_fht4x4 neon'))
+
+ add_proto(*qw('void vp9_highbd_fht8x8'), "const int16_t *input, tran_low_t *output, int stride, int tx_type")
++ specialize(*qw('vp9_highbd_fht8x8 neon'))
+
+ add_proto(*qw('void vp9_highbd_fht16x16'), "const int16_t *input, tran_low_t *output, int stride, int tx_type")
++ specialize(*qw('vp9_highbd_fht16x16 neon'))
+
+ add_proto(*qw('void vp9_highbd_fwht4x4'), "const int16_t *input, tran_low_t *output, int stride")
+
+@@ -211,4 +212,3 @@ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+ specialize(*qw('vp9_scale_and_extend_frame neon ssse3'))
+
+ # end encoder functions
+-
+diff --git a/vp9/meson.build b/vp9/meson.build
+index 7b7619575..e9314966c 100644
+--- a/vp9/meson.build
++++ b/vp9/meson.build
+@@ -67,42 +67,6 @@ vp9_common_optional_sources = {
+ ),
+ }
+
+-if not features.get('vp9_highbitdepth', false) # MANUAL
+- vp9_common_optional_sources += {
+- 'msa': vp9_common_optional_sources['msa'] + files(
+- 'common/mips/msa/vp9_idct4x4_msa.c',
+- 'common/mips/msa/vp9_idct8x8_msa.c',
+- 'common/mips/msa/vp9_idct16x16_msa.c',
+- ),
+- 'dspr2': vp9_common_optional_sources['dspr2'] + files(
+- 'common/mips/dspr2/vp9_itrans4_dspr2.c',
+- 'common/mips/dspr2/vp9_itrans8_dspr2.c',
+- 'common/mips/dspr2/vp9_itrans16_dspr2.c'
+- )
+- }
+-else
+- vp9_common_optional_sources += {
+- 'neon': vp9_common_optional_sources['neon'] + files(
+- 'common/arm/neon/vp9_highbd_iht4x4_add_neon.c',
+- 'common/arm/neon/vp9_highbd_iht8x8_add_neon.c',
+- 'common/arm/neon/vp9_highbd_iht16x16_add_neon.c'
+- ),
+- 'sse4_1': vp9_common_optional_sources['sse4_1'] + files(
+- 'common/x86/vp9_highbd_iht4x4_add_sse4.c',
+- 'common/x86/vp9_highbd_iht8x8_add_sse4.c',
+- 'common/x86/vp9_highbd_iht16x16_add_sse4.c'
+- )
+- }
+-endif
+-
+-if features.get('vp9_postproc', false) # MANUAL
+- vp9_common_optional_sources += {
+- 'msa': vp9_common_optional_sources['msa'] + files(
+- 'common/mips/msa/vp9_mfqe_msa.c'
+- )
+- }
+-endif
+-
+ vp9_cx_sources = files(
+ 'vp9_cx_iface.c',
+ 'encoder/vp9_bitstream.c',
+@@ -142,13 +106,11 @@ vp9_cx_sources = files(
+ 'encoder/vp9_noise_estimate.c',
+ 'encoder/vp9_ext_ratectrl.c',
+ # 'encoder/vp9_temporal_filter.c', # MANUAL
++ 'encoder/vp9_tpl_model.c',
+ # 'encoder/vp9_mbgraph.c', # MANUAL
+ )
+
+ vp9_cx_optional_sources = {
+- 'avx' : files(
+- 'encoder/x86/vp9_diamond_search_sad_avx.c'
+- ),
+ 'avx2' : files(
+ 'encoder/x86/vp9_quantize_avx2.c',
+ 'encoder/x86/vp9_error_avx2.c'
+@@ -164,12 +126,15 @@ vp9_cx_optional_sources = {
+ # 'encoder/mips/msa/vp9_fdct16x16_msa.c' # MANUAL
+ ),
+ 'neon' : files(
++ # 'encoder/arm/neon/vp9_temporal_filter_neon.c', # MANUAL
+ 'encoder/arm/neon/vp9_diamond_search_sad_neon.c',
++ # 'encoder/arm/neon/vp9_highbd_temporal_filter_neon.c', # MANUAL
+ 'encoder/arm/neon/vp9_dct_neon.c',
+ # 'encoder/arm/neon/vp9_denoiser_neon.c', # MANUAL
+ # 'encoder/arm/neon/vp9_error_neon.c', # MANUAL
+ 'encoder/arm/neon/vp9_frame_scale_neon.c',
+- 'encoder/arm/neon/vp9_quantize_neon.c'
++ 'encoder/arm/neon/vp9_quantize_neon.c',
++ # 'encoder/arm/neon/vp9_highbd_error_neon.c', # MANUAL
+ ),
+ 'non_greedy_mv' : files(
+ 'encoder/vp9_non_greedy_mv.c'
+@@ -260,6 +225,7 @@ vp9_cx_headers_sources = files(
+ 'encoder/vp9_encodemv.h',
+ 'encoder/vp9_extend.h',
+ 'encoder/vp9_firstpass.h',
++ 'encoder/vp9_firstpass_stats.h',
+ 'encoder/vp9_job_queue.h',
+ 'encoder/vp9_lookahead.h',
+ 'encoder/vp9_mcomp.h',
+@@ -288,6 +254,7 @@ vp9_cx_headers_sources = files(
+ 'encoder/vp9_noise_estimate.h',
+ 'encoder/vp9_ext_ratectrl.h',
+ 'encoder/vp9_temporal_filter.h',
++ 'encoder/vp9_tpl_model.h',
+ 'encoder/vp9_mbgraph.h',
+ )
+
+@@ -299,27 +266,20 @@ vp9_cx_headers_optional_sources = {
+ 'msa' : files(
+ # 'encoder/mips/msa/vp9_fdct_msa.h' # MANUAL
+ ),
++ 'neon' : files(
++ # 'encoder/vp9_temporal_filter_constants.h' # MANUAL
++ ),
+ 'non_greedy_mv' : files(
+ 'encoder/vp9_non_greedy_mv.h'
+ ),
+ 'sse4_1' : files(
+- # 'encoder/x86/temporal_filter_constants.h' # MANUAL
++ # 'encoder/vp9_temporal_filter_constants.h' # MANUAL
+ ),
+ 'vp9_temporal_denoising' : files(
+ 'encoder/vp9_denoiser.h'
+ ),
+ }
+
+-vp9_cx_remove_headers_optional_sources = {
+- 'realtime_only' : files(
+- 'encoder/x86/temporal_filter_constants.h',
+- 'encoder/vp9_alt_ref_aq.h',
+- 'encoder/vp9_aq_variance.h',
+- 'encoder/vp9_aq_360.h',
+- 'encoder/vp9_aq_complexity.h'
+- ),
+-}
+-
+ vp9_dx_headers_sources = files(
+ 'vp9_dx_iface.h',
+ 'decoder/vp9_decodeframe.h',
+@@ -333,12 +293,27 @@ vp9_dx_headers_sources = files(
+ vp9_dx_headers_optional_sources = {
+ }
+
++vp9_common_asm_sources = files(
++)
++
+ vp9_common_asm_optional_sources = {
+ 'sse2' : files(
+ # 'common/x86/vp9_mfqe_sse2.asm' # MANUAL
+ ),
+ }
+
++vp9_cx_asm_sources = files(
++ )
++
++vp9_cx_asm_optional_sources = {
++ 'sse2' : files(
++ 'encoder/x86/vp9_dct_sse2.asm',
++ 'encoder/x86/vp9_error_sse2.asm'
++ ),
++}
++
++#### --- END GENERATED --- ####
++
+ if features.get('vp9_postproc', false) # MANUAL
+ vp9_common_asm_optional_sources += {
+ 'sse2': vp9_common_asm_optional_sources['sse2'] + files(
+@@ -347,12 +322,41 @@ if features.get('vp9_postproc', false) # MANUAL
+ }
+ endif
+
+-vp9_cx_asm_optional_sources = {
+- 'sse2' : files(
+- 'encoder/x86/vp9_dct_sse2.asm',
+- 'encoder/x86/vp9_error_sse2.asm'
+- ),
+-}
++if not features.get('vp9_highbitdepth', false) # MANUAL
++ vp9_common_optional_sources += {
++ 'msa': vp9_common_optional_sources['msa'] + files(
++ 'common/mips/msa/vp9_idct4x4_msa.c',
++ 'common/mips/msa/vp9_idct8x8_msa.c',
++ 'common/mips/msa/vp9_idct16x16_msa.c',
++ ),
++ 'dspr2': vp9_common_optional_sources['dspr2'] + files(
++ 'common/mips/dspr2/vp9_itrans4_dspr2.c',
++ 'common/mips/dspr2/vp9_itrans8_dspr2.c',
++ 'common/mips/dspr2/vp9_itrans16_dspr2.c'
++ )
++ }
++else
++ vp9_common_optional_sources += {
++ 'neon': vp9_common_optional_sources['neon'] + files(
++ 'common/arm/neon/vp9_highbd_iht4x4_add_neon.c',
++ 'common/arm/neon/vp9_highbd_iht8x8_add_neon.c',
++ 'common/arm/neon/vp9_highbd_iht16x16_add_neon.c'
++ ),
++ 'sse4_1': vp9_common_optional_sources['sse4_1'] + files(
++ 'common/x86/vp9_highbd_iht4x4_add_sse4.c',
++ 'common/x86/vp9_highbd_iht8x8_add_sse4.c',
++ 'common/x86/vp9_highbd_iht16x16_add_sse4.c'
++ )
++ }
++endif
++
++if features.get('vp9_postproc', false) # MANUAL
++ vp9_common_optional_sources += {
++ 'msa': vp9_common_optional_sources['msa'] + files(
++ 'common/mips/msa/vp9_mfqe_msa.c'
++ )
++ }
++endif
+
+ if features.get('vp9_temporal_denoising', false)
+ vp9_cx_optional_sources += {
+@@ -370,6 +374,9 @@ if features.get('vp9_highbitdepth', false)
+ 'sse2' : vp9_cx_optional_sources['sse2'] + files(
+ 'encoder/x86/vp9_highbd_block_error_intrin_sse2.c',
+ ),
++ 'neon': vp9_cx_optional_sources['neon'] + files(
++ 'encoder/arm/neon/vp9_highbd_error_neon.c',
++ ),
+ }
+ else
+ vp9_cx_optional_sources += {
+@@ -412,11 +419,17 @@ if not features.get('realtime_only', false)
+ 'sse4_1' : vp9_cx_optional_sources['sse4_1'] + files(
+ 'encoder/x86/temporal_filter_sse4.c',
+ ),
++ 'neon' : vp9_cx_optional_sources['neon'] + files(
++ 'encoder/arm/neon/vp9_temporal_filter_neon.c',
++ ),
+ }
+
+ vp9_cx_headers_optional_sources += {
+ 'sse4_1' : vp9_cx_headers_optional_sources['sse4_1'] + files(
+- 'encoder/x86/temporal_filter_constants.h'
++ 'encoder/vp9_temporal_filter_constants.h'
++ ),
++ 'neon' : vp9_cx_headers_optional_sources['neon'] + files(
++ 'encoder/vp9_temporal_filter_constants.h',
+ ),
+ }
+
+@@ -425,12 +438,13 @@ if not features.get('realtime_only', false)
+ 'sse4_1' : vp9_cx_optional_sources['sse4_1'] + files(
+ 'encoder/x86/highbd_temporal_filter_sse4.c',
+ ),
++ 'neon' : vp9_cx_optional_sources['neon'] + files(
++ 'encoder/arm/neon/vp9_highbd_temporal_filter_neon.c',
++ ),
+ }
+ endif
+ endif
+
+-#### --- END GENERATED --- ####
+-
+ codec_srcs += vp9_common_sources
+ optional_sources = get_variable('vp9_common_optional_sources', {})
+ foreach comp_name, comp_sources : optional_sources
+diff --git a/vpx/meson.build b/vpx/meson.build
+index 08f178d14..854e49f75 100644
+--- a/vpx/meson.build
++++ b/vpx/meson.build
+@@ -10,14 +10,15 @@ api_sources = files(
+ 'src/vpx_encoder.c',
+ 'src/vpx_codec.c',
+ 'src/vpx_image.c',
++ 'src/vpx_tpl.c',
+ )
+
+ api_optional_sources = {
+ }
+
+ api_headers_sources = files(
+- 'vpx_decoder.h',
+- 'vpx_encoder.h',
++ # 'vpx_decoder.h', # MANUAL
++ # 'vpx_encoder.h', # MANUAL
+ 'internal/vpx_codec_internal.h',
+ 'internal/vpx_ratectrl_rtc.h',
+ 'vpx_codec.h',
+@@ -25,9 +26,17 @@ api_headers_sources = files(
+ 'vpx_image.h',
+ 'vpx_integer.h',
+ 'vpx_ext_ratectrl.h',
++ 'vpx_tpl.h',
+ )
+
+ api_headers_optional_sources = {
++ 'decoders': files( # MANUAL
++ 'vpx_tpl.h',
++ 'vpx_decoder.h',
++ ),
++ 'encoders': files( # MANUAL
++ 'vpx_encoder.h',
++ ),
+ 'vp8_decoder' : files(
+ 'vp8.h',
+ 'vp8dx.h'
+@@ -45,6 +54,7 @@ api_doc_headers_sources = files(
+ 'vpx_ext_ratectrl.h',
+ 'vpx_frame_buffer.h',
+ 'vpx_image.h',
++ 'vpx_tpl.h',
+ )
+
+ api_doc_headers_optional_sources = {
+diff --git a/vpx_dsp/meson.build b/vpx_dsp/meson.build
+index d8c83297c..c25458a7c 100644
+--- a/vpx_dsp/meson.build
++++ b/vpx_dsp/meson.build
+@@ -212,6 +212,8 @@ dsp_optional_sources = {
+ # 'arm/intrapred_neon_asm.asm', # MANUAL
+ # 'arm/save_reg_neon.asm' # MANUAL
+ ),
++ 'neon_dotprod' : files(), # MANUAL
++ 'neon_i8mm' : files(), # MANUAL
+ 'sse2' : files(
+ # 'x86/highbd_intrapred_intrin_sse2.c', # MANUAL
+ # 'x86/post_proc_sse2.c', # MANUAL
+@@ -385,6 +387,7 @@ if features.get('encoders', false) # MANUAL
+ 'bitwriter.c',
+ 'bitwriter_buffer.c',
+ 'psnr.c',
++ 'sse.c',
+ )
+
+ dsp_headers_sources += files(
+@@ -399,6 +402,18 @@ if features.get('encoders', false) # MANUAL
+ 'psnrhvs.c',
+ 'fastssim.c'
+ ),
++ 'neon' : dsp_optional_sources['neon'] + files(
++ 'arm/sse_neon.c',
++ ),
++ 'neon_dotprod' : dsp_optional_sources['neon_dotprod'] + files(
++ 'arm/sse_neon_dotprod.c',
++ ),
++ 'sse4_1' : dsp_optional_sources['sse4_1'] + files(
++ 'x86/sse_sse4.c',
++ ),
++ 'avx2' : dsp_optional_sources['avx2'] + files(
++ 'x86/sse_avx2.c',
++ ),
+ }
+
+ dsp_headers_optional_sources += {
+@@ -684,6 +699,16 @@ if features.get('vp9', false) # MANUAL
+ # loop filters
+ 'arm/loopfilter_neon.c',
+ )
++ dsp_optional_sources += {
++ 'neon_dotprod' : dsp_optional_sources['neon_dotprod'] + files(
++ 'arm/vpx_convolve8_neon_dotprod.c',
++ 'arm/vpx_convolve_neon_dotprod.c',
++ ),
++ 'neon_i8mm' : dsp_optional_sources['neon_i8mm'] + files(
++ 'arm/vpx_convolve8_neon_i8mm.c',
++ 'arm/vpx_convolve_neon_i8mm.c',
++ )
++ }
+ endif
+ endif
+
+@@ -718,6 +743,9 @@ if features.get('vp9_encoder', false)
+ 'sse2' : dsp_optional_sources['sse2'] + files(
+ 'x86/fwd_txfm_sse2.c',
+ ),
++ 'avx2': dsp_optional_sources['avx2'] + files(
++ 'x86/inv_txfm_avx2.c',
++ ),
+ 'neon': dsp_optional_sources['neon'] + files(
+ 'arm/fdct4x4_neon.c',
+ 'arm/fdct8x8_neon.c',
+@@ -744,7 +772,13 @@ if features.get('vp9_encoder', false)
+ }
+ endif
+
+- if not features.get('vp9_highbitdepth', false)
++ if features.get('vp9_highbitdepth', false)
++ dsp_optional_sources += {
++ 'neon' : dsp_optional_sources['neon'] + files(
++ 'arm/highbd_avg_neon.c'
++ ),
++ }
++ else
+ dsp_optional_sources += {
+ 'avx2' : dsp_optional_sources['avx2'] + files(
+ 'x86/fwd_txfm_avx2.c'
+@@ -979,6 +1013,8 @@ if features.get('vp9_encoder', false) # MANUAL
+ ),
+ 'neon' : dsp_optional_sources['neon'] + files(
+ 'arm/highbd_quantize_neon.c',
++ 'arm/highbd_hadamard_neon.c',
++ 'arm/highbd_avg_neon.c',
+ ),
+ }
+ endif
+@@ -1007,6 +1043,10 @@ if features.get('encoders', false) # MANUAL
+ 'arm/sad_neon.c',
+ 'arm/subtract_neon.c',
+ ),
++ 'neon_dotprod' : dsp_optional_sources['neon_dotprod'] + files(
++ 'arm/sad4d_neon_dotprod.c',
++ 'arm/sad_neon_dotprod.c',
++ ),
+ 'sse2' : dsp_optional_sources['sse2'] + files(
+ 'x86/sum_squares_sse2.c',
+ ),
+@@ -1059,7 +1099,9 @@ if features.get('encoders', false) # MANUAL
+ 'x86/highbd_sad_avx2.c',
+ ),
+ 'neon' : dsp_optional_sources['neon'] + files(
++ 'arm/highbd_sad4d_neon.c',
+ 'arm/highbd_sad_neon.c',
++ 'arm/highbd_subpel_variance_neon.c',
+ )
+ }
+ endif
+@@ -1081,6 +1123,9 @@ if features.get('encoders', false) or features.get('postproc', false) or feature
+ 'arm/subpel_variance_neon.c',
+ 'arm/variance_neon.c',
+ ),
++ 'neon_dotprod' : dsp_optional_sources['neon_dotprod'] + files(
++ 'arm/variance_neon_dotprod.c',
++ ),
+ 'msa' : dsp_optional_sources['msa'] + files(
+ 'mips/variance_msa.c',
+ 'mips/sub_pixel_variance_msa.c',
+@@ -1098,6 +1143,7 @@ if features.get('encoders', false) or features.get('postproc', false) or feature
+ 'x86/variance_sse2.c', # Contains SSE2 and SSSE3
+ ),
+ 'avx2' : dsp_optional_sources['avx2'] + files(
++ 'x86/avg_pred_avx2.c',
+ 'x86/variance_avx2.c',
+ ),
+ 'vsx' : dsp_optional_sources['vsx'] + files(
+@@ -1140,6 +1186,8 @@ if features.get('encoders', false) or features.get('postproc', false) or feature
+ 'x86/highbd_variance_sse2.c',
+ ),
+ 'neon' : dsp_optional_sources['neon'] + files(
++ 'arm/highbd_avg_pred_neon.c',
++ 'arm/highbd_sse_neon.c',
+ 'arm/highbd_variance_neon.c',
+ )
+ }
+diff --git a/vpx_dsp/vpx_dsp_rtcd_defs.py b/vpx_dsp/vpx_dsp_rtcd_defs.py
+index 03974bebd..17ddfde35 100755
+--- a/vpx_dsp/vpx_dsp_rtcd_defs.py
++++ b/vpx_dsp/vpx_dsp_rtcd_defs.py
+@@ -21,6 +21,10 @@ def vpx_dsp_forward_decls():
+ #include "vpx/vpx_integer.h"
+ #include "vpx_dsp/vpx_dsp_common.h"
+ #include "vpx_dsp/vpx_filter.h"
++#if CONFIG_VP9_ENCODER
++ struct macroblock_plane;
++ struct ScanOrder;
++#endif
+
+ ''')
+ forward_decls(*qw('vpx_dsp_forward_decls'))
+@@ -40,7 +44,7 @@ if opts["arch"] == "x86_64":
+ #
+
+ add_proto(*qw('void vpx_d207_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d207_predictor_4x4 sse2'))
++specialize(*qw('vpx_d207_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_d45_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d45_predictor_4x4 neon sse2'))
+@@ -48,7 +52,7 @@ specialize(*qw('vpx_d45_predictor_4x4 neon sse2'))
+ add_proto(*qw('void vpx_d45e_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+
+ add_proto(*qw('void vpx_d63_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d63_predictor_4x4 ssse3'))
++specialize(*qw('vpx_d63_predictor_4x4 neon ssse3'))
+
+ add_proto(*qw('void vpx_d63e_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+
+@@ -59,12 +63,13 @@ specialize(*qw('vpx_h_predictor_4x4 neon dspr2 msa sse2'))
+ add_proto(*qw('void vpx_he_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+
+ add_proto(*qw('void vpx_d117_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
++specialize(*qw('vpx_d117_predictor_4x4 neon'))
+
+ add_proto(*qw('void vpx_d135_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d135_predictor_4x4 neon'))
+
+ add_proto(*qw('void vpx_d153_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d153_predictor_4x4 ssse3'))
++specialize(*qw('vpx_d153_predictor_4x4 neon ssse3'))
+
+ add_proto(*qw('void vpx_v_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_v_predictor_4x4 neon msa sse2'))
+@@ -88,7 +93,7 @@ add_proto(*qw('void vpx_dc_128_predictor_4x4'), "uint8_t *dst, ptrdiff_t stride,
+ specialize(*qw('vpx_dc_128_predictor_4x4 msa neon sse2'))
+
+ add_proto(*qw('void vpx_d207_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d207_predictor_8x8 ssse3'))
++specialize(*qw('vpx_d207_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_d45_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ # TODO(crbug.com/webm/1522): Re-enable vsx implementation.
+@@ -96,19 +101,20 @@ specialize(*qw('vpx_d45_predictor_8x8 neon sse2'))
+
+ add_proto(*qw('void vpx_d63_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ # TODO(crbug.com/webm/1522): Re-enable vsx implementation.
+-specialize(*qw('vpx_d63_predictor_8x8 ssse3'))
++specialize(*qw('vpx_d63_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_h_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ # TODO(crbug.com/webm/1522): Re-enable vsx implementation.
+ specialize(*qw('vpx_h_predictor_8x8 neon dspr2 msa sse2'))
+
+ add_proto(*qw('void vpx_d117_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
++specialize(*qw('vpx_d117_predictor_8x8 neon'))
+
+ add_proto(*qw('void vpx_d135_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d135_predictor_8x8 neon'))
+
+ add_proto(*qw('void vpx_d153_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d153_predictor_8x8 ssse3'))
++specialize(*qw('vpx_d153_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_v_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_v_predictor_8x8 neon msa sse2'))
+@@ -131,24 +137,25 @@ add_proto(*qw('void vpx_dc_128_predictor_8x8'), "uint8_t *dst, ptrdiff_t stride,
+ specialize(*qw('vpx_dc_128_predictor_8x8 neon msa sse2'))
+
+ add_proto(*qw('void vpx_d207_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d207_predictor_16x16 ssse3'))
++specialize(*qw('vpx_d207_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_d45_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d45_predictor_16x16 neon ssse3 vsx'))
+
+ add_proto(*qw('void vpx_d63_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d63_predictor_16x16 ssse3 vsx'))
++specialize(*qw('vpx_d63_predictor_16x16 neon ssse3 vsx'))
+
+ add_proto(*qw('void vpx_h_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_h_predictor_16x16 neon dspr2 msa sse2 vsx'))
+
+ add_proto(*qw('void vpx_d117_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
++specialize(*qw('vpx_d117_predictor_16x16 neon'))
+
+ add_proto(*qw('void vpx_d135_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d135_predictor_16x16 neon'))
+
+ add_proto(*qw('void vpx_d153_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d153_predictor_16x16 ssse3'))
++specialize(*qw('vpx_d153_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_v_predictor_16x16'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_v_predictor_16x16 neon msa sse2 vsx'))
+@@ -169,24 +176,25 @@ add_proto(*qw('void vpx_dc_128_predictor_16x16'), "uint8_t *dst, ptrdiff_t strid
+ specialize(*qw('vpx_dc_128_predictor_16x16 neon msa sse2 vsx'))
+
+ add_proto(*qw('void vpx_d207_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d207_predictor_32x32 ssse3'))
++specialize(*qw('vpx_d207_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_d45_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d45_predictor_32x32 neon ssse3 vsx'))
+
+ add_proto(*qw('void vpx_d63_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d63_predictor_32x32 ssse3 vsx'))
++specialize(*qw('vpx_d63_predictor_32x32 neon ssse3 vsx'))
+
+ add_proto(*qw('void vpx_h_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_h_predictor_32x32 neon msa sse2 vsx'))
+
+ add_proto(*qw('void vpx_d117_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
++specialize(*qw('vpx_d117_predictor_32x32 neon'))
+
+ add_proto(*qw('void vpx_d135_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_d135_predictor_32x32 neon'))
+
+ add_proto(*qw('void vpx_d153_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+-specialize(*qw('vpx_d153_predictor_32x32 ssse3'))
++specialize(*qw('vpx_d153_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_v_predictor_32x32'), "uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left")
+ specialize(*qw('vpx_v_predictor_32x32 neon msa sse2 vsx'))
+@@ -209,25 +217,25 @@ specialize(*qw('vpx_dc_128_predictor_32x32 msa neon sse2 vsx'))
+ # High bitdepth functions
+ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+ add_proto(*qw('void vpx_highbd_d207_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d207_predictor_4x4 sse2'))
++ specialize(*qw('vpx_highbd_d207_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d45_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d45_predictor_4x4 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d63_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d63_predictor_4x4 sse2'))
++ specialize(*qw('vpx_highbd_d63_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_h_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_h_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d117_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d117_predictor_4x4 sse2'))
++ specialize(*qw('vpx_highbd_d117_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d135_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d135_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d153_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d153_predictor_4x4 sse2'))
++ specialize(*qw('vpx_highbd_d153_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_v_predictor_4x4'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_v_predictor_4x4 neon sse2'))
+@@ -248,25 +256,25 @@ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+ specialize(*qw('vpx_highbd_dc_128_predictor_4x4 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d207_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d207_predictor_8x8 ssse3'))
++ specialize(*qw('vpx_highbd_d207_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d45_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d45_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d63_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d63_predictor_8x8 ssse3'))
++ specialize(*qw('vpx_highbd_d63_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_h_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_h_predictor_8x8 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d117_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d117_predictor_8x8 ssse3'))
++ specialize(*qw('vpx_highbd_d117_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d135_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d135_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d153_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d153_predictor_8x8 ssse3'))
++ specialize(*qw('vpx_highbd_d153_predictor_8x8 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_v_predictor_8x8'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_v_predictor_8x8 neon sse2'))
+@@ -287,25 +295,25 @@ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+ specialize(*qw('vpx_highbd_dc_128_predictor_8x8 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d207_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d207_predictor_16x16 ssse3'))
++ specialize(*qw('vpx_highbd_d207_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d45_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d45_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d63_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d63_predictor_16x16 ssse3'))
++ specialize(*qw('vpx_highbd_d63_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_h_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_h_predictor_16x16 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d117_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d117_predictor_16x16 ssse3'))
++ specialize(*qw('vpx_highbd_d117_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d135_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d135_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d153_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d153_predictor_16x16 ssse3'))
++ specialize(*qw('vpx_highbd_d153_predictor_16x16 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_v_predictor_16x16'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_v_predictor_16x16 neon sse2'))
+@@ -326,25 +334,25 @@ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+ specialize(*qw('vpx_highbd_dc_128_predictor_16x16 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d207_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d207_predictor_32x32 ssse3'))
++ specialize(*qw('vpx_highbd_d207_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d45_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d45_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d63_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d63_predictor_32x32 ssse3'))
++ specialize(*qw('vpx_highbd_d63_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_h_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_h_predictor_32x32 neon sse2'))
+
+ add_proto(*qw('void vpx_highbd_d117_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d117_predictor_32x32 ssse3'))
++ specialize(*qw('vpx_highbd_d117_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d135_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_d135_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_d153_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+- specialize(*qw('vpx_highbd_d153_predictor_32x32 ssse3'))
++ specialize(*qw('vpx_highbd_d153_predictor_32x32 neon ssse3'))
+
+ add_proto(*qw('void vpx_highbd_v_predictor_32x32'), "uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd")
+ specialize(*qw('vpx_highbd_v_predictor_32x32 neon sse2'))
+@@ -376,22 +384,22 @@ if vpx_config("CONFIG_VP9") == "yes":
+ specialize(*qw('vpx_convolve_avg neon dspr2 msa sse2 vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_convolve8'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+- specialize(*qw('vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx'))
++ specialize(*qw('vpx_convolve8 sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_convolve8_horiz'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+- specialize(*qw('vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx'))
++ specialize(*qw('vpx_convolve8_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_convolve8_vert'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+- specialize(*qw('vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx'))
++ specialize(*qw('vpx_convolve8_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_convolve8_avg'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+- specialize(*qw('vpx_convolve8_avg sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx'))
++ specialize(*qw('vpx_convolve8_avg sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_convolve8_avg_horiz'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+- specialize(*qw('vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx'))
++ specialize(*qw('vpx_convolve8_avg_horiz sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_convolve8_avg_vert'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+- specialize(*qw('vpx_convolve8_avg_vert sse2 ssse3 avx2 neon dspr2 msa vsx mmi lsx'))
++ specialize(*qw('vpx_convolve8_avg_vert sse2 ssse3 avx2 neon neon_dotprod neon_i8mm dspr2 msa vsx mmi lsx'))
+
+ add_proto(*qw('void vpx_scaled_2d'), "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h")
+ specialize(*qw('vpx_scaled_2d ssse3 neon msa'))
+@@ -591,7 +599,7 @@ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+ specialize(*qw('vpx_fdct8x8_1 sse2 neon msa'))
+
+ add_proto(*qw('void vpx_fdct16x16'), "const int16_t *input, tran_low_t *output, int stride")
+- specialize(*qw('vpx_fdct16x16 neon sse2 msa lsx'))
++ specialize(*qw('vpx_fdct16x16 neon sse2 avx2 msa lsx'))
+
+ add_proto(*qw('void vpx_fdct16x16_1'), "const int16_t *input, tran_low_t *output, int stride")
+ specialize(*qw('vpx_fdct16x16_1 sse2 neon msa'))
+@@ -635,12 +643,12 @@ if vpx_config("CONFIG_VP9") == "yes":
+ specialize(*qw('vpx_idct8x8_64_add neon sse2 vsx'))
+ specialize(*qw('vpx_idct8x8_12_add neon sse2 ssse3'))
+ specialize(*qw('vpx_idct8x8_1_add neon sse2'))
+- specialize(*qw('vpx_idct16x16_256_add neon sse2 vsx'))
++ specialize(*qw('vpx_idct16x16_256_add neon sse2 avx2 vsx'))
+ specialize(*qw('vpx_idct16x16_38_add neon sse2'))
+ specialize(*qw('vpx_idct16x16_10_add neon sse2'))
+ specialize(*qw('vpx_idct16x16_1_add neon sse2'))
+- specialize(*qw('vpx_idct32x32_1024_add neon sse2 vsx'))
+- specialize(*qw('vpx_idct32x32_135_add neon sse2 ssse3'))
++ specialize(*qw('vpx_idct32x32_1024_add neon sse2 avx2 vsx'))
++ specialize(*qw('vpx_idct32x32_135_add neon sse2 ssse3 avx2'))
+ specialize(*qw('vpx_idct32x32_34_add neon sse2 ssse3'))
+ specialize(*qw('vpx_idct32x32_1_add neon sse2'))
+ specialize(*qw('vpx_iwht4x4_16_add sse2 vsx'))
+@@ -716,17 +724,17 @@ if vpx_config("CONFIG_VP9") == "yes":
+ # Quantization
+ #
+ if vpx_config("CONFIG_VP9_ENCODER") == "yes":
+- add_proto(*qw('void vpx_quantize_b'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vpx_quantize_b'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vpx_quantize_b neon sse2 ssse3 avx avx2 vsx lsx'))
+
+- add_proto(*qw('void vpx_quantize_b_32x32'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vpx_quantize_b_32x32'), "const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vpx_quantize_b_32x32 neon ssse3 avx avx2 vsx lsx'))
+
+ if vpx_config("CONFIG_VP9_HIGHBITDEPTH") == "yes":
+- add_proto(*qw('void vpx_highbd_quantize_b'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vpx_highbd_quantize_b'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vpx_highbd_quantize_b neon sse2 avx2'))
+
+- add_proto(*qw('void vpx_highbd_quantize_b_32x32'), "const tran_low_t *coeff_ptr, intptr_t n_coeffs, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan")
++ add_proto(*qw('void vpx_highbd_quantize_b_32x32'), "const tran_low_t *coeff_ptr, const struct macroblock_plane *const mb_plane, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const struct ScanOrder *const scan_order")
+ specialize(*qw('vpx_highbd_quantize_b_32x32 neon sse2 avx2'))
+ # CONFIG_VP9_HIGHBITDEPTH
+ # CONFIG_VP9_ENCODER
+@@ -738,32 +746,35 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ add_proto(*qw('void vpx_subtract_block'), "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride")
+ specialize(*qw('vpx_subtract_block neon msa mmi sse2 avx2 vsx lsx'))
+
++ add_proto(*qw('int64_t'), "vpx_sse", "const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int width, int height")
++ specialize(*qw('vpx_sse sse4_1 avx2 neon neon_dotprod'))
++
+ #
+ # Single block SAD
+ #
+ add_proto(*qw('unsigned int vpx_sad64x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad64x64 neon avx2 msa sse2 vsx mmi lsx'))
++ specialize(*qw('vpx_sad64x64 neon neon_dotprod avx2 msa sse2 vsx mmi lsx'))
+
+ add_proto(*qw('unsigned int vpx_sad64x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad64x32 neon avx2 msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad64x32 neon neon_dotprod avx2 msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad32x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad32x64 neon avx2 msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad32x64 neon neon_dotprod avx2 msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad32x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad32x32 neon avx2 msa sse2 vsx mmi lsx'))
++ specialize(*qw('vpx_sad32x32 neon neon_dotprod avx2 msa sse2 vsx mmi lsx'))
+
+ add_proto(*qw('unsigned int vpx_sad32x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad32x16 neon avx2 msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad32x16 neon neon_dotprod avx2 msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad16x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad16x32 neon msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad16x32 neon neon_dotprod msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad16x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad16x16 neon msa sse2 vsx mmi lsx'))
++ specialize(*qw('vpx_sad16x16 neon neon_dotprod msa sse2 vsx mmi lsx'))
+
+ add_proto(*qw('unsigned int vpx_sad16x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_sad16x8 neon msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad16x8 neon neon_dotprod msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad8x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+ specialize(*qw('vpx_sad8x16 neon msa sse2 vsx mmi'))
+@@ -780,6 +791,45 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ add_proto(*qw('unsigned int vpx_sad4x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+ specialize(*qw('vpx_sad4x4 neon msa sse2 mmi'))
+
++ add_proto(*qw('unsigned int vpx_sad_skip_64x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_64x64 neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_64x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_64x32 neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_32x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_32x64 neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_32x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_32x32 neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_32x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_32x16 neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_16x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_16x32 neon neon_dotprod sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_16x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_16x16 neon neon_dotprod sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_16x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_16x8 neon neon_dotprod sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_8x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_8x16 neon sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_8x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_8x8 neon sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_8x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_8x4 neon'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_4x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_4x8 neon sse2'))
++
++ add_proto(*qw('unsigned int vpx_sad_skip_4x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_sad_skip_4x4 neon'))
++
+ #
+ # Avg
+ #
+@@ -804,19 +854,19 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ specialize(*qw('vpx_hadamard_32x32 sse2 avx2 neon'))
+
+ add_proto(*qw('void vpx_highbd_hadamard_8x8'), "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff")
+- specialize(*qw('vpx_highbd_hadamard_8x8 avx2'))
++ specialize(*qw('vpx_highbd_hadamard_8x8 avx2 neon'))
+
+ add_proto(*qw('void vpx_highbd_hadamard_16x16'), "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff")
+- specialize(*qw('vpx_highbd_hadamard_16x16 avx2'))
++ specialize(*qw('vpx_highbd_hadamard_16x16 avx2 neon'))
+
+ add_proto(*qw('void vpx_highbd_hadamard_32x32'), "const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff")
+- specialize(*qw('vpx_highbd_hadamard_32x32 avx2'))
++ specialize(*qw('vpx_highbd_hadamard_32x32 avx2 neon'))
+
+ add_proto(*qw('int vpx_satd'), "const tran_low_t *coeff, int length")
+ specialize(*qw('vpx_satd avx2 sse2 neon'))
+
+ add_proto(*qw('int vpx_highbd_satd'), "const tran_low_t *coeff, int length")
+- specialize(*qw('vpx_highbd_satd avx2'))
++ specialize(*qw('vpx_highbd_satd avx2 neon'))
+ else:
+ add_proto(*qw('void vpx_hadamard_8x8'), "const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff")
+ specialize(*qw('vpx_hadamard_8x8 sse2 neon msa vsx lsx'), f"{ssse3_x86_64}")
+@@ -832,38 +882,37 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+
+
+ add_proto(*qw('void vpx_int_pro_row'), "int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height")
+- specialize(*qw('vpx_int_pro_row sse2 neon msa'))
+-
++ specialize(*qw('vpx_int_pro_row neon sse2 msa'))
+ add_proto(*qw('int16_t vpx_int_pro_col'), "const uint8_t *ref, const int width")
+- specialize(*qw('vpx_int_pro_col sse2 neon msa'))
++ specialize(*qw('vpx_int_pro_col neon sse2 msa'))
+
+ add_proto(*qw('int vpx_vector_var'), "const int16_t *ref, const int16_t *src, const int bwl")
+ specialize(*qw('vpx_vector_var neon sse2 msa'))
+ # CONFIG_VP9_ENCODER
+
+ add_proto(*qw('unsigned int vpx_sad64x64_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad64x64_avg neon avx2 msa sse2 vsx mmi lsx'))
++ specialize(*qw('vpx_sad64x64_avg neon neon_dotprod avx2 msa sse2 vsx mmi lsx'))
+
+ add_proto(*qw('unsigned int vpx_sad64x32_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad64x32_avg neon avx2 msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad64x32_avg neon neon_dotprod avx2 msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad32x64_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad32x64_avg neon avx2 msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad32x64_avg neon neon_dotprod avx2 msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad32x32_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad32x32_avg neon avx2 msa sse2 vsx mmi lsx'))
++ specialize(*qw('vpx_sad32x32_avg neon neon_dotprod avx2 msa sse2 vsx mmi lsx'))
+
+ add_proto(*qw('unsigned int vpx_sad32x16_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad32x16_avg neon avx2 msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad32x16_avg neon neon_dotprod avx2 msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad16x32_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad16x32_avg neon msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad16x32_avg neon neon_dotprod msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad16x16_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad16x16_avg neon msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad16x16_avg neon neon_dotprod msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad16x8_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+- specialize(*qw('vpx_sad16x8_avg neon msa sse2 vsx mmi'))
++ specialize(*qw('vpx_sad16x8_avg neon neon_dotprod msa sse2 vsx mmi'))
+
+ add_proto(*qw('unsigned int vpx_sad8x16_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+ specialize(*qw('vpx_sad8x16_avg neon msa sse2 mmi'))
+@@ -883,45 +932,84 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ #
+ # Multi-block SAD, comparing a reference to N independent blocks
+ #
+- add_proto(*qw('void vpx_sad64x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad64x64x4d avx512 avx2 neon msa sse2 vsx mmi lsx'))
++ add_proto(*qw('void vpx_sad64x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad64x64x4d avx512 avx2 neon neon_dotprod msa sse2 vsx mmi lsx'))
+
+- add_proto(*qw('void vpx_sad64x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad64x32x4d neon msa sse2 vsx mmi lsx'))
++ add_proto(*qw('void vpx_sad64x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad64x32x4d neon neon_dotprod msa sse2 vsx mmi lsx'))
+
+- add_proto(*qw('void vpx_sad32x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad32x64x4d neon msa sse2 vsx mmi lsx'))
++ add_proto(*qw('void vpx_sad32x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad32x64x4d neon neon_dotprod msa sse2 vsx mmi lsx'))
+
+- add_proto(*qw('void vpx_sad32x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad32x32x4d avx2 neon msa sse2 vsx mmi lsx'))
++ add_proto(*qw('void vpx_sad32x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad32x32x4d avx2 neon neon_dotprod msa sse2 vsx mmi lsx'))
+
+- add_proto(*qw('void vpx_sad32x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad32x16x4d neon msa sse2 vsx mmi'))
++ add_proto(*qw('void vpx_sad32x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad32x16x4d neon neon_dotprod msa sse2 vsx mmi'))
+
+- add_proto(*qw('void vpx_sad16x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad16x32x4d neon msa sse2 vsx mmi'))
++ add_proto(*qw('void vpx_sad16x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad16x32x4d neon neon_dotprod msa sse2 vsx mmi'))
+
+- add_proto(*qw('void vpx_sad16x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad16x16x4d neon msa sse2 vsx mmi lsx'))
++ add_proto(*qw('void vpx_sad16x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad16x16x4d neon neon_dotprod msa sse2 vsx mmi lsx'))
+
+- add_proto(*qw('void vpx_sad16x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+- specialize(*qw('vpx_sad16x8x4d neon msa sse2 vsx mmi'))
++ add_proto(*qw('void vpx_sad16x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad16x8x4d neon neon_dotprod msa sse2 vsx mmi'))
+
+- add_proto(*qw('void vpx_sad8x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_sad8x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_sad8x16x4d neon msa sse2 mmi'))
+
+- add_proto(*qw('void vpx_sad8x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_sad8x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_sad8x8x4d neon msa sse2 mmi lsx'))
+
+- add_proto(*qw('void vpx_sad8x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_sad8x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_sad8x4x4d neon msa sse2 mmi'))
+
+- add_proto(*qw('void vpx_sad4x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_sad4x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_sad4x8x4d neon msa sse2 mmi'))
+
+- add_proto(*qw('void vpx_sad4x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t * const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_sad4x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_sad4x4x4d neon msa sse2 mmi'))
+
++ add_proto(*qw('void vpx_sad_skip_64x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_64x64x4d neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_64x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_64x32x4d neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_32x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_32x64x4d neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_32x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_32x32x4d neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_32x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_32x16x4d neon neon_dotprod avx2 sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_16x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_16x32x4d neon neon_dotprod sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_16x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_16x16x4d neon neon_dotprod sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_16x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_16x8x4d neon neon_dotprod sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_8x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_8x16x4d neon sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_8x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_8x8x4d neon sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_8x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_8x4x4d neon'))
++
++ add_proto(*qw('void vpx_sad_skip_4x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_4x8x4d neon sse2'))
++
++ add_proto(*qw('void vpx_sad_skip_4x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_sad_skip_4x4x4d neon'))
++
+ add_proto(*qw('uint64_t vpx_sum_squares_2d_i16'), "const int16_t *src, int stride, int size")
+ specialize(*qw('vpx_sum_squares_2d_i16 neon sse2 msa'))
+
+@@ -942,6 +1030,9 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ add_proto(*qw('void vpx_highbd_subtract_block'), "int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src8_ptr, ptrdiff_t src_stride, const uint8_t *pred8_ptr, ptrdiff_t pred_stride, int bd")
+ specialize(*qw('vpx_highbd_subtract_block neon avx2'))
+
++ add_proto(*qw('int64_t'), "vpx_highbd_sse", "const uint8_t *a8, int a_stride, const uint8_t *b8,int b_stride, int width, int height")
++ specialize(*qw('vpx_highbd_sse sse4_1 avx2 neon'))
++
+ #
+ # Single block SAD
+ #
+@@ -984,16 +1075,56 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ add_proto(*qw('unsigned int vpx_highbd_sad4x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
+ specialize(*qw('vpx_highbd_sad4x4 neon'))
+
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_64x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_64x64 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_64x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_64x32 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_32x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_32x64 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_32x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_32x32 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_32x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_32x16 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_16x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_16x32 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_16x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_16x16 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_16x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_16x8 neon sse2 avx2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_8x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_8x16 neon sse2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_8x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_8x8 neon sse2'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_8x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_8x4 neon'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_4x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_4x8 neon'))
++
++ add_proto(*qw('unsigned int vpx_highbd_sad_skip_4x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride")
++ specialize(*qw('vpx_highbd_sad_skip_4x4 neon'))
++
+ #
+ # Avg
+ #
+ add_proto(*qw('unsigned int vpx_highbd_avg_8x8'), "const uint8_t *s8, int p")
+- specialize(*qw('vpx_highbd_avg_8x8 sse2'))
++ specialize(*qw('vpx_highbd_avg_8x8 sse2 neon'))
+
+ add_proto(*qw('unsigned int vpx_highbd_avg_4x4'), "const uint8_t *s8, int p")
+- specialize(*qw('vpx_highbd_avg_4x4 sse2'))
++ specialize(*qw('vpx_highbd_avg_4x4 sse2 neon'))
+
+ add_proto(*qw('void vpx_highbd_minmax_8x8'), "const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max")
++ specialize(*qw('vpx_highbd_minmax_8x8 neon'))
+
+ add_proto(*qw('unsigned int vpx_highbd_sad64x64_avg'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred")
+ specialize(*qw('vpx_highbd_sad64x64_avg sse2 neon avx2'))
+@@ -1037,45 +1168,84 @@ if vpx_config("CONFIG_ENCODERS") == "yes":
+ #
+ # Multi-block SAD, comparing a reference to N independent blocks
+ #
+- add_proto(*qw('void vpx_highbd_sad64x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad64x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad64x64x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad64x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad64x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad64x32x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad32x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad32x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad32x64x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad32x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad32x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad32x32x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad32x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad32x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad32x16x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad16x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad16x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad16x32x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad16x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad16x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad16x16x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad16x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad16x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad16x8x4d sse2 neon avx2'))
+
+- add_proto(*qw('void vpx_highbd_sad8x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad8x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad8x16x4d sse2 neon'))
+
+- add_proto(*qw('void vpx_highbd_sad8x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad8x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad8x8x4d sse2 neon'))
+
+- add_proto(*qw('void vpx_highbd_sad8x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad8x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad8x4x4d sse2 neon'))
+
+- add_proto(*qw('void vpx_highbd_sad4x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad4x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad4x8x4d sse2 neon'))
+
+- add_proto(*qw('void vpx_highbd_sad4x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t* const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ add_proto(*qw('void vpx_highbd_sad4x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
+ specialize(*qw('vpx_highbd_sad4x4x4d sse2 neon'))
+
++ add_proto(*qw('void vpx_highbd_sad_skip_64x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_64x64x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_64x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_64x32x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_32x64x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_32x64x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_32x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_32x32x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_32x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_32x16x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_16x32x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_16x32x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_16x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_16x16x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_16x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_16x8x4d neon sse2 avx2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_8x16x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_8x16x4d neon sse2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_8x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_8x8x4d neon sse2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_8x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_8x4x4d neon'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_4x8x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_4x8x4d neon sse2'))
++
++ add_proto(*qw('void vpx_highbd_sad_skip_4x4x4d'), "const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]")
++ specialize(*qw('vpx_highbd_sad_skip_4x4x4d neon'))
++
+ #
+ # Structured Similarity (SSIM)
+ #
+@@ -1091,73 +1261,73 @@ if vpx_config("CONFIG_ENCODERS") == "yes" or vpx_config("CONFIG_POSTPROC") == "y
+ # Variance
+ #
+ add_proto(*qw('unsigned int vpx_variance64x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance64x64 sse2 avx2 neon msa mmi vsx lsx'))
++ specialize(*qw('vpx_variance64x64 sse2 avx2 neon neon_dotprod msa mmi vsx lsx'))
+
+ add_proto(*qw('unsigned int vpx_variance64x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance64x32 sse2 avx2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance64x32 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance32x64'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance32x64 sse2 avx2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance32x64 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance32x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance32x32 sse2 avx2 neon msa mmi vsx lsx'))
++ specialize(*qw('vpx_variance32x32 sse2 avx2 neon neon_dotprod msa mmi vsx lsx'))
+
+ add_proto(*qw('unsigned int vpx_variance32x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance32x16 sse2 avx2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance32x16 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance16x32'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance16x32 sse2 avx2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance16x32 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance16x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance16x16 sse2 avx2 neon msa mmi vsx lsx'))
++ specialize(*qw('vpx_variance16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx'))
+
+ add_proto(*qw('unsigned int vpx_variance16x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance16x8 sse2 avx2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance16x8 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance8x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance8x16 sse2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance8x16 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance8x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance8x8 sse2 neon msa mmi vsx lsx'))
++ specialize(*qw('vpx_variance8x8 sse2 avx2 neon neon_dotprod msa mmi vsx lsx'))
+
+ add_proto(*qw('unsigned int vpx_variance8x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance8x4 sse2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance8x4 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance4x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance4x8 sse2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance4x8 sse2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_variance4x4'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_variance4x4 sse2 neon msa mmi vsx'))
++ specialize(*qw('vpx_variance4x4 sse2 neon neon_dotprod msa mmi vsx'))
+
+ #
+ # Specialty Variance
+ #
+ add_proto(*qw('void vpx_get16x16var'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum")
+- specialize(*qw('vpx_get16x16var sse2 avx2 neon msa vsx lsx'))
++ specialize(*qw('vpx_get16x16var sse2 avx2 neon neon_dotprod msa vsx lsx'))
+
+ add_proto(*qw('void vpx_get8x8var'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum")
+- specialize(*qw('vpx_get8x8var sse2 neon msa vsx'))
++ specialize(*qw('vpx_get8x8var sse2 neon neon_dotprod msa vsx'))
+
+ add_proto(*qw('unsigned int vpx_mse16x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_mse16x16 sse2 avx2 neon msa mmi vsx lsx'))
++ specialize(*qw('vpx_mse16x16 sse2 avx2 neon neon_dotprod msa mmi vsx lsx'))
+
+ add_proto(*qw('unsigned int vpx_mse16x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_mse16x8 sse2 avx2 msa mmi vsx'))
++ specialize(*qw('vpx_mse16x8 sse2 avx2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_mse8x16'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_mse8x16 sse2 msa mmi vsx'))
++ specialize(*qw('vpx_mse8x16 sse2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_mse8x8'), "const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse")
+- specialize(*qw('vpx_mse8x8 sse2 msa mmi vsx'))
++ specialize(*qw('vpx_mse8x8 sse2 neon neon_dotprod msa mmi vsx'))
+
+ add_proto(*qw('unsigned int vpx_get_mb_ss'), "const int16_t *")
+ specialize(*qw('vpx_get_mb_ss sse2 msa vsx'))
+
+ add_proto(*qw('unsigned int vpx_get4x4sse_cs'), "const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride")
+- specialize(*qw('vpx_get4x4sse_cs neon msa vsx'))
++ specialize(*qw('vpx_get4x4sse_cs neon neon_dotprod msa vsx'))
+
+ add_proto(*qw('void vpx_comp_avg_pred'), "uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride")
+- specialize(*qw('vpx_comp_avg_pred neon sse2 vsx lsx'))
++ specialize(*qw('vpx_comp_avg_pred neon sse2 avx2 vsx lsx'))
+
+ #
+ # Subpixel Variance
+diff --git a/vpx_ports/meson.build b/vpx_ports/meson.build
+index 961ed54bd..c8079af98 100644
+--- a/vpx_ports/meson.build
++++ b/vpx_ports/meson.build
+@@ -12,9 +12,6 @@ ports_optional_sources = {
+ 'mmx' : files(
+ # 'emms_mmx.c' # MANUAL
+ ),
+- 'arm' : files(
+- 'arm_cpudetect.c',
+- ),
+ 'ppc' : files(
+ 'ppc_cpudetect.c',
+ ),
+@@ -24,6 +21,7 @@ ports_optional_sources = {
+ 'loongarch' : files(
+ 'loongarch_cpudetect.c',
+ ),
++ 'arm': files(), # MANUAL
+ }
+
+ ports_headers_sources = files(
+@@ -41,6 +39,7 @@ ports_headers_sources = files(
+ ports_headers_optional_sources = {
+ 'arm' : files(
+ 'arm.h',
++ 'arm_cpudetect.h'
+ ),
+ 'ppc' : files(
+ 'ppc.h',
+@@ -66,6 +65,8 @@ ports_asm_optional_sources = {
+ ),
+ }
+
++#### --- END GENERATED --- ####
++
+ ports_headers_sources += files( # MANUAL
+ 'emmintrin_compat.h',
+ 'mem_ops.h',
+@@ -73,6 +74,18 @@ ports_headers_sources += files( # MANUAL
+ 'vpx_once.h',
+ )
+
++if features.get('aarch64', false) # MANUAL
++ ports_sources += files(
++ 'aarch64_cpudetect.c'
++ )
++else
++ ports_optional_sources += {
++ 'arm' : ports_optional_sources['arm'] + files(
++ 'aarch32_cpudetect.c'
++ ),
++ }
++endif
++
+ if features.get('x86', false) # MANUAL
+ ports_optional_sources += {
+ 'mmx' : ports_optional_sources['mmx'] + files(
+@@ -115,8 +128,6 @@ ports_headers_optional_sources += { # MANUAL
+ )
+ }
+
+-#### --- END GENERATED --- ####
+-
+ codec_srcs += ports_sources
+ optional_sources = get_variable('ports_optional_sources', {})
+ foreach comp_name, comp_sources : optional_sources
+diff --git a/vpx_util/meson.build b/vpx_util/meson.build
+index e39384421..fa6d42e5e 100644
+--- a/vpx_util/meson.build
++++ b/vpx_util/meson.build
+@@ -30,6 +30,9 @@ util_headers_optional_sources = {
+ ),
+ }
+
++#### --- END GENERATED --- ####
++
++
+ if features.get('bitstream_debug', false) or features.get('mismatch_debug', false)
+ util_sources += files(
+ 'vpx_debug_util.c'
+@@ -40,8 +43,6 @@ if features.get('bitstream_debug', false) or features.get('mismatch_debug', fals
+ )
+ endif
+
+-#### --- END GENERATED --- ####
+-
+ optional_sources = get_variable('util_optional_sources', {})
+ foreach comp_name, comp_sources : optional_sources
+ if features.get(comp_name, false)
+--
+2.44.0.windows.1
+
+
+From e9e378d725ae4d0fadcc5e8ac72105b3e1edbd5c Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Wed, 10 Jul 2024 19:50:24 -0300
+Subject: [PATCH] meson: Properly ban shared libraries with the MSVC ABI
+
+I hadn't noticed that the code block already existed; it was just in a
+different place.
+
+Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/21>
+
+diff --git a/meson.build b/meson.build
+index 198165e61..fee067173 100644
+--- a/meson.build
++++ b/meson.build
+@@ -1542,12 +1542,14 @@ endif
+
+ ### process_detect ###
+
+-# Shared builds are supported everywhere thanks to
+-# the Meson generation of the module definition files.
+-# if features.get('shared', false) and not ['linux', 'solaris', 'darwin', 'iphonesimulator'].contains(tgt_os) and not features.get('gnu', false)
+-# # Again, no OS/2 because Meson itself doens't support it
+-# error('Shared library build is only supported on ELF and Darwin for now')
+-# endif
++# MSVC requires declspec on data symbols when importing from a shared library.
++if features.get('shared', false) and not ['linux', 'solaris', 'darwin', 'ios', 'iphonesimulator'].contains(tgt_os)
++ if features.get('gcc', false) # Looks like a typo on upstream
++ warning('Shared library build is only supported on ELF; assuming this is OK')
++ else
++ error('Shared library build is only supported on ELF and Darwin for now')
++ endif
++endif
+
+ features.set('unistd_h', c.has_header('unistd.h'))
+ if c.has_header('vpx/vpx_integer.h', include_directories: include_directories('.'))
+@@ -1670,14 +1672,6 @@ vpx_config_c = configure_file(
+
+ # libs.mk
+
+-if features.get('shared', false) and not ['linux', 'darwin', 'ios', 'iphonesimulator'].contains(tgt_os)
+- if features.get('gcc', false) # Looks like a typo on upstream
+- warn('Shared libraries are only supported on ELF; assuming this is OK')
+- else
+- error('Shared libraries are only supported on ELF, OS/2, and Darwin for now')
+- endif
+-endif
+-
+ rtcd_exe = find_program('build/make/rtcd.py', required: true)
+
+ extra_libs += c.find_library('m', required: false)
+--
+2.44.0.windows.1
+
+
+From 26d95c89ad8da6f958e909783c3c353d7ee4ca86 Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Thu, 11 Jul 2024 01:10:29 +0000
+Subject: [PATCH] meson: Fix wrong Xcode testing for armv7
+
+Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/21>
+
+diff --git a/meson.build b/meson.build
+index fee067173..df50b53d4 100644
+--- a/meson.build
++++ b/meson.build
+@@ -765,7 +765,7 @@ if tgt_isa.startswith('arm')
+ check: true
+ ).stdout().strip().split('\n').get(0, '').split()
+
+- if features.get('neon', false) and xcode_version.get(1, '').version_compare('>=6.3')
++ if features.get('neon', false) and not xcode_version.get(1, '').version_compare('>=6.3')
+ feature = 'neon'
+ if not features.get(feature, false)
+ if not features.has(feature)
+--
+2.44.0.windows.1
+
+
+From 2918ddf8a880cf0d9768b1c8464d288c2fcb87c0 Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Sat, 20 Jul 2024 20:56:31 +0000
+Subject: [PATCH] meson: Fix discrepancy in option formatting between the RTCD
+ Perl and Python generators
+
+Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/21>
+
+diff --git a/meson.build b/meson.build
+index df50b53d4..0ad31918a 100644
+--- a/meson.build
++++ b/meson.build
+@@ -1816,7 +1816,20 @@ else
+ asm_compiler = disabler()
+ endif
+
+-rtcd_h_template = ['--arch=@0@'.format(tgt_isa), '--config=@INPUT0@'] + rtcd_options
++rtcd_h_template = ['--arch=@0@'.format(tgt_isa), '--config=@INPUT0@']
++
++original_rtcd_options = []
++merge_rtcd_opt = ''
++foreach opt: rtcd_options
++ if merge_rtcd_opt != ''
++ original_rtcd_options += ['@0@-@1@'.format(merge_rtcd_opt, opt)]
++ merge_rtcd_opt = ''
++ elif opt == '--disable'
++ merge_rtcd_opt = opt
++ else
++ original_rtcd_options += opt
++ endif
++endforeach
+
+ doxy_template = find_program('meson/generate_doxy.py', required: true)
+
+@@ -1849,7 +1862,7 @@ foreach symbol, input_file : codec_rtcds
+ rtcd_py = files(input_file)
+
+ rtcd_h = configure_file(
+- command: [rtcd_exe] + rtcd_h_template + ['--sym=@0@'.format(symbol), '@INPUT1@'],
++ command: [rtcd_exe] + rtcd_h_template + rtcd_options + ['--sym=@0@'.format(symbol), '@INPUT1@'],
+ input: [config_mk] + rtcd_py,
+ output: rtcd_filename,
+ capture: true,
+@@ -1864,7 +1877,7 @@ foreach symbol, input_file : codec_rtcds
+ )
+
+ original_rtcd_h = configure_file(
+- command: [original_rtcd_exe] + rtcd_h_template + ['--sym=@0@'.format(symbol), '@INPUT1@'],
++ command: [original_rtcd_exe] + rtcd_h_template + original_rtcd_options + ['--sym=@0@'.format(symbol), '@INPUT1@'],
+ input: [config_mk] + original_rtcd_pl,
+ output: 'original@0@'.format(rtcd_filename),
+ capture: true,
+--
+2.44.0.windows.1
+
+
+From 31fdd5dd78347b2468d8a3c4a946f21d230cf19b Mon Sep 17 00:00:00 2001
+From: "L. E. Segovia" <amy@centricular.com>
+Date: Tue, 6 Aug 2024 17:00:29 -0300
+Subject: [PATCH] vp9: Fix Apple silicon build
+
+ld: Undefined symbols:
+ _vp9_block_error_fp_neon, referenced from:
+ _block_yrd in vp9_encoder_vp9_pickmode.c.o
+ _vp9_block_error_neon, referenced from:
+ _rd_pick_intra_sub_8x8_y_mode in vp9_encoder_vp9_rdopt.c.o
+ _rd_pick_intra_sub_8x8_y_mode in vp9_encoder_vp9_rdopt.c.o
+ _rd_pick_best_sub8x8_mode in vp9_encoder_vp9_rdopt.c.o
+ _dist_block in vp9_encoder_vp9_rdopt.c.o
+ _vp9_setup_tpl_stats in vp9_encoder_vp9_tpl_model.c.o
+
+Part-of: <https://gitlab.freedesktop.org/gstreamer/meson-ports/libvpx/-/merge_requests/21>
+
+diff --git a/vp9/meson.build b/vp9/meson.build
+index e9314966c..c1c63f044 100644
+--- a/vp9/meson.build
++++ b/vp9/meson.build
+@@ -131,7 +131,7 @@ vp9_cx_optional_sources = {
+ # 'encoder/arm/neon/vp9_highbd_temporal_filter_neon.c', # MANUAL
+ 'encoder/arm/neon/vp9_dct_neon.c',
+ # 'encoder/arm/neon/vp9_denoiser_neon.c', # MANUAL
+- # 'encoder/arm/neon/vp9_error_neon.c', # MANUAL
++ 'encoder/arm/neon/vp9_error_neon.c',
+ 'encoder/arm/neon/vp9_frame_scale_neon.c',
+ 'encoder/arm/neon/vp9_quantize_neon.c',
+ # 'encoder/arm/neon/vp9_highbd_error_neon.c', # MANUAL
+@@ -380,9 +380,6 @@ if features.get('vp9_highbitdepth', false)
+ }
+ else
+ vp9_cx_optional_sources += {
+- 'neon' : vp9_cx_optional_sources['neon'] + files(
+- 'encoder/arm/neon/vp9_error_neon.c',
+- ),
+ 'msa' : vp9_cx_optional_sources['msa'] + files(
+ 'encoder/mips/msa/vp9_fdct4x4_msa.c',
+ 'encoder/mips/msa/vp9_fdct8x8_msa.c',
+--
+2.44.0.windows.1
+