diff options
author | Ard Biesheuvel <ardb@kernel.org> | 2022-10-20 15:54:33 +0200 |
---|---|---|
committer | Ard Biesheuvel <ardb@kernel.org> | 2023-09-11 08:13:17 +0000 |
commit | cf8e8658100d4eae80ce9b21f7a81cb024dd5057 (patch) | |
tree | 31d3b640bebf97c33d354768fc44dfd532c2df81 /arch/ia64/lib/clear_user.S | |
parent | a0334bf78b95532cec54f56b53e8ae1bfe7e1ca1 (diff) |
arch: Remove Itanium (IA-64) architecture
The Itanium architecture is obsolete, and an informal survey [0] reveals
that any residual use of Itanium hardware in production is mostly HP-UX
or OpenVMS based. The use of Linux on Itanium appears to be limited to
enthusiasts that occasionally boot a fresh Linux kernel to see whether
things are still working as intended, and perhaps to churn out some
distro packages that are rarely used in practice.
None of the original companies behind Itanium still produce or support
any hardware or software for the architecture, and it is listed as
'Orphaned' in the MAINTAINERS file, as apparently, none of the engineers
that contributed on behalf of those companies (nor anyone else, for that
matter) have been willing to support or maintain the architecture
upstream or even be responsible for applying the odd fix. The Intel
firmware team removed all IA-64 support from the Tianocore/EDK2
reference implementation of EFI in 2018. (Itanium is the original
architecture for which EFI was developed, and the way Linux supports it
deviates significantly from other architectures.) Some distros, such as
Debian and Gentoo, still maintain [unofficial] ia64 ports, but many have
dropped support years ago.
While the argument is being made [1] that there is a 'for the common
good' angle to being able to build and run existing projects such as the
Grid Community Toolkit [2] on Itanium for interoperability testing, the
fact remains that none of those projects are known to be deployed on
Linux/ia64, and very few people actually have access to such a system in
the first place. Even if there were ways imaginable in which Linux/ia64
could be put to good use today, what matters is whether anyone is
actually doing that, and this does not appear to be the case.
There are no emulators widely available, and so boot testing Itanium is
generally infeasible for ordinary contributors. GCC still supports IA-64
but its compile farm [3] no longer has any IA-64 machines. GLIBC would
like to get rid of IA-64 [4] too because it would permit some overdue
code cleanups. In summary, the benefits to the ecosystem of having IA-64
be part of it are mostly theoretical, whereas the maintenance overhead
of keeping it supported is real.
So let's rip off the band aid, and remove the IA-64 arch code entirely.
This follows the timeline proposed by the Debian/ia64 maintainer [5],
which removes support in a controlled manner, leaving IA-64 in a known
good state in the most recent LTS release. Other projects will follow
once the kernel support is removed.
[0] https://lore.kernel.org/all/CAMj1kXFCMh_578jniKpUtx_j8ByHnt=s7S+yQ+vGbKt9ud7+kQ@mail.gmail.com/
[1] https://lore.kernel.org/all/0075883c-7c51-00f5-2c2d-5119c1820410@web.de/
[2] https://gridcf.org/gct-docs/latest/index.html
[3] https://cfarm.tetaneutral.net/machines/list/
[4] https://lore.kernel.org/all/87bkiilpc4.fsf@mid.deneb.enyo.de/
[5] https://lore.kernel.org/all/ff58a3e76e5102c94bb5946d99187b358def688a.camel@physik.fu-berlin.de/
Acked-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Diffstat (limited to 'arch/ia64/lib/clear_user.S')
-rw-r--r-- | arch/ia64/lib/clear_user.S | 212 |
1 files changed, 0 insertions, 212 deletions
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S deleted file mode 100644 index 1d9e45ccf8e5..000000000000 --- a/arch/ia64/lib/clear_user.S +++ /dev/null @@ -1,212 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This routine clears to zero a linear memory buffer in user space. - * - * Inputs: - * in0: address of buffer - * in1: length of buffer in bytes - * Outputs: - * r8: number of bytes that didn't get cleared due to a fault - * - * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co - * Stephane Eranian <eranian@hpl.hp.com> - */ - -#include <linux/export.h> -#include <asm/asmmacro.h> - -// -// arguments -// -#define buf r32 -#define len r33 - -// -// local registers -// -#define cnt r16 -#define buf2 r17 -#define saved_lc r18 -#define saved_pfs r19 -#define tmp r20 -#define len2 r21 -#define len3 r22 - -// -// Theory of operations: -// - we check whether or not the buffer is small, i.e., less than 17 -// in which case we do the byte by byte loop. -// -// - Otherwise we go progressively from 1 byte store to 8byte store in -// the head part, the body is a 16byte store loop and we finish we the -// tail for the last 15 bytes. -// The good point about this breakdown is that the long buffer handling -// contains only 2 branches. -// -// The reason for not using shifting & masking for both the head and the -// tail is to stay semantically correct. This routine is not supposed -// to write bytes outside of the buffer. While most of the time this would -// be ok, we can't tolerate a mistake. A classical example is the case -// of multithreaded code were to the extra bytes touched is actually owned -// by another thread which runs concurrently to ours. Another, less likely, -// example is with device drivers where reading an I/O mapped location may -// have side effects (same thing for writing). -// - -GLOBAL_ENTRY(__do_clear_user) - .prologue - .save ar.pfs, saved_pfs - alloc saved_pfs=ar.pfs,2,0,0,0 - cmp.eq p6,p0=r0,len // check for zero length - .save ar.lc, saved_lc - mov saved_lc=ar.lc // preserve ar.lc (slow) - .body - ;; // avoid WAW on CFM - adds tmp=-1,len // br.ctop is repeat/until - mov ret0=len // return value is length at this point -(p6) br.ret.spnt.many rp - ;; - cmp.lt p6,p0=16,len // if len > 16 then long memset - mov ar.lc=tmp // initialize lc for small count -(p6) br.cond.dptk .long_do_clear - ;; // WAR on ar.lc - // - // worst case 16 iterations, avg 8 iterations - // - // We could have played with the predicates to use the extra - // M slot for 2 stores/iteration but the cost the initialization - // the various counters compared to how long the loop is supposed - // to last on average does not make this solution viable. - // -1: - EX( .Lexit1, st1 [buf]=r0,1 ) - adds len=-1,len // countdown length using len - br.cloop.dptk 1b - ;; // avoid RAW on ar.lc - // - // .Lexit4: comes from byte by byte loop - // len contains bytes left -.Lexit1: - mov ret0=len // faster than using ar.lc - mov ar.lc=saved_lc - br.ret.sptk.many rp // end of short clear_user - - - // - // At this point we know we have more than 16 bytes to copy - // so we focus on alignment (no branches required) - // - // The use of len/len2 for countdown of the number of bytes left - // instead of ret0 is due to the fact that the exception code - // changes the values of r8. - // -.long_do_clear: - tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) - ;; - EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned -(p6) adds len=-1,len;; // sync because buf is modified - tbit.nz p6,p0=buf,1 - ;; - EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned -(p6) adds len=-2,len;; - tbit.nz p6,p0=buf,2 - ;; - EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned -(p6) adds len=-4,len;; - tbit.nz p6,p0=buf,3 - ;; - EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned -(p6) adds len=-8,len;; - shr.u cnt=len,4 // number of 128-bit (2x64bit) words - ;; - cmp.eq p6,p0=r0,cnt - adds tmp=-1,cnt -(p6) br.cond.dpnt .dotail // we have less than 16 bytes left - ;; - adds buf2=8,buf // setup second base pointer - mov ar.lc=tmp - ;; - - // - // 16bytes/iteration core loop - // - // The second store can never generate a fault because - // we come into the loop only when we are 16-byte aligned. - // This means that if we cross a page then it will always be - // in the first store and never in the second. - // - // - // We need to keep track of the remaining length. A possible (optimistic) - // way would be to use ar.lc and derive how many byte were left by - // doing : left= 16*ar.lc + 16. this would avoid the addition at - // every iteration. - // However we need to keep the synchronization point. A template - // M;;MB does not exist and thus we can keep the addition at no - // extra cycle cost (use a nop slot anyway). It also simplifies the - // (unlikely) error recovery code - // - -2: EX(.Lexit3, st8 [buf]=r0,16 ) - ;; // needed to get len correct when error - st8 [buf2]=r0,16 - adds len=-16,len - br.cloop.dptk 2b - ;; - mov ar.lc=saved_lc - // - // tail correction based on len only - // - // We alternate the use of len3,len2 to allow parallelism and correct - // error handling. We also reuse p6/p7 to return correct value. - // The addition of len2/len3 does not cost anything more compared to - // the regular memset as we had empty slots. - // -.dotail: - mov len2=len // for parallelization of error handling - mov len3=len - tbit.nz p6,p0=len,3 - ;; - EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes -(p6) adds len3=-8,len2 - tbit.nz p7,p6=len,2 - ;; - EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes -(p7) adds len2=-4,len3 - tbit.nz p6,p7=len,1 - ;; - EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes -(p6) adds len3=-2,len2 - tbit.nz p7,p6=len,0 - ;; - EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left - mov ret0=r0 // success - br.ret.sptk.many rp // end of most likely path - - // - // Outlined error handling code - // - - // - // .Lexit3: comes from core loop, need restore pr/lc - // len contains bytes left - // - // - // .Lexit2: - // if p6 -> coming from st8 or st2 : len2 contains what's left - // if p7 -> coming from st4 or st1 : len3 contains what's left - // We must restore lc/pr even though might not have been used. -.Lexit2: - .pred.rel "mutex", p6, p7 -(p6) mov len=len2 -(p7) mov len=len3 - ;; - // - // .Lexit4: comes from head, need not restore pr/lc - // len contains bytes left - // -.Lexit3: - mov ret0=len - mov ar.lc=saved_lc - br.ret.sptk.many rp -END(__do_clear_user) -EXPORT_SYMBOL(__do_clear_user) |