summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-08-07 17:06:15 +1000
committerStephen Rothwell <sfr@canb.auug.org.au>2014-08-07 17:06:15 +1000
commit38fde44da4258f0a2ae975ecc833106c726ad37e (patch)
tree69e0e7ca7581794d29eaf9bb683214a692be391b
parente2ea6c3da34fa52f332cbc7914ca282cf4d2eeb9 (diff)
parentaf86fefe413b6b1542e1b0f97be8bc9570c5eaf7 (diff)
Merge branch 'akpm/master'
-rw-r--r--CREDITS7
-rw-r--r--Documentation/ABI/testing/sysfs-class-bdi8
-rw-r--r--Documentation/filesystems/vfat.txt10
-rw-r--r--Documentation/vm/ksm.txt7
-rw-r--r--Documentation/vm/remap_file_pages.txt7
-rw-r--r--MAINTAINERS73
-rw-r--r--arch/arm/Kconfig2
-rw-r--r--arch/arm/mach-omap2/board-omap3touchbook.c2
-rw-r--r--arch/arm/mach-omap2/mux.c22
-rw-r--r--arch/arm/mach-pxa/balloon3.c2
-rw-r--r--arch/arm/mach-pxa/viper.c2
-rw-r--r--arch/arm/mach-s3c24xx/mach-jive.c2
-rw-r--r--arch/arm/mach-w90x900/cpu.c3
-rw-r--r--arch/arm64/include/asm/page.h3
-rw-r--r--arch/arm64/kernel/vdso.c19
-rw-r--r--arch/ia64/Kconfig2
-rw-r--r--arch/ia64/include/asm/page.h2
-rw-r--r--arch/ia64/mm/init.c31
-rw-r--r--arch/m68k/Kconfig2
-rw-r--r--arch/mips/Kconfig2
-rw-r--r--arch/powerpc/Kconfig2
-rw-r--r--arch/powerpc/include/asm/page.h3
-rw-r--r--arch/powerpc/kernel/setup_64.c6
-rw-r--r--arch/powerpc/kernel/vdso.c16
-rw-r--r--arch/powerpc/kernel/vio.c2
-rw-r--r--arch/powerpc/platforms/pseries/dlpar.c4
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c2
-rw-r--r--arch/s390/Kconfig2
-rw-r--r--arch/s390/include/asm/page.h2
-rw-r--r--arch/s390/kernel/vdso.c15
-rw-r--r--arch/sh/Kconfig2
-rw-r--r--arch/sh/include/asm/page.h5
-rw-r--r--arch/sh/kernel/vsyscall/vsyscall.c15
-rw-r--r--arch/tile/Kconfig2
-rw-r--r--arch/tile/include/asm/page.h6
-rw-r--r--arch/tile/kernel/vdso.c15
-rw-r--r--arch/um/include/asm/page.h5
-rw-r--r--arch/x86/Kbuild4
-rw-r--r--arch/x86/Kconfig25
-rw-r--r--arch/x86/Makefile8
-rw-r--r--arch/x86/include/asm/crash.h9
-rw-r--r--arch/x86/include/asm/kexec-bzimage64.h6
-rw-r--r--arch/x86/include/asm/kexec.h45
-rw-r--r--arch/x86/include/asm/page.h1
-rw-r--r--arch/x86/include/asm/page_64.h2
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c4
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c6
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c4
-rw-r--r--arch/x86/kernel/crash.c563
-rw-r--r--arch/x86/kernel/kexec-bzimage64.c553
-rw-r--r--arch/x86/kernel/machine_kexec_64.c239
-rw-r--r--arch/x86/kvm/mmu_audit.c2
-rw-r--r--arch/x86/platform/uv/tlb_uv.c2
-rw-r--r--arch/x86/purgatory/Makefile30
-rw-r--r--arch/x86/purgatory/entry64.S101
-rw-r--r--arch/x86/purgatory/purgatory.c72
-rw-r--r--arch/x86/purgatory/setup-x86_64.S58
-rw-r--r--arch/x86/purgatory/sha256.c283
-rw-r--r--arch/x86/purgatory/sha256.h22
-rw-r--r--arch/x86/purgatory/stack.S19
-rw-r--r--arch/x86/purgatory/string.c13
-rw-r--r--arch/x86/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/um/asm/elf.h1
-rw-r--r--arch/x86/um/mem_64.c15
-rw-r--r--arch/x86/vdso/vdso2c.h12
-rw-r--r--arch/x86/vdso/vdso32-setup.c19
-rw-r--r--drivers/atm/he.c31
-rw-r--r--drivers/atm/idt77252.c15
-rw-r--r--drivers/block/DAC960.c18
-rw-r--r--drivers/block/cciss.c11
-rw-r--r--drivers/block/skd_main.c25
-rw-r--r--drivers/crypto/hifn_795x.c5
-rw-r--r--drivers/firmware/efi/runtime-map.c21
-rw-r--r--drivers/gpio/gpio-zevio.c4
-rw-r--r--drivers/gpu/drm/i810/i810_dma.c5
-rw-r--r--drivers/infiniband/hw/amso1100/c2.c6
-rw-r--r--drivers/infiniband/hw/nes/nes_hw.c12
-rw-r--r--drivers/infiniband/hw/nes/nes_verbs.c5
-rw-r--r--drivers/media/common/saa7146/saa7146_core.c15
-rw-r--r--drivers/media/common/saa7146/saa7146_fops.c5
-rw-r--r--drivers/media/pci/bt8xx/bt878.c16
-rw-r--r--drivers/media/pci/ngene/ngene-core.c7
-rw-r--r--drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c11
-rw-r--r--drivers/media/usb/ttusb-dec/ttusb_dec.c11
-rw-r--r--drivers/net/ethernet/amd/pcnet32.c45
-rw-r--r--drivers/net/ethernet/atheros/atl1e/atl1e_main.c7
-rw-r--r--drivers/net/ethernet/cisco/enic/vnic_dev.c8
-rw-r--r--drivers/net/ethernet/marvell/sky2.c5
-rw-r--r--drivers/net/ethernet/micrel/ksz884x.c7
-rw-r--r--drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c4
-rw-r--r--drivers/net/ethernet/qlogic/qlge/qlge_main.c11
-rw-r--r--drivers/net/irda/vlsi_ir.c4
-rw-r--r--drivers/net/wireless/ipw2x00/ipw2100.c16
-rw-r--r--drivers/net/wireless/mwl8k.c6
-rw-r--r--drivers/net/wireless/rtl818x/rtl8180/dev.c11
-rw-r--r--drivers/net/wireless/rtlwifi/pci.c17
-rw-r--r--drivers/scsi/3w-sas.c5
-rw-r--r--drivers/scsi/a100u2w.c8
-rw-r--r--drivers/scsi/be2iscsi/be_main.c10
-rw-r--r--drivers/scsi/be2iscsi/be_mgmt.c3
-rw-r--r--drivers/scsi/csiostor/csio_wr.c8
-rw-r--r--drivers/scsi/eata.c5
-rw-r--r--drivers/scsi/hpsa.c8
-rw-r--r--drivers/scsi/megaraid/megaraid_mbox.c16
-rw-r--r--drivers/scsi/megaraid/megaraid_sas_base.c8
-rw-r--r--drivers/scsi/mesh.c6
-rw-r--r--drivers/scsi/mvumi.c9
-rw-r--r--drivers/scsi/pm8001/pm8001_sas.c5
-rw-r--r--drivers/scsi/pmcraid.c4
-rw-r--r--drivers/scsi/scsi_sysfs.c4
-rw-r--r--drivers/staging/rtl8192e/rtl8192e/rtl_core.c15
-rw-r--r--drivers/staging/rtl8192ee/pci.c37
-rw-r--r--drivers/staging/rtl8821ae/pci.c36
-rw-r--r--drivers/staging/slicoss/slicoss.c9
-rw-r--r--drivers/staging/vt6655/device_main.c40
-rw-r--r--drivers/tty/synclink_gt.c5
-rw-r--r--drivers/vme/bridges/vme_ca91cx42.c6
-rw-r--r--drivers/vme/bridges/vme_tsi148.c6
-rw-r--r--drivers/w1/w1_int.c3
-rw-r--r--fs/dlm/plock.c34
-rw-r--r--fs/fat/cache.c70
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c78
-rw-r--r--fs/fat/inode.c87
-rw-r--r--fs/fcntl.c5
-rw-r--r--fs/inode.c1
-rw-r--r--fs/lockd/svclock.c12
-rw-r--r--include/asm-generic/pci-dma-compat.h8
-rw-r--r--include/linux/efi.h19
-rw-r--r--include/linux/fs.h1238
-rw-r--r--include/linux/ioport.h6
-rw-r--r--include/linux/kernel.h4
-rw-r--r--include/linux/kexec.h104
-rw-r--r--include/linux/mm.h17
-rw-r--r--include/linux/sched.h2
-rw-r--r--include/linux/shmem_fs.h17
-rw-r--r--include/linux/syscalls.h5
-rw-r--r--include/uapi/linux/fcntl.h15
-rw-r--r--include/uapi/linux/kexec.h11
-rw-r--r--include/uapi/linux/memfd.h8
-rw-r--r--init/Kconfig5
-rw-r--r--init/main.c23
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/kexec.c1291
-rw-r--r--kernel/kprobes.c36
-rw-r--r--kernel/resource.c101
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--kernel/time/posix-timers.c57
-rw-r--r--kernel/time/timer.c27
-rw-r--r--mm/Makefile2
-rw-r--r--mm/backing-dev.c35
-rw-r--r--mm/fremap.c283
-rw-r--r--mm/ksm.c41
-rw-r--r--mm/memory.c38
-rw-r--r--mm/mmap.c99
-rw-r--r--mm/nommu.c13
-rw-r--r--mm/shmem.c324
-rw-r--r--mm/swap_state.c1
-rw-r--r--scripts/.gitignore1
-rw-r--r--scripts/Makefile1
-rw-r--r--scripts/basic/.gitignore1
-rw-r--r--scripts/basic/Makefile1
-rw-r--r--scripts/basic/bin2c.c (renamed from scripts/bin2c.c)7
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/memfd/.gitignore4
-rw-r--r--tools/testing/selftests/memfd/Makefile41
-rw-r--r--tools/testing/selftests/memfd/fuse_mnt.c110
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c311
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c913
-rw-r--r--tools/testing/selftests/memfd/run_fuse_test.sh14
173 files changed, 6786 insertions, 1802 deletions
diff --git a/CREDITS b/CREDITS
index a80b66718f66..bb6278884f89 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1381,6 +1381,9 @@ S: 17 rue Danton
S: F - 94270 Le Kremlin-Bicêtre
S: France
+N: Jack Hammer
+D: IBM ServeRAID RAID (ips) driver maintenance
+
N: Greg Hankins
E: gregh@cc.gatech.edu
D: fixed keyboard driver to separate LED and locking status
@@ -1691,6 +1694,10 @@ S: Reading
S: RG6 2NU
S: United Kingdom
+N: Dave Jeffery
+E: dhjeffery@gmail.com
+D: SCSI hacks and IBM ServeRAID RAID driver maintenance
+
N: Jakub Jelinek
E: jakub@redhat.com
W: http://sunsite.mff.cuni.cz/~jj
diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index d773d5697cf5..3187a18af6da 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -53,3 +53,11 @@ stable_pages_required (read-only)
If set, the backing device requires that all pages comprising a write
request must not be changed until writeout is complete.
+
+strictlimit (read-write)
+
+ Forces per-BDI checks for the share of given device in the write-back
+ cache even before the global background dirty limit is reached. This
+ is useful in situations where the global limit is much higher than
+ affordable for given relatively slow (or untrusted) device. Turning
+ strictlimit on has no visible effect if max_ratio is equal to 100%.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index ce1126aceed8..223c32171dcc 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -180,6 +180,16 @@ dos1xfloppy -- If set, use a fallback default BIOS Parameter Block
<bool>: 0,1,yes,no,true,false
+LIMITATION
+---------------------------------------------------------------------
+* The fallocated region of file is discarded at umount/evict time
+ when using fallocate with FALLOC_FL_KEEP_SIZE.
+ So, User should assume that fallocated region can be discarded at
+ last close if there is memory pressure resulting in eviction of
+ the inode from the memory. As a result, for any dependency on
+ the fallocated region, user should make sure to recheck fallocate
+ after reopening the file.
+
TODO
----------------------------------------------------------------------
* Need to get rid of the raw scanning stuff. Instead, always use
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index f34a8ee6f860..9735c87ca363 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -87,6 +87,13 @@ pages_sharing - how many more sites are sharing them i.e. how much saved
pages_unshared - how many pages unique but repeatedly checked for merging
pages_volatile - how many pages changing too fast to be placed in a tree
full_scans - how many times all mergeable areas have been scanned
+deferrable_timer - whether to use deferrable timers or not
+ e.g. "echo 1 > /sys/kernel/mm/ksm/deferrable_timer"
+ Default: 0 (means, we are not using deferrable timers. Users
+ might want to set deferrable_timer option if they donot want
+ ksm thread to wakeup CPU to carryout ksm activities thus
+ gaining on battery while compromising slightly on memory
+ that could have been saved.)
A high ratio of pages_sharing to pages_shared indicates good sharing, but
a high ratio of pages_unshared to pages_sharing indicates wasted effort.
diff --git a/Documentation/vm/remap_file_pages.txt b/Documentation/vm/remap_file_pages.txt
index 560e4363a55d..f609142f406a 100644
--- a/Documentation/vm/remap_file_pages.txt
+++ b/Documentation/vm/remap_file_pages.txt
@@ -18,10 +18,9 @@ on 32-bit systems to map files bigger than can linearly fit into 32-bit
virtual address space. This use-case is not critical anymore since 64-bit
systems are widely available.
-The plan is to deprecate the syscall and replace it with an emulation.
-The emulation will create new VMAs instead of nonlinear mappings. It's
-going to work slower for rare users of remap_file_pages() but ABI is
-preserved.
+The syscall is deprecated and replaced it with an emulation now. The
+emulation creates new VMAs instead of nonlinear mappings. It's going to
+work slower for rare users of remap_file_pages() but ABI is preserved.
One side effect of emulation (apart from performance) is that user can hit
vm.max_map_count limit more easily due to additional VMAs. See comment for
diff --git a/MAINTAINERS b/MAINTAINERS
index ecadc1317ec7..8bd7b934b950 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -597,7 +597,7 @@ AMD GEODE CS5536 USB DEVICE CONTROLLER DRIVER
M: Thomas Dahlmann <dahlmann.thomas@arcor.de>
L: linux-geode@lists.infradead.org (moderated for non-subscribers)
S: Supported
-F: drivers/usb/gadget/amd5536udc.*
+F: drivers/usb/gadget/udc/amd5536udc.*
AMD GEODE PROCESSOR/CHIPSET SUPPORT
P: Andres Salomon <dilinger@queued.net>
@@ -621,7 +621,7 @@ AMD MICROCODE UPDATE SUPPORT
M: Andreas Herrmann <herrmann.der.user@googlemail.com>
L: amd64-microcode@amd64.org
S: Maintained
-F: arch/x86/kernel/microcode_amd.c
+F: arch/x86/kernel/cpu/microcode/amd*
AMD XGBE DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
@@ -911,7 +911,7 @@ L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
T: git git://git.kernel.org/pub/scm/linux/kernel/git/baohua/linux.git
S: Maintained
F: arch/arm/mach-prima2/
-F: drivers/clk/clk-prima2.c
+F: drivers/clk/sirf/
F: drivers/clocksource/timer-prima2.c
F: drivers/clocksource/timer-marco.c
N: [^a-z]sirf
@@ -1164,6 +1164,7 @@ M: Linus Walleij <linus.walleij@linaro.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-nomadik/
+F: drivers/pinctrl/nomadik/
F: drivers/i2c/busses/i2c-nomadik.c
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
@@ -1185,8 +1186,7 @@ F: drivers/mmc/host/msm_sdcc.h
F: drivers/tty/serial/msm_serial.h
F: drivers/tty/serial/msm_serial.c
F: drivers/*/pm8???-*
-F: drivers/mfd/ssbi/
-F: include/linux/mfd/pm8xxx/
+F: drivers/mfd/ssbi.c
T: git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git
S: Maintained
@@ -1443,7 +1443,8 @@ F: drivers/mfd/abx500*
F: drivers/mfd/ab8500*
F: drivers/mfd/dbx500*
F: drivers/mfd/db8500*
-F: drivers/pinctrl/pinctrl-nomadik*
+F: drivers/pinctrl/nomadik/pinctrl-ab*
+F: drivers/pinctrl/nomadik/pinctrl-nomadik*
F: drivers/rtc/rtc-ab8500.c
F: drivers/rtc/rtc-pl031.c
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-stericsson.git
@@ -1699,7 +1700,7 @@ ATMEL USBA UDC DRIVER
M: Nicolas Ferre <nicolas.ferre@atmel.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
-F: drivers/usb/gadget/atmel_usba_udc.*
+F: drivers/usb/gadget/udc/atmel_usba_udc.*
ATMEL WIRELESS DRIVER
M: Simon Kelley <simon@thekelleys.org.uk>
@@ -1991,7 +1992,7 @@ F: arch/arm/boot/dts/bcm113*
F: arch/arm/boot/dts/bcm216*
F: arch/arm/boot/dts/bcm281*
F: arch/arm/configs/bcm_defconfig
-F: drivers/mmc/host/sdhci_bcm_kona.c
+F: drivers/mmc/host/sdhci-bcm-kona.c
F: drivers/clocksource/bcm_kona_timer.c
BROADCOM BCM2835 ARM ARCHICTURE
@@ -2341,12 +2342,6 @@ L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/ethernet/cirrus/ep93xx_eth.c
-CIRRUS LOGIC EP93XX OHCI USB HOST DRIVER
-M: Lennert Buytenhek <kernel@wantstofly.org>
-L: linux-usb@vger.kernel.org
-S: Maintained
-F: drivers/usb/host/ohci-ep93xx.c
-
CIRRUS LOGIC AUDIO CODEC DRIVERS
M: Brian Austin <brian.austin@cirrus.com>
M: Paul Handrigan <Paul.Handrigan@cirrus.com>
@@ -2431,7 +2426,7 @@ W: http://linux-cifs.samba.org/
Q: http://patchwork.ozlabs.org/project/linux-cifs-client/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
S: Supported
-F: Documentation/filesystems/cifs.txt
+F: Documentation/filesystems/cifs/
F: fs/cifs/
COMPACTPCI HOTPLUG CORE
@@ -2966,7 +2961,9 @@ L: linux-media@vger.kernel.org
L: dri-devel@lists.freedesktop.org
L: linaro-mm-sig@lists.linaro.org
F: drivers/dma-buf/
-F: include/linux/dma-buf* include/linux/reservation.h include/linux/*fence.h
+F: include/linux/dma-buf*
+F: include/linux/reservation.h
+F: include/linux/*fence.h
F: Documentation/dma-buf-sharing.txt
T: git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
@@ -3061,7 +3058,6 @@ L: dri-devel@lists.freedesktop.org
T: git git://people.freedesktop.org/~agd5f/linux
S: Supported
F: drivers/gpu/drm/radeon/
-F: include/drm/radeon*
F: include/uapi/drm/radeon*
DRM PANEL DRIVERS
@@ -3255,26 +3251,12 @@ T: git git://linuxtv.org/anttip/media_tree.git
S: Maintained
F: drivers/media/tuners/e4000*
-EATA-DMA SCSI DRIVER
-M: Michael Neuffer <mike@i-Connect.Net>
-L: linux-eata@i-connect.net
-L: linux-scsi@vger.kernel.org
-S: Maintained
-F: drivers/scsi/eata*
-
EATA ISA/EISA/PCI SCSI DRIVER
M: Dario Ballabio <ballabio_dario@emc.com>
L: linux-scsi@vger.kernel.org
S: Maintained
F: drivers/scsi/eata.c
-EATA-PIO SCSI DRIVER
-M: Michael Neuffer <mike@i-Connect.Net>
-L: linux-eata@i-connect.net
-L: linux-scsi@vger.kernel.org
-S: Maintained
-F: drivers/scsi/eata_pio.*
-
EC100 MEDIA DRIVER
M: Antti Palosaari <crope@iki.fi>
L: linux-media@vger.kernel.org
@@ -3449,7 +3431,7 @@ M: Matt Fleming <matt.fleming@intel.com>
L: linux-efi@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mfleming/efi.git
S: Maintained
-F: Documentation/x86/efi-stub.txt
+F: Documentation/efi-stub.txt
F: arch/ia64/kernel/efi.c
F: arch/x86/boot/compressed/eboot.[ch]
F: arch/x86/include/asm/efi.h
@@ -3836,7 +3818,7 @@ M: Li Yang <leoli@freescale.com>
L: linux-usb@vger.kernel.org
L: linuxppc-dev@lists.ozlabs.org
S: Maintained
-F: drivers/usb/gadget/fsl*
+F: drivers/usb/gadget/udc/fsl*
FREESCALE QUICC ENGINE UCC ETHERNET DRIVER
M: Li Yang <leoli@freescale.com>
@@ -4525,10 +4507,7 @@ S: Supported
F: drivers/scsi/ibmvscsi/ibmvfc*
IBM ServeRAID RAID DRIVER
-P: Jack Hammer
-M: Dave Jeffery <ipslinux@adaptec.com>
-W: http://www.developer.ibm.com/welcome/netfinity/serveraid.html
-S: Supported
+S: Orphan
F: drivers/scsi/ips.*
ICH LPC AND GPIO DRIVER
@@ -4725,8 +4704,8 @@ F: drivers/platform/x86/intel_menlow.c
INTEL IA32 MICROCODE UPDATE SUPPORT
M: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
S: Maintained
-F: arch/x86/kernel/microcode_core.c
-F: arch/x86/kernel/microcode_intel.c
+F: arch/x86/kernel/cpu/microcode/core*
+F: arch/x86/kernel/cpu/microcode/intel*
INTEL I/OAT DMA DRIVER
M: Dan Williams <dan.j.williams@intel.com>
@@ -5185,7 +5164,6 @@ L: linux-nfs@vger.kernel.org
W: http://nfs.sourceforge.net/
S: Supported
F: fs/nfsd/
-F: include/linux/nfsd/
F: include/uapi/linux/nfsd/
F: fs/lockd/
F: fs/nfs_common/
@@ -5906,7 +5884,6 @@ F: drivers/clocksource/metag_generic.c
F: drivers/irqchip/irq-metag.c
F: drivers/irqchip/irq-metag-ext.c
F: drivers/tty/metag_da.c
-F: fs/imgdafs/
MICROBLAZE ARCHITECTURE
M: Michal Simek <monstr@monstr.eu>
@@ -7005,9 +6982,9 @@ M: Jamie Iles <jamie@jamieiles.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
T: git git://github.com/jamieiles/linux-2.6-ji.git
S: Supported
+F: arch/arm/boot/dts/picoxcell*
F: arch/arm/mach-picoxcell/
-F: drivers/*/picoxcell*
-F: drivers/*/*/picoxcell*
+F: drivers/crypto/picoxcell*
PIN CONTROL SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
@@ -7231,7 +7208,7 @@ F: drivers/ptp/*
F: include/linux/ptp_cl*
PTRACE SUPPORT
-M: Roland McGrath <roland@redhat.com>
+M: Roland McGrath <roland@hack.frob.com>
M: Oleg Nesterov <oleg@redhat.com>
S: Maintained
F: include/asm-generic/syscall.h
@@ -7281,7 +7258,7 @@ S: Maintained
F: arch/arm/mach-pxa/
F: drivers/pcmcia/pxa2xx*
F: drivers/spi/spi-pxa2xx*
-F: drivers/usb/gadget/pxa2*
+F: drivers/usb/gadget/udc/pxa2*
F: include/sound/pxa2xx-lib.h
F: sound/arm/pxa*
F: sound/soc/pxa/
@@ -7290,7 +7267,7 @@ PXA3xx NAND FLASH DRIVER
M: Ezequiel Garcia <ezequiel.garcia@free-electrons.com>
L: linux-mtd@lists.infradead.org
S: Maintained
-F: drivers/mtd/nand/pxa3xx-nand.c
+F: drivers/mtd/nand/pxa3xx_nand.c
MMP SUPPORT
M: Eric Miao <eric.y.miao@gmail.com>
@@ -9635,8 +9612,8 @@ USB WEBCAM GADGET
M: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
L: linux-usb@vger.kernel.org
S: Maintained
-F: drivers/usb/gadget/*uvc*.c
-F: drivers/usb/gadget/webcam.c
+F: drivers/usb/gadget/function/*uvc*.c
+F: drivers/usb/gadget/legacy/webcam.c
USB WIRELESS RNDIS DRIVER (rndis_wlan)
M: Jussi Kivilinna <jussi.kivilinna@iki.fi>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 32cbbd565902..c49a775937db 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1983,6 +1983,8 @@ config XIP_PHYS_ADDR
config KEXEC
bool "Kexec system call (EXPERIMENTAL)"
depends on (!SMP || PM_SLEEP_SMP)
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/arm/mach-omap2/board-omap3touchbook.c b/arch/arm/mach-omap2/board-omap3touchbook.c
index 7da48bc42bbf..70b904c010c6 100644
--- a/arch/arm/mach-omap2/board-omap3touchbook.c
+++ b/arch/arm/mach-omap2/board-omap3touchbook.c
@@ -336,7 +336,7 @@ static int __init early_touchbook_revision(char *p)
if (!p)
return 0;
- return strict_strtoul(p, 10, &touchbook_revision);
+ return kstrtoul(p, 10, &touchbook_revision);
}
early_param("tbr", early_touchbook_revision);
diff --git a/arch/arm/mach-omap2/mux.c b/arch/arm/mach-omap2/mux.c
index f62f7537d899..ac8a249779f2 100644
--- a/arch/arm/mach-omap2/mux.c
+++ b/arch/arm/mach-omap2/mux.c
@@ -681,29 +681,19 @@ static ssize_t omap_mux_dbg_signal_write(struct file *file,
const char __user *user_buf,
size_t count, loff_t *ppos)
{
- char buf[OMAP_MUX_MAX_ARG_CHAR];
struct seq_file *seqf;
struct omap_mux *m;
- unsigned long val;
- int buf_size, ret;
+ u16 val;
+ int ret;
struct omap_mux_partition *partition;
if (count > OMAP_MUX_MAX_ARG_CHAR)
return -EINVAL;
- memset(buf, 0, sizeof(buf));
- buf_size = min(count, sizeof(buf) - 1);
-
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- ret = strict_strtoul(buf, 0x10, &val);
+ ret = kstrtou16_from_user(user_buf, count, 0x10, &val);
if (ret < 0)
return ret;
- if (val > 0xffff)
- return -EINVAL;
-
seqf = file->private_data;
m = seqf->private;
@@ -711,7 +701,7 @@ static ssize_t omap_mux_dbg_signal_write(struct file *file,
if (!partition)
return -ENODEV;
- omap_mux_write(partition, (u16)val, m->reg_offset);
+ omap_mux_write(partition, val, m->reg_offset);
*ppos += count;
return count;
@@ -917,14 +907,14 @@ static void __init omap_mux_set_cmdline_signals(void)
while ((token = strsep(&next_opt, ",")) != NULL) {
char *keyval, *name;
- unsigned long val;
+ u16 val;
keyval = token;
name = strsep(&keyval, "=");
if (name) {
int res;
- res = strict_strtoul(keyval, 0x10, &val);
+ res = kstrtou16(keyval, 0x10, &val);
if (res < 0)
continue;
diff --git a/arch/arm/mach-pxa/balloon3.c b/arch/arm/mach-pxa/balloon3.c
index 43596e0ed051..d897292712eb 100644
--- a/arch/arm/mach-pxa/balloon3.c
+++ b/arch/arm/mach-pxa/balloon3.c
@@ -90,7 +90,7 @@ int __init parse_balloon3_features(char *arg)
if (!arg)
return 0;
- return strict_strtoul(arg, 0, &balloon3_features_present);
+ return kstrtoul(arg, 0, &balloon3_features_present);
}
early_param("balloon3_features", parse_balloon3_features);
diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c
index 41f27f667ca8..de3b08073fe7 100644
--- a/arch/arm/mach-pxa/viper.c
+++ b/arch/arm/mach-pxa/viper.c
@@ -769,7 +769,7 @@ static unsigned long viper_tpm;
static int __init viper_tpm_setup(char *str)
{
- return strict_strtoul(str, 10, &viper_tpm) >= 0;
+ return kstrtoul(str, 10, &viper_tpm) >= 0;
}
__setup("tpm=", viper_tpm_setup);
diff --git a/arch/arm/mach-s3c24xx/mach-jive.c b/arch/arm/mach-s3c24xx/mach-jive.c
index e647b47244a9..7804d3c6991b 100644
--- a/arch/arm/mach-s3c24xx/mach-jive.c
+++ b/arch/arm/mach-s3c24xx/mach-jive.c
@@ -242,7 +242,7 @@ static int __init jive_mtdset(char *options)
if (options == NULL || options[0] == '\0')
return 0;
- if (strict_strtoul(options, 10, &set)) {
+ if (kstrtoul(options, 10, &set)) {
printk(KERN_ERR "failed to parse mtdset=%s\n", options);
return 0;
}
diff --git a/arch/arm/mach-w90x900/cpu.c b/arch/arm/mach-w90x900/cpu.c
index b1eabaad50a5..213230ee57d1 100644
--- a/arch/arm/mach-w90x900/cpu.c
+++ b/arch/arm/mach-w90x900/cpu.c
@@ -178,7 +178,8 @@ static int __init nuc900_set_cpufreq(char *str)
if (!*str)
return 0;
- strict_strtoul(str, 0, &cpufreq);
+ if (kstrtoul(str, 0, &cpufreq))
+ return 0;
nuc900_clock_source(NULL, "ext");
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 7a3f462133b0..22b16232bd60 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -28,9 +28,6 @@
#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1))
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA 1
-
/*
* The idmap and swapper page tables need some space reserved in the kernel
* image. Both require pgd, pud (4 levels only) and pmd tables to (section)
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index a81a446a5786..32aeea083d93 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -195,25 +195,6 @@ up_fail:
}
/*
- * We define AT_SYSINFO_EHDR, so we need these function stubs to keep
- * Linux happy.
- */
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
-/*
* Update the vDSO data page to keep in sync with kernel timekeeping.
*/
void update_vsyscall(struct timekeeper *tk)
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c84c88bbbbd7..64aefb76bd69 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -549,6 +549,8 @@ source "drivers/sn/Kconfig"
config KEXEC
bool "kexec system call"
depends on !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h
index f1e1b2e3cdb3..1f1bf144fe62 100644
--- a/arch/ia64/include/asm/page.h
+++ b/arch/ia64/include/asm/page.h
@@ -231,4 +231,6 @@ get_order (unsigned long size)
#define PERCPU_ADDR (-PERCPU_PAGE_SIZE)
#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
+#define __HAVE_ARCH_GATE_AREA 1
+
#endif /* _ASM_IA64_PAGE_H */
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 892d43e32f3b..6b3345758d3e 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -278,6 +278,37 @@ setup_gate (void)
ia64_patch_gate();
}
+static struct vm_area_struct gate_vma;
+
+static int __init gate_vma_init(void)
+{
+ gate_vma.vm_mm = NULL;
+ gate_vma.vm_start = FIXADDR_USER_START;
+ gate_vma.vm_end = FIXADDR_USER_END;
+ gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+ gate_vma.vm_page_prot = __P101;
+
+ return 0;
+}
+__initcall(gate_vma_init);
+
+struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return &gate_vma;
+}
+
+int in_gate_area_no_mm(unsigned long addr)
+{
+ if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
+ return 1;
+ return 0;
+}
+
+int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+ return in_gate_area_no_mm(addr);
+}
+
void ia64_mmu_init(void *my_cpu_data)
{
unsigned long pta, impl_va_bits;
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 87b7c7581b1d..3ff8c9a25335 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -91,6 +91,8 @@ config MMU_SUN3
config KEXEC
bool "kexec system call"
depends on M68KCLASSIC
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 900c7e5333b6..df51e78a72cc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -2396,6 +2396,8 @@ source "kernel/Kconfig.preempt"
config KEXEC
bool "Kexec system call"
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4bc7b62fb4b6..a577609f8ed6 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -399,6 +399,8 @@ config PPC64_SUPPORTS_MEMORY_FAILURE
config KEXEC
bool "kexec system call"
depends on (PPC_BOOK3S || FSL_BOOKE || (44x && !SMP))
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 32e4e212b9c1..26fe1ae15212 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -48,9 +48,6 @@ extern unsigned int HPAGE_SHIFT;
#define HUGE_MAX_HSTATE (MMU_PAGE_COUNT-1)
#endif
-/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */
-#define __HAVE_ARCH_GATE_AREA 1
-
/*
* Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we
* assign PAGE_MASK to a larger type it gets extended the way we want
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index d0225572faa1..75d62d63fe68 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -149,13 +149,13 @@ static void check_smt_enabled(void)
else if (!strcmp(smt_enabled_cmdline, "off"))
smt_enabled_at_boot = 0;
else {
- long smt;
+ int smt;
int rc;
- rc = strict_strtol(smt_enabled_cmdline, 10, &smt);
+ rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
if (!rc)
smt_enabled_at_boot =
- min(threads_per_core, (int)smt);
+ min(threads_per_core, smt);
}
} else {
dn = of_find_node_by_path("/options");
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ce74c335a6a4..f174351842cf 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -840,19 +840,3 @@ static int __init vdso_init(void)
return 0;
}
arch_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 904c66128fae..5bfdab9047be 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -977,7 +977,7 @@ static ssize_t viodev_cmo_desired_set(struct device *dev,
size_t new_desired;
int ret;
- ret = strict_strtoul(buf, 10, &new_desired);
+ ret = kstrtoul(buf, 10, &new_desired);
if (ret)
return ret;
diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c
index 2d0b4d68a40a..a2450b8a50a5 100644
--- a/arch/powerpc/platforms/pseries/dlpar.c
+++ b/arch/powerpc/platforms/pseries/dlpar.c
@@ -400,10 +400,10 @@ out:
static ssize_t dlpar_cpu_probe(const char *buf, size_t count)
{
struct device_node *dn, *parent;
- unsigned long drc_index;
+ u32 drc_index;
int rc;
- rc = strict_strtoul(buf, 0, &drc_index);
+ rc = kstrtou32(buf, 0, &drc_index);
if (rc)
return -EINVAL;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index d146fef038b8..e7cb6d4a871a 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -320,7 +320,7 @@ static ssize_t migrate_store(struct class *class, struct class_attribute *attr,
u64 streamid;
int rc;
- rc = strict_strtoull(buf, 0, &streamid);
+ rc = kstrtou64(buf, 0, &streamid);
if (rc)
return rc;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 05c78bb5f570..ab39ceb89ecf 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -48,6 +48,8 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
config KEXEC
def_bool y
+ select CRYPTO
+ select CRYPTO_SHA256
config AUDIT_ARCH
def_bool y
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 114258eeaacd..7b2ac6e44166 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -162,6 +162,4 @@ static inline int devmem_is_allowed(unsigned long pfn)
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-#define __HAVE_ARCH_GATE_AREA 1
-
#endif /* _S390_PAGE_H */
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 613649096783..0bbb7e027c5a 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -316,18 +316,3 @@ static int __init vdso_init(void)
return 0;
}
early_initcall(vdso_init);
-
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
-}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index aa2df3eaeb29..453fa5c09550 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -595,6 +595,8 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call (EXPERIMENTAL)"
depends on SUPERH32 && MMU
+ select CRYPTO
+ select CRYPTO_SHA256
help
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/sh/include/asm/page.h b/arch/sh/include/asm/page.h
index 15d970328f71..fe20d14ae051 100644
--- a/arch/sh/include/asm/page.h
+++ b/arch/sh/include/asm/page.h
@@ -186,11 +186,6 @@ typedef struct page *pgtable_t;
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-/* vDSO support */
-#ifdef CONFIG_VSYSCALL
-#define __HAVE_ARCH_GATE_AREA
-#endif
-
/*
* Some drivers need to perform DMA into kmalloc'ed buffers
* and so we have to increase the kmalloc minalign for this.
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 5ca579720a09..ea2aa1393b87 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -92,18 +92,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return NULL;
}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long address)
-{
- return 0;
-}
-
-int in_gate_area_no_mm(unsigned long address)
-{
- return 0;
-}
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 7fcd492adbfc..a3ffe2dd4832 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -191,6 +191,8 @@ source "kernel/Kconfig.hz"
config KEXEC
bool "kexec system call"
+ select CRYPTO
+ select CRYPTO_SHA256
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
diff --git a/arch/tile/include/asm/page.h b/arch/tile/include/asm/page.h
index 672768008618..a213a8d84a95 100644
--- a/arch/tile/include/asm/page.h
+++ b/arch/tile/include/asm/page.h
@@ -39,12 +39,6 @@
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
/*
- * We do define AT_SYSINFO_EHDR to support vDSO,
- * but don't use the gate mechanism.
- */
-#define __HAVE_ARCH_GATE_AREA 1
-
-/*
* If the Kconfig doesn't specify, set a maximum zone order that
* is enough so that we can create huge pages from small pages given
* the respective sizes of the two page types. See <linux/mmzone.h>.
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
index 1533af24106e..5bc51d7dfdcb 100644
--- a/arch/tile/kernel/vdso.c
+++ b/arch/tile/kernel/vdso.c
@@ -121,21 +121,6 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return NULL;
}
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long address)
-{
- return 0;
-}
-
-int in_gate_area_no_mm(unsigned long address)
-{
- return 0;
-}
-
int setup_vdso_pages(void)
{
struct page **pagelist;
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 5ff53d9185f7..71c5d132062a 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -119,4 +119,9 @@ extern unsigned long uml_physmem;
#include <asm-generic/getorder.h>
#endif /* __ASSEMBLY__ */
+
+#ifdef CONFIG_X86_32
+#define __HAVE_ARCH_GATE_AREA 1
+#endif
+
#endif /* __UM_PAGE_H */
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
index e5287d8517aa..61b6d51866f8 100644
--- a/arch/x86/Kbuild
+++ b/arch/x86/Kbuild
@@ -16,3 +16,7 @@ obj-$(CONFIG_IA32_EMULATION) += ia32/
obj-y += platform/
obj-y += net/
+
+ifeq ($(CONFIG_X86_64),y)
+obj-$(CONFIG_KEXEC) += purgatory/
+endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8f0d2957a053..4bdf8443ff3e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1584,6 +1584,9 @@ source kernel/Kconfig.hz
config KEXEC
bool "kexec system call"
+ select BUILD_BIN2C
+ select CRYPTO
+ select CRYPTO_SHA256
---help---
kexec is a system call that implements the ability to shutdown your
current kernel, and to start another kernel. It is like a reboot
@@ -1598,6 +1601,28 @@ config KEXEC
interface is strongly in flux, so no good recommendation can be
made.
+config KEXEC_VERIFY_SIG
+ bool "Verify kernel signature during kexec_file_load() syscall"
+ depends on KEXEC
+ ---help---
+ This option makes kernel signature verification mandatory for
+ kexec_file_load() syscall. If kernel is signature can not be
+ verified, kexec_file_load() will fail.
+
+ This option enforces signature verification at generic level.
+ One needs to enable signature verification for type of kernel
+ image being loaded to make sure it works. For example, enable
+ bzImage signature verification option to be able to load and
+ verify signatures of bzImage. Otherwise kernel loading will fail.
+
+config KEXEC_BZIMAGE_VERIFY_SIG
+ bool "Enable bzImage signature verification support"
+ depends on KEXEC_VERIFY_SIG
+ depends on SIGNED_PE_FILE_VERIFICATION
+ select SYSTEM_TRUSTED_KEYRING
+ ---help---
+ Enable bzImage signature verification support.
+
config CRASH_DUMP
bool "kernel crash dumps"
depends on X86_64 || (X86_32 && HIGHMEM)
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c65fd9650467..c1aa36887843 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -183,6 +183,14 @@ archscripts: scripts_basic
archheaders:
$(Q)$(MAKE) $(build)=arch/x86/syscalls all
+archprepare:
+ifeq ($(CONFIG_KEXEC),y)
+# Build only for 64bit. No loaders for 32bit yet.
+ ifeq ($(CONFIG_X86_64),y)
+ $(Q)$(MAKE) $(build)=arch/x86/purgatory arch/x86/purgatory/kexec-purgatory.c
+ endif
+endif
+
###
# Kernel objects
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
new file mode 100644
index 000000000000..f498411f2500
--- /dev/null
+++ b/arch/x86/include/asm/crash.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_X86_CRASH_H
+#define _ASM_X86_CRASH_H
+
+int crash_load_segments(struct kimage *image);
+int crash_copy_backup_region(struct kimage *image);
+int crash_setup_memmap_entries(struct kimage *image,
+ struct boot_params *params);
+
+#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/kexec-bzimage64.h b/arch/x86/include/asm/kexec-bzimage64.h
new file mode 100644
index 000000000000..d1b5d194e31d
--- /dev/null
+++ b/arch/x86/include/asm/kexec-bzimage64.h
@@ -0,0 +1,6 @@
+#ifndef _ASM_KEXEC_BZIMAGE64_H
+#define _ASM_KEXEC_BZIMAGE64_H
+
+extern struct kexec_file_ops kexec_bzImage64_ops;
+
+#endif /* _ASM_KEXE_BZIMAGE64_H */
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 17483a492f18..d2434c1cad05 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -23,6 +23,9 @@
#include <asm/page.h>
#include <asm/ptrace.h>
+#include <asm/bootparam.h>
+
+struct kimage;
/*
* KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
@@ -61,6 +64,10 @@
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
+/* Memory to backup during crash kdump */
+#define KEXEC_BACKUP_SRC_START (0UL)
+#define KEXEC_BACKUP_SRC_END (640 * 1024UL) /* 640K */
+
/*
* CPU does not save ss and sp on stack if execution is already
* running in kernel mode at the time of NMI occurrence. This code
@@ -160,6 +167,44 @@ struct kimage_arch {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
+ /* Details of backup region */
+ unsigned long backup_src_start;
+ unsigned long backup_src_sz;
+
+ /* Physical address of backup segment */
+ unsigned long backup_load_addr;
+
+ /* Core ELF header buffer */
+ void *elf_headers;
+ unsigned long elf_headers_sz;
+ unsigned long elf_load_addr;
+};
+#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+/*
+ * Number of elements and order of elements in this structure should match
+ * with the ones in arch/x86/purgatory/entry64.S. If you make a change here
+ * make an appropriate change in purgatory too.
+ */
+struct kexec_entry64_regs {
+ uint64_t rax;
+ uint64_t rcx;
+ uint64_t rdx;
+ uint64_t rbx;
+ uint64_t rsp;
+ uint64_t rbp;
+ uint64_t rsi;
+ uint64_t rdi;
+ uint64_t r8;
+ uint64_t r9;
+ uint64_t r10;
+ uint64_t r11;
+ uint64_t r12;
+ uint64_t r13;
+ uint64_t r14;
+ uint64_t r15;
+ uint64_t rip;
};
#endif
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index 775873d3be55..802dde30c928 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -70,7 +70,6 @@ extern bool __virt_addr_valid(unsigned long kaddr);
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
-#define __HAVE_ARCH_GATE_AREA 1
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 0f1ddee6a0ce..f408caf73430 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -39,4 +39,6 @@ void copy_page(void *to, void *from);
#endif /* !__ASSEMBLY__ */
+#define __HAVE_ARCH_GATE_AREA 1
+
#endif /* _ASM_X86_PAGE_64_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index bde3993624f1..b5ea75c4a4b4 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -118,4 +118,5 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
+ obj-$(CONFIG_KEXEC) += kexec-bzimage64.o
endif
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 9c8f7394c612..c7035073dfc1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -461,7 +461,7 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
- if (strict_strtoul(buf, 10, &val) < 0)
+ if (kstrtoul(buf, 10, &val) < 0)
return -EINVAL;
err = amd_set_l3_disable_slot(this_leaf->base.nb, cpu, slot, val);
@@ -511,7 +511,7 @@ store_subcaches(struct _cpuid4_info *this_leaf, const char *buf, size_t count,
if (!this_leaf->base.nb || !amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
return -EINVAL;
- if (strict_strtoul(buf, 16, &val) < 0)
+ if (kstrtoul(buf, 16, &val) < 0)
return -EINVAL;
if (amd_set_subcaches(cpu, val))
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 4fc57975acc1..bd9ccda8087f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2136,7 +2136,7 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr,
{
u64 new;
- if (strict_strtoull(buf, 0, &new) < 0)
+ if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
attr_to_bank(attr)->ctl = new;
@@ -2174,7 +2174,7 @@ static ssize_t set_ignore_ce(struct device *s,
{
u64 new;
- if (strict_strtoull(buf, 0, &new) < 0)
+ if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
if (mca_cfg.ignore_ce ^ !!new) {
@@ -2198,7 +2198,7 @@ static ssize_t set_cmci_disabled(struct device *s,
{
u64 new;
- if (strict_strtoull(buf, 0, &new) < 0)
+ if (kstrtou64(buf, 0, &new) < 0)
return -EINVAL;
if (mca_cfg.cmci_disabled ^ !!new) {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 603df4f74640..1e49f8f41276 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -353,7 +353,7 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
if (!b->interrupt_capable)
return -EINVAL;
- if (strict_strtoul(buf, 0, &new) < 0)
+ if (kstrtoul(buf, 0, &new) < 0)
return -EINVAL;
b->interrupt_enable = !!new;
@@ -372,7 +372,7 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
struct thresh_restart tr;
unsigned long new;
- if (strict_strtoul(buf, 0, &new) < 0)
+ if (kstrtoul(buf, 0, &new) < 0)
return -EINVAL;
if (new > THRESHOLD_MAX)
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 507de8066594..0553a34fa0df 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -4,9 +4,14 @@
* Created by: Hariprasad Nellitheertha (hari@in.ibm.com)
*
* Copyright (C) IBM Corporation, 2004. All rights reserved.
+ * Copyright (C) Red Hat Inc., 2014. All rights reserved.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
*
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/smp.h>
@@ -16,6 +21,7 @@
#include <linux/elf.h>
#include <linux/elfcore.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -28,6 +34,45 @@
#include <asm/reboot.h>
#include <asm/virtext.h>
+/* Alignment required for elf header segment */
+#define ELF_CORE_HEADER_ALIGN 4096
+
+/* This primarily represents number of split ranges due to exclusion */
+#define CRASH_MAX_RANGES 16
+
+struct crash_mem_range {
+ u64 start, end;
+};
+
+struct crash_mem {
+ unsigned int nr_ranges;
+ struct crash_mem_range ranges[CRASH_MAX_RANGES];
+};
+
+/* Misc data about ram ranges needed to prepare elf headers */
+struct crash_elf_data {
+ struct kimage *image;
+ /*
+ * Total number of ram ranges we have after various adjustments for
+ * GART, crash reserved region etc.
+ */
+ unsigned int max_nr_ranges;
+ unsigned long gart_start, gart_end;
+
+ /* Pointer to elf header */
+ void *ehdr;
+ /* Pointer to next phdr */
+ void *bufp;
+ struct crash_mem mem;
+};
+
+/* Used while preparing memory map entries for second kernel */
+struct crash_memmap_data {
+ struct boot_params *params;
+ /* Type of memory */
+ unsigned int type;
+};
+
int in_crash_kexec;
/*
@@ -39,6 +84,7 @@ int in_crash_kexec;
*/
crash_vmclear_fn __rcu *crash_vmclear_loaded_vmcss = NULL;
EXPORT_SYMBOL_GPL(crash_vmclear_loaded_vmcss);
+unsigned long crash_zero_bytes;
static inline void cpu_crash_vmclear_loaded_vmcss(void)
{
@@ -135,3 +181,520 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#endif
crash_save_cpu(regs, safe_smp_processor_id());
}
+
+#ifdef CONFIG_X86_64
+
+static int get_nr_ram_ranges_callback(unsigned long start_pfn,
+ unsigned long nr_pfn, void *arg)
+{
+ int *nr_ranges = arg;
+
+ (*nr_ranges)++;
+ return 0;
+}
+
+static int get_gart_ranges_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_elf_data *ced = arg;
+
+ ced->gart_start = start;
+ ced->gart_end = end;
+
+ /* Not expecting more than 1 gart aperture */
+ return 1;
+}
+
+
+/* Gather all the required information to prepare elf headers for ram regions */
+static void fill_up_crash_elf_data(struct crash_elf_data *ced,
+ struct kimage *image)
+{
+ unsigned int nr_ranges = 0;
+
+ ced->image = image;
+
+ walk_system_ram_range(0, -1, &nr_ranges,
+ get_nr_ram_ranges_callback);
+
+ ced->max_nr_ranges = nr_ranges;
+
+ /*
+ * We don't create ELF headers for GART aperture as an attempt
+ * to dump this memory in second kernel leads to hang/crash.
+ * If gart aperture is present, one needs to exclude that region
+ * and that could lead to need of extra phdr.
+ */
+ walk_iomem_res("GART", IORESOURCE_MEM, 0, -1,
+ ced, get_gart_ranges_callback);
+
+ /*
+ * If we have gart region, excluding that could potentially split
+ * a memory range, resulting in extra header. Account for that.
+ */
+ if (ced->gart_end)
+ ced->max_nr_ranges++;
+
+ /* Exclusion of crash region could split memory ranges */
+ ced->max_nr_ranges++;
+
+ /* If crashk_low_res is not 0, another range split possible */
+ if (crashk_low_res.end != 0)
+ ced->max_nr_ranges++;
+}
+
+static int exclude_mem_range(struct crash_mem *mem,
+ unsigned long long mstart, unsigned long long mend)
+{
+ int i, j;
+ unsigned long long start, end;
+ struct crash_mem_range temp_range = {0, 0};
+
+ for (i = 0; i < mem->nr_ranges; i++) {
+ start = mem->ranges[i].start;
+ end = mem->ranges[i].end;
+
+ if (mstart > end || mend < start)
+ continue;
+
+ /* Truncate any area outside of range */
+ if (mstart < start)
+ mstart = start;
+ if (mend > end)
+ mend = end;
+
+ /* Found completely overlapping range */
+ if (mstart == start && mend == end) {
+ mem->ranges[i].start = 0;
+ mem->ranges[i].end = 0;
+ if (i < mem->nr_ranges - 1) {
+ /* Shift rest of the ranges to left */
+ for (j = i; j < mem->nr_ranges - 1; j++) {
+ mem->ranges[j].start =
+ mem->ranges[j+1].start;
+ mem->ranges[j].end =
+ mem->ranges[j+1].end;
+ }
+ }
+ mem->nr_ranges--;
+ return 0;
+ }
+
+ if (mstart > start && mend < end) {
+ /* Split original range */
+ mem->ranges[i].end = mstart - 1;
+ temp_range.start = mend + 1;
+ temp_range.end = end;
+ } else if (mstart != start)
+ mem->ranges[i].end = mstart - 1;
+ else
+ mem->ranges[i].start = mend + 1;
+ break;
+ }
+
+ /* If a split happend, add the split to array */
+ if (!temp_range.end)
+ return 0;
+
+ /* Split happened */
+ if (i == CRASH_MAX_RANGES - 1) {
+ pr_err("Too many crash ranges after split\n");
+ return -ENOMEM;
+ }
+
+ /* Location where new range should go */
+ j = i + 1;
+ if (j < mem->nr_ranges) {
+ /* Move over all ranges one slot towards the end */
+ for (i = mem->nr_ranges - 1; i >= j; i--)
+ mem->ranges[i + 1] = mem->ranges[i];
+ }
+
+ mem->ranges[j].start = temp_range.start;
+ mem->ranges[j].end = temp_range.end;
+ mem->nr_ranges++;
+ return 0;
+}
+
+/*
+ * Look for any unwanted ranges between mstart, mend and remove them. This
+ * might lead to split and split ranges are put in ced->mem.ranges[] array
+ */
+static int elf_header_exclude_ranges(struct crash_elf_data *ced,
+ unsigned long long mstart, unsigned long long mend)
+{
+ struct crash_mem *cmem = &ced->mem;
+ int ret = 0;
+
+ memset(cmem->ranges, 0, sizeof(cmem->ranges));
+
+ cmem->ranges[0].start = mstart;
+ cmem->ranges[0].end = mend;
+ cmem->nr_ranges = 1;
+
+ /* Exclude crashkernel region */
+ ret = exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
+ if (ret)
+ return ret;
+
+ ret = exclude_mem_range(cmem, crashk_low_res.start, crashk_low_res.end);
+ if (ret)
+ return ret;
+
+ /* Exclude GART region */
+ if (ced->gart_end) {
+ ret = exclude_mem_range(cmem, ced->gart_start, ced->gart_end);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
+static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_elf_data *ced = arg;
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ unsigned long mstart, mend;
+ struct kimage *image = ced->image;
+ struct crash_mem *cmem;
+ int ret, i;
+
+ ehdr = ced->ehdr;
+
+ /* Exclude unwanted mem ranges */
+ ret = elf_header_exclude_ranges(ced, start, end);
+ if (ret)
+ return ret;
+
+ /* Go through all the ranges in ced->mem.ranges[] and prepare phdr */
+ cmem = &ced->mem;
+
+ for (i = 0; i < cmem->nr_ranges; i++) {
+ mstart = cmem->ranges[i].start;
+ mend = cmem->ranges[i].end;
+
+ phdr = ced->bufp;
+ ced->bufp += sizeof(Elf64_Phdr);
+
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_offset = mstart;
+
+ /*
+ * If a range matches backup region, adjust offset to backup
+ * segment.
+ */
+ if (mstart == image->arch.backup_src_start &&
+ (mend - mstart + 1) == image->arch.backup_src_sz)
+ phdr->p_offset = image->arch.backup_load_addr;
+
+ phdr->p_paddr = mstart;
+ phdr->p_vaddr = (unsigned long long) __va(mstart);
+ phdr->p_filesz = phdr->p_memsz = mend - mstart + 1;
+ phdr->p_align = 0;
+ ehdr->e_phnum++;
+ pr_debug("Crash PT_LOAD elf header. phdr=%p vaddr=0x%llx, paddr=0x%llx, sz=0x%llx e_phnum=%d p_offset=0x%llx\n",
+ phdr, phdr->p_vaddr, phdr->p_paddr, phdr->p_filesz,
+ ehdr->e_phnum, phdr->p_offset);
+ }
+
+ return ret;
+}
+
+static int prepare_elf64_headers(struct crash_elf_data *ced,
+ void **addr, unsigned long *sz)
+{
+ Elf64_Ehdr *ehdr;
+ Elf64_Phdr *phdr;
+ unsigned long nr_cpus = num_possible_cpus(), nr_phdr, elf_sz;
+ unsigned char *buf, *bufp;
+ unsigned int cpu;
+ unsigned long long notes_addr;
+ int ret;
+
+ /* extra phdr for vmcoreinfo elf note */
+ nr_phdr = nr_cpus + 1;
+ nr_phdr += ced->max_nr_ranges;
+
+ /*
+ * kexec-tools creates an extra PT_LOAD phdr for kernel text mapping
+ * area on x86_64 (ffffffff80000000 - ffffffffa0000000).
+ * I think this is required by tools like gdb. So same physical
+ * memory will be mapped in two elf headers. One will contain kernel
+ * text virtual addresses and other will have __va(physical) addresses.
+ */
+
+ nr_phdr++;
+ elf_sz = sizeof(Elf64_Ehdr) + nr_phdr * sizeof(Elf64_Phdr);
+ elf_sz = ALIGN(elf_sz, ELF_CORE_HEADER_ALIGN);
+
+ buf = vzalloc(elf_sz);
+ if (!buf)
+ return -ENOMEM;
+
+ bufp = buf;
+ ehdr = (Elf64_Ehdr *)bufp;
+ bufp += sizeof(Elf64_Ehdr);
+ memcpy(ehdr->e_ident, ELFMAG, SELFMAG);
+ ehdr->e_ident[EI_CLASS] = ELFCLASS64;
+ ehdr->e_ident[EI_DATA] = ELFDATA2LSB;
+ ehdr->e_ident[EI_VERSION] = EV_CURRENT;
+ ehdr->e_ident[EI_OSABI] = ELF_OSABI;
+ memset(ehdr->e_ident + EI_PAD, 0, EI_NIDENT - EI_PAD);
+ ehdr->e_type = ET_CORE;
+ ehdr->e_machine = ELF_ARCH;
+ ehdr->e_version = EV_CURRENT;
+ ehdr->e_phoff = sizeof(Elf64_Ehdr);
+ ehdr->e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr->e_phentsize = sizeof(Elf64_Phdr);
+
+ /* Prepare one phdr of type PT_NOTE for each present cpu */
+ for_each_present_cpu(cpu) {
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_NOTE;
+ notes_addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpu));
+ phdr->p_offset = phdr->p_paddr = notes_addr;
+ phdr->p_filesz = phdr->p_memsz = sizeof(note_buf_t);
+ (ehdr->e_phnum)++;
+ }
+
+ /* Prepare one PT_NOTE header for vmcoreinfo */
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_NOTE;
+ phdr->p_offset = phdr->p_paddr = paddr_vmcoreinfo_note();
+ phdr->p_filesz = phdr->p_memsz = sizeof(vmcoreinfo_note);
+ (ehdr->e_phnum)++;
+
+#ifdef CONFIG_X86_64
+ /* Prepare PT_LOAD type program header for kernel text region */
+ phdr = (Elf64_Phdr *)bufp;
+ bufp += sizeof(Elf64_Phdr);
+ phdr->p_type = PT_LOAD;
+ phdr->p_flags = PF_R|PF_W|PF_X;
+ phdr->p_vaddr = (Elf64_Addr)_text;
+ phdr->p_filesz = phdr->p_memsz = _end - _text;
+ phdr->p_offset = phdr->p_paddr = __pa_symbol(_text);
+ (ehdr->e_phnum)++;
+#endif
+
+ /* Prepare PT_LOAD headers for system ram chunks. */
+ ced->ehdr = ehdr;
+ ced->bufp = bufp;
+ ret = walk_system_ram_res(0, -1, ced,
+ prepare_elf64_ram_headers_callback);
+ if (ret < 0)
+ return ret;
+
+ *addr = buf;
+ *sz = elf_sz;
+ return 0;
+}
+
+/* Prepare elf headers. Return addr and size */
+static int prepare_elf_headers(struct kimage *image, void **addr,
+ unsigned long *sz)
+{
+ struct crash_elf_data *ced;
+ int ret;
+
+ ced = kzalloc(sizeof(*ced), GFP_KERNEL);
+ if (!ced)
+ return -ENOMEM;
+
+ fill_up_crash_elf_data(ced, image);
+
+ /* By default prepare 64bit headers */
+ ret = prepare_elf64_headers(ced, addr, sz);
+ kfree(ced);
+ return ret;
+}
+
+static int add_e820_entry(struct boot_params *params, struct e820entry *entry)
+{
+ unsigned int nr_e820_entries;
+
+ nr_e820_entries = params->e820_entries;
+ if (nr_e820_entries >= E820MAX)
+ return 1;
+
+ memcpy(&params->e820_map[nr_e820_entries], entry,
+ sizeof(struct e820entry));
+ params->e820_entries++;
+ return 0;
+}
+
+static int memmap_entry_callback(u64 start, u64 end, void *arg)
+{
+ struct crash_memmap_data *cmd = arg;
+ struct boot_params *params = cmd->params;
+ struct e820entry ei;
+
+ ei.addr = start;
+ ei.size = end - start + 1;
+ ei.type = cmd->type;
+ add_e820_entry(params, &ei);
+
+ return 0;
+}
+
+static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
+ unsigned long long mstart,
+ unsigned long long mend)
+{
+ unsigned long start, end;
+ int ret = 0;
+
+ cmem->ranges[0].start = mstart;
+ cmem->ranges[0].end = mend;
+ cmem->nr_ranges = 1;
+
+ /* Exclude Backup region */
+ start = image->arch.backup_load_addr;
+ end = start + image->arch.backup_src_sz - 1;
+ ret = exclude_mem_range(cmem, start, end);
+ if (ret)
+ return ret;
+
+ /* Exclude elf header region */
+ start = image->arch.elf_load_addr;
+ end = start + image->arch.elf_headers_sz - 1;
+ return exclude_mem_range(cmem, start, end);
+}
+
+/* Prepare memory map for crash dump kernel */
+int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
+{
+ int i, ret = 0;
+ unsigned long flags;
+ struct e820entry ei;
+ struct crash_memmap_data cmd;
+ struct crash_mem *cmem;
+
+ cmem = vzalloc(sizeof(struct crash_mem));
+ if (!cmem)
+ return -ENOMEM;
+
+ memset(&cmd, 0, sizeof(struct crash_memmap_data));
+ cmd.params = params;
+
+ /* Add first 640K segment */
+ ei.addr = image->arch.backup_src_start;
+ ei.size = image->arch.backup_src_sz;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+
+ /* Add ACPI tables */
+ cmd.type = E820_ACPI;
+ flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ walk_iomem_res("ACPI Tables", flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
+ /* Add ACPI Non-volatile Storage */
+ cmd.type = E820_NVS;
+ walk_iomem_res("ACPI Non-volatile Storage", flags, 0, -1, &cmd,
+ memmap_entry_callback);
+
+ /* Add crashk_low_res region */
+ if (crashk_low_res.end) {
+ ei.addr = crashk_low_res.start;
+ ei.size = crashk_low_res.end - crashk_low_res.start + 1;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+ }
+
+ /* Exclude some ranges from crashk_res and add rest to memmap */
+ ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
+ crashk_res.end);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < cmem->nr_ranges; i++) {
+ ei.size = cmem->ranges[i].end - cmem->ranges[i].start + 1;
+
+ /* If entry is less than a page, skip it */
+ if (ei.size < PAGE_SIZE)
+ continue;
+ ei.addr = cmem->ranges[i].start;
+ ei.type = E820_RAM;
+ add_e820_entry(params, &ei);
+ }
+
+out:
+ vfree(cmem);
+ return ret;
+}
+
+static int determine_backup_region(u64 start, u64 end, void *arg)
+{
+ struct kimage *image = arg;
+
+ image->arch.backup_src_start = start;
+ image->arch.backup_src_sz = end - start + 1;
+
+ /* Expecting only one range for backup region */
+ return 1;
+}
+
+int crash_load_segments(struct kimage *image)
+{
+ unsigned long src_start, src_sz, elf_sz;
+ void *elf_addr;
+ int ret;
+
+ /*
+ * Determine and load a segment for backup area. First 640K RAM
+ * region is backup source
+ */
+
+ ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
+ image, determine_backup_region);
+
+ /* Zero or postive return values are ok */
+ if (ret < 0)
+ return ret;
+
+ src_start = image->arch.backup_src_start;
+ src_sz = image->arch.backup_src_sz;
+
+ /* Add backup segment. */
+ if (src_sz) {
+ /*
+ * Ideally there is no source for backup segment. This is
+ * copied in purgatory after crash. Just add a zero filled
+ * segment for now to make sure checksum logic works fine.
+ */
+ ret = kexec_add_buffer(image, (char *)&crash_zero_bytes,
+ sizeof(crash_zero_bytes), src_sz,
+ PAGE_SIZE, 0, -1, 0,
+ &image->arch.backup_load_addr);
+ if (ret)
+ return ret;
+ pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
+ image->arch.backup_load_addr, src_start, src_sz);
+ }
+
+ /* Prepare elf headers and add a segment */
+ ret = prepare_elf_headers(image, &elf_addr, &elf_sz);
+ if (ret)
+ return ret;
+
+ image->arch.elf_headers = elf_addr;
+ image->arch.elf_headers_sz = elf_sz;
+
+ ret = kexec_add_buffer(image, (char *)elf_addr, elf_sz, elf_sz,
+ ELF_CORE_HEADER_ALIGN, 0, -1, 0,
+ &image->arch.elf_load_addr);
+ if (ret) {
+ vfree((void *)image->arch.elf_headers);
+ return ret;
+ }
+ pr_debug("Loaded ELF headers at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ image->arch.elf_load_addr, elf_sz, elf_sz);
+
+ return ret;
+}
+
+#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
new file mode 100644
index 000000000000..9642b9b33655
--- /dev/null
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -0,0 +1,553 @@
+/*
+ * Kexec bzImage loader
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ * Authors:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#define pr_fmt(fmt) "kexec-bzImage64: " fmt
+
+#include <linux/string.h>
+#include <linux/printk.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/efi.h>
+#include <linux/verify_pefile.h>
+#include <keys/system_keyring.h>
+
+#include <asm/bootparam.h>
+#include <asm/setup.h>
+#include <asm/crash.h>
+#include <asm/efi.h>
+
+#define MAX_ELFCOREHDR_STR_LEN 30 /* elfcorehdr=0x<64bit-value> */
+
+/*
+ * Defines lowest physical address for various segments. Not sure where
+ * exactly these limits came from. Current bzimage64 loader in kexec-tools
+ * uses these so I am retaining it. It can be changed over time as we gain
+ * more insight.
+ */
+#define MIN_PURGATORY_ADDR 0x3000
+#define MIN_BOOTPARAM_ADDR 0x3000
+#define MIN_KERNEL_LOAD_ADDR 0x100000
+#define MIN_INITRD_LOAD_ADDR 0x1000000
+
+/*
+ * This is a place holder for all boot loader specific data structure which
+ * gets allocated in one call but gets freed much later during cleanup
+ * time. Right now there is only one field but it can grow as need be.
+ */
+struct bzimage64_data {
+ /*
+ * Temporary buffer to hold bootparams buffer. This should be
+ * freed once the bootparam segment has been loaded.
+ */
+ void *bootparams_buf;
+};
+
+static int setup_initrd(struct boot_params *params,
+ unsigned long initrd_load_addr, unsigned long initrd_len)
+{
+ params->hdr.ramdisk_image = initrd_load_addr & 0xffffffffUL;
+ params->hdr.ramdisk_size = initrd_len & 0xffffffffUL;
+
+ params->ext_ramdisk_image = initrd_load_addr >> 32;
+ params->ext_ramdisk_size = initrd_len >> 32;
+
+ return 0;
+}
+
+static int setup_cmdline(struct kimage *image, struct boot_params *params,
+ unsigned long bootparams_load_addr,
+ unsigned long cmdline_offset, char *cmdline,
+ unsigned long cmdline_len)
+{
+ char *cmdline_ptr = ((char *)params) + cmdline_offset;
+ unsigned long cmdline_ptr_phys, len;
+ uint32_t cmdline_low_32, cmdline_ext_32;
+
+ memcpy(cmdline_ptr, cmdline, cmdline_len);
+ if (image->type == KEXEC_TYPE_CRASH) {
+ len = sprintf(cmdline_ptr + cmdline_len - 1,
+ " elfcorehdr=0x%lx", image->arch.elf_load_addr);
+ cmdline_len += len;
+ }
+ cmdline_ptr[cmdline_len - 1] = '\0';
+
+ pr_debug("Final command line is: %s\n", cmdline_ptr);
+ cmdline_ptr_phys = bootparams_load_addr + cmdline_offset;
+ cmdline_low_32 = cmdline_ptr_phys & 0xffffffffUL;
+ cmdline_ext_32 = cmdline_ptr_phys >> 32;
+
+ params->hdr.cmd_line_ptr = cmdline_low_32;
+ if (cmdline_ext_32)
+ params->ext_cmd_line_ptr = cmdline_ext_32;
+
+ return 0;
+}
+
+static int setup_e820_entries(struct boot_params *params)
+{
+ unsigned int nr_e820_entries;
+
+ nr_e820_entries = e820_saved.nr_map;
+
+ /* TODO: Pass entries more than E820MAX in bootparams setup data */
+ if (nr_e820_entries > E820MAX)
+ nr_e820_entries = E820MAX;
+
+ params->e820_entries = nr_e820_entries;
+ memcpy(&params->e820_map, &e820_saved.map,
+ nr_e820_entries * sizeof(struct e820entry));
+
+ return 0;
+}
+
+#ifdef CONFIG_EFI
+static int setup_efi_info_memmap(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_map_offset,
+ unsigned int efi_map_sz)
+{
+ void *efi_map = (void *)params + efi_map_offset;
+ unsigned long efi_map_phys_addr = params_load_addr + efi_map_offset;
+ struct efi_info *ei = &params->efi_info;
+
+ if (!efi_map_sz)
+ return 0;
+
+ efi_runtime_map_copy(efi_map, efi_map_sz);
+
+ ei->efi_memmap = efi_map_phys_addr & 0xffffffff;
+ ei->efi_memmap_hi = efi_map_phys_addr >> 32;
+ ei->efi_memmap_size = efi_map_sz;
+
+ return 0;
+}
+
+static int
+prepare_add_efi_setup_data(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_setup_data_offset)
+{
+ unsigned long setup_data_phys;
+ struct setup_data *sd = (void *)params + efi_setup_data_offset;
+ struct efi_setup_data *esd = (void *)sd + sizeof(struct setup_data);
+
+ esd->fw_vendor = efi.fw_vendor;
+ esd->runtime = efi.runtime;
+ esd->tables = efi.config_table;
+ esd->smbios = efi.smbios;
+
+ sd->type = SETUP_EFI;
+ sd->len = sizeof(struct efi_setup_data);
+
+ /* Add setup data */
+ setup_data_phys = params_load_addr + efi_setup_data_offset;
+ sd->next = params->hdr.setup_data;
+ params->hdr.setup_data = setup_data_phys;
+
+ return 0;
+}
+
+static int
+setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
+ unsigned int efi_map_offset, unsigned int efi_map_sz,
+ unsigned int efi_setup_data_offset)
+{
+ struct efi_info *current_ei = &boot_params.efi_info;
+ struct efi_info *ei = &params->efi_info;
+
+ if (!current_ei->efi_memmap_size)
+ return 0;
+
+ /*
+ * If 1:1 mapping is not enabled, second kernel can not setup EFI
+ * and use EFI run time services. User space will have to pass
+ * acpi_rsdp=<addr> on kernel command line to make second kernel boot
+ * without efi.
+ */
+ if (efi_enabled(EFI_OLD_MEMMAP))
+ return 0;
+
+ ei->efi_loader_signature = current_ei->efi_loader_signature;
+ ei->efi_systab = current_ei->efi_systab;
+ ei->efi_systab_hi = current_ei->efi_systab_hi;
+
+ ei->efi_memdesc_version = current_ei->efi_memdesc_version;
+ ei->efi_memdesc_size = efi_get_runtime_map_desc_size();
+
+ setup_efi_info_memmap(params, params_load_addr, efi_map_offset,
+ efi_map_sz);
+ prepare_add_efi_setup_data(params, params_load_addr,
+ efi_setup_data_offset);
+ return 0;
+}
+#endif /* CONFIG_EFI */
+
+static int
+setup_boot_parameters(struct kimage *image, struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int efi_map_offset, unsigned int efi_map_sz,
+ unsigned int efi_setup_data_offset)
+{
+ unsigned int nr_e820_entries;
+ unsigned long long mem_k, start, end;
+ int i, ret = 0;
+
+ /* Get subarch from existing bootparams */
+ params->hdr.hardware_subarch = boot_params.hdr.hardware_subarch;
+
+ /* Copying screen_info will do? */
+ memcpy(&params->screen_info, &boot_params.screen_info,
+ sizeof(struct screen_info));
+
+ /* Fill in memsize later */
+ params->screen_info.ext_mem_k = 0;
+ params->alt_mem_k = 0;
+
+ /* Default APM info */
+ memset(&params->apm_bios_info, 0, sizeof(params->apm_bios_info));
+
+ /* Default drive info */
+ memset(&params->hd0_info, 0, sizeof(params->hd0_info));
+ memset(&params->hd1_info, 0, sizeof(params->hd1_info));
+
+ /* Default sysdesc table */
+ params->sys_desc_table.length = 0;
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = crash_setup_memmap_entries(image, params);
+ if (ret)
+ return ret;
+ } else
+ setup_e820_entries(params);
+
+ nr_e820_entries = params->e820_entries;
+
+ for (i = 0; i < nr_e820_entries; i++) {
+ if (params->e820_map[i].type != E820_RAM)
+ continue;
+ start = params->e820_map[i].addr;
+ end = params->e820_map[i].addr + params->e820_map[i].size - 1;
+
+ if ((start <= 0x100000) && end > 0x100000) {
+ mem_k = (end >> 10) - (0x100000 >> 10);
+ params->screen_info.ext_mem_k = mem_k;
+ params->alt_mem_k = mem_k;
+ if (mem_k > 0xfc00)
+ params->screen_info.ext_mem_k = 0xfc00; /* 64M*/
+ if (mem_k > 0xffffffff)
+ params->alt_mem_k = 0xffffffff;
+ }
+ }
+
+#ifdef CONFIG_EFI
+ /* Setup EFI state */
+ setup_efi_state(params, params_load_addr, efi_map_offset, efi_map_sz,
+ efi_setup_data_offset);
+#endif
+
+ /* Setup EDD info */
+ memcpy(params->eddbuf, boot_params.eddbuf,
+ EDDMAXNR * sizeof(struct edd_info));
+ params->eddbuf_entries = boot_params.eddbuf_entries;
+
+ memcpy(params->edd_mbr_sig_buffer, boot_params.edd_mbr_sig_buffer,
+ EDD_MBR_SIG_MAX * sizeof(unsigned int));
+
+ return ret;
+}
+
+int bzImage64_probe(const char *buf, unsigned long len)
+{
+ int ret = -ENOEXEC;
+ struct setup_header *header;
+
+ /* kernel should be atleast two sectors long */
+ if (len < 2 * 512) {
+ pr_err("File is too short to be a bzImage\n");
+ return ret;
+ }
+
+ header = (struct setup_header *)(buf + offsetof(struct boot_params, hdr));
+ if (memcmp((char *)&header->header, "HdrS", 4) != 0) {
+ pr_err("Not a bzImage\n");
+ return ret;
+ }
+
+ if (header->boot_flag != 0xAA55) {
+ pr_err("No x86 boot sector present\n");
+ return ret;
+ }
+
+ if (header->version < 0x020C) {
+ pr_err("Must be at least protocol version 2.12\n");
+ return ret;
+ }
+
+ if (!(header->loadflags & LOADED_HIGH)) {
+ pr_err("zImage not a bzImage\n");
+ return ret;
+ }
+
+ if (!(header->xloadflags & XLF_KERNEL_64)) {
+ pr_err("Not a bzImage64. XLF_KERNEL_64 is not set.\n");
+ return ret;
+ }
+
+ if (!(header->xloadflags & XLF_CAN_BE_LOADED_ABOVE_4G)) {
+ pr_err("XLF_CAN_BE_LOADED_ABOVE_4G is not set.\n");
+ return ret;
+ }
+
+ /*
+ * Can't handle 32bit EFI as it does not allow loading kernel
+ * above 4G. This should be handled by 32bit bzImage loader
+ */
+ if (efi_enabled(EFI_RUNTIME_SERVICES) && !efi_enabled(EFI_64BIT)) {
+ pr_debug("EFI is 32 bit. Can't load kernel above 4G.\n");
+ return ret;
+ }
+
+ /* I've got a bzImage */
+ pr_debug("It's a relocatable bzImage64\n");
+ ret = 0;
+
+ return ret;
+}
+
+void *bzImage64_load(struct kimage *image, char *kernel,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+
+ struct setup_header *header;
+ int setup_sects, kern16_size, ret = 0;
+ unsigned long setup_header_size, params_cmdline_sz, params_misc_sz;
+ struct boot_params *params;
+ unsigned long bootparam_load_addr, kernel_load_addr, initrd_load_addr;
+ unsigned long purgatory_load_addr;
+ unsigned long kernel_bufsz, kernel_memsz, kernel_align;
+ char *kernel_buf;
+ struct bzimage64_data *ldata;
+ struct kexec_entry64_regs regs64;
+ void *stack;
+ unsigned int setup_hdr_offset = offsetof(struct boot_params, hdr);
+ unsigned int efi_map_offset, efi_map_sz, efi_setup_data_offset;
+
+ header = (struct setup_header *)(kernel + setup_hdr_offset);
+ setup_sects = header->setup_sects;
+ if (setup_sects == 0)
+ setup_sects = 4;
+
+ kern16_size = (setup_sects + 1) * 512;
+ if (kernel_len < kern16_size) {
+ pr_err("bzImage truncated\n");
+ return ERR_PTR(-ENOEXEC);
+ }
+
+ if (cmdline_len > header->cmdline_size) {
+ pr_err("Kernel command line too long\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+ * In case of crash dump, we will append elfcorehdr=<addr> to
+ * command line. Make sure it does not overflow
+ */
+ if (cmdline_len + MAX_ELFCOREHDR_STR_LEN > header->cmdline_size) {
+ pr_debug("Appending elfcorehdr=<addr> to command line exceeds maximum allowed length\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ /* Allocate and load backup region */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = crash_load_segments(image);
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ /*
+ * Load purgatory. For 64bit entry point, purgatory code can be
+ * anywhere.
+ */
+ ret = kexec_load_purgatory(image, MIN_PURGATORY_ADDR, ULONG_MAX, 1,
+ &purgatory_load_addr);
+ if (ret) {
+ pr_err("Loading purgatory failed\n");
+ return ERR_PTR(ret);
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+
+ /*
+ * Load Bootparams and cmdline and space for efi stuff.
+ *
+ * Allocate memory together for multiple data structures so
+ * that they all can go in single area/segment and we don't
+ * have to create separate segment for each. Keeps things
+ * little bit simple
+ */
+ efi_map_sz = efi_get_runtime_map_size();
+ efi_map_sz = ALIGN(efi_map_sz, 16);
+ params_cmdline_sz = sizeof(struct boot_params) + cmdline_len +
+ MAX_ELFCOREHDR_STR_LEN;
+ params_cmdline_sz = ALIGN(params_cmdline_sz, 16);
+ params_misc_sz = params_cmdline_sz + efi_map_sz +
+ sizeof(struct setup_data) +
+ sizeof(struct efi_setup_data);
+
+ params = kzalloc(params_misc_sz, GFP_KERNEL);
+ if (!params)
+ return ERR_PTR(-ENOMEM);
+ efi_map_offset = params_cmdline_sz;
+ efi_setup_data_offset = efi_map_offset + efi_map_sz;
+
+ /* Copy setup header onto bootparams. Documentation/x86/boot.txt */
+ setup_header_size = 0x0202 + kernel[0x0201] - setup_hdr_offset;
+
+ /* Is there a limit on setup header size? */
+ memcpy(&params->hdr, (kernel + setup_hdr_offset), setup_header_size);
+
+ ret = kexec_add_buffer(image, (char *)params, params_misc_sz,
+ params_misc_sz, 16, MIN_BOOTPARAM_ADDR,
+ ULONG_MAX, 1, &bootparam_load_addr);
+ if (ret)
+ goto out_free_params;
+ pr_debug("Loaded boot_param, command line and misc at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ bootparam_load_addr, params_misc_sz, params_misc_sz);
+
+ /* Load kernel */
+ kernel_buf = kernel + kern16_size;
+ kernel_bufsz = kernel_len - kern16_size;
+ kernel_memsz = PAGE_ALIGN(header->init_size);
+ kernel_align = header->kernel_alignment;
+
+ ret = kexec_add_buffer(image, kernel_buf,
+ kernel_bufsz, kernel_memsz, kernel_align,
+ MIN_KERNEL_LOAD_ADDR, ULONG_MAX, 1,
+ &kernel_load_addr);
+ if (ret)
+ goto out_free_params;
+
+ pr_debug("Loaded 64bit kernel at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ kernel_load_addr, kernel_memsz, kernel_memsz);
+
+ /* Load initrd high */
+ if (initrd) {
+ ret = kexec_add_buffer(image, initrd, initrd_len, initrd_len,
+ PAGE_SIZE, MIN_INITRD_LOAD_ADDR,
+ ULONG_MAX, 1, &initrd_load_addr);
+ if (ret)
+ goto out_free_params;
+
+ pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n",
+ initrd_load_addr, initrd_len, initrd_len);
+
+ setup_initrd(params, initrd_load_addr, initrd_len);
+ }
+
+ setup_cmdline(image, params, bootparam_load_addr,
+ sizeof(struct boot_params), cmdline, cmdline_len);
+
+ /* bootloader info. Do we need a separate ID for kexec kernel loader? */
+ params->hdr.type_of_loader = 0x0D << 4;
+ params->hdr.loadflags = 0;
+
+ /* Setup purgatory regs for entry */
+ ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+ sizeof(regs64), 1);
+ if (ret)
+ goto out_free_params;
+
+ regs64.rbx = 0; /* Bootstrap Processor */
+ regs64.rsi = bootparam_load_addr;
+ regs64.rip = kernel_load_addr + 0x200;
+ stack = kexec_purgatory_get_symbol_addr(image, "stack_end");
+ if (IS_ERR(stack)) {
+ pr_err("Could not find address of symbol stack_end\n");
+ ret = -EINVAL;
+ goto out_free_params;
+ }
+
+ regs64.rsp = (unsigned long)stack;
+ ret = kexec_purgatory_get_set_symbol(image, "entry64_regs", &regs64,
+ sizeof(regs64), 0);
+ if (ret)
+ goto out_free_params;
+
+ ret = setup_boot_parameters(image, params, bootparam_load_addr,
+ efi_map_offset, efi_map_sz,
+ efi_setup_data_offset);
+ if (ret)
+ goto out_free_params;
+
+ /* Allocate loader specific data */
+ ldata = kzalloc(sizeof(struct bzimage64_data), GFP_KERNEL);
+ if (!ldata) {
+ ret = -ENOMEM;
+ goto out_free_params;
+ }
+
+ /*
+ * Store pointer to params so that it could be freed after loading
+ * params segment has been loaded and contents have been copied
+ * somewhere else.
+ */
+ ldata->bootparams_buf = params;
+ return ldata;
+
+out_free_params:
+ kfree(params);
+ return ERR_PTR(ret);
+}
+
+/* This cleanup function is called after various segments have been loaded */
+int bzImage64_cleanup(void *loader_data)
+{
+ struct bzimage64_data *ldata = loader_data;
+
+ if (!ldata)
+ return 0;
+
+ kfree(ldata->bootparams_buf);
+ ldata->bootparams_buf = NULL;
+
+ return 0;
+}
+
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+int bzImage64_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+ bool trusted;
+ int ret;
+
+ ret = verify_pefile_signature(kernel, kernel_len,
+ system_trusted_keyring, &trusted);
+ if (ret < 0)
+ return ret;
+ if (!trusted)
+ return -EKEYREJECTED;
+ return 0;
+}
+#endif
+
+struct kexec_file_ops kexec_bzImage64_ops = {
+ .probe = bzImage64_probe,
+ .load = bzImage64_load,
+ .cleanup = bzImage64_cleanup,
+#ifdef CONFIG_KEXEC_BZIMAGE_VERIFY_SIG
+ .verify_sig = bzImage64_verify_sig,
+#endif
+};
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 679cef0791cd..8b04018e5d1f 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -6,6 +6,8 @@
* Version 2. See the file COPYING for more details.
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/mm.h>
#include <linux/kexec.h>
#include <linux/string.h>
@@ -21,6 +23,11 @@
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
+#include <asm/kexec-bzimage64.h>
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+ &kexec_bzImage64_ops,
+};
static void free_transition_pgtable(struct kimage *image)
{
@@ -171,6 +178,38 @@ static void load_segments(void)
);
}
+/* Update purgatory as needed after various image segments have been prepared */
+static int arch_update_purgatory(struct kimage *image)
+{
+ int ret = 0;
+
+ if (!image->file_mode)
+ return 0;
+
+ /* Setup copying of backup region */
+ if (image->type == KEXEC_TYPE_CRASH) {
+ ret = kexec_purgatory_get_set_symbol(image, "backup_dest",
+ &image->arch.backup_load_addr,
+ sizeof(image->arch.backup_load_addr), 0);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "backup_src",
+ &image->arch.backup_src_start,
+ sizeof(image->arch.backup_src_start), 0);
+ if (ret)
+ return ret;
+
+ ret = kexec_purgatory_get_set_symbol(image, "backup_sz",
+ &image->arch.backup_src_sz,
+ sizeof(image->arch.backup_src_sz), 0);
+ if (ret)
+ return ret;
+ }
+
+ return ret;
+}
+
int machine_kexec_prepare(struct kimage *image)
{
unsigned long start_pgtable;
@@ -184,6 +223,11 @@ int machine_kexec_prepare(struct kimage *image)
if (result)
return result;
+ /* update purgatory as needed */
+ result = arch_update_purgatory(image);
+ if (result)
+ return result;
+
return 0;
}
@@ -283,3 +327,198 @@ void arch_crash_save_vmcoreinfo(void)
(unsigned long)&_text - __START_KERNEL);
}
+/* arch-dependent functionality related to kexec file-based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+ struct kexec_file_ops *fops;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+ fops = kexec_file_loaders[i];
+ if (!fops || !fops->probe)
+ continue;
+
+ ret = fops->probe(buf, buf_len);
+ if (!ret) {
+ image->fops = fops;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+ vfree(image->arch.elf_headers);
+ image->arch.elf_headers = NULL;
+
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+ return image->fops->load(image, image->kernel_buf,
+ image->kernel_buf_len, image->initrd_buf,
+ image->initrd_buf_len, image->cmdline_buf,
+ image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (!image->fops || !image->fops->cleanup)
+ return 0;
+
+ return image->fops->cleanup(image->image_loader_data);
+}
+
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *kernel,
+ unsigned long kernel_len)
+{
+ if (!image->fops || !image->fops->verify_sig) {
+ pr_debug("kernel loader does not support signature verification.");
+ return -EKEYREJECTED;
+ }
+
+ return image->fops->verify_sig(kernel, kernel_len);
+}
+
+/*
+ * Apply purgatory relocations.
+ *
+ * ehdr: Pointer to elf headers
+ * sechdrs: Pointer to section headers.
+ * relsec: section index of SHT_RELA section.
+ *
+ * TODO: Some of the code belongs to generic code. Move that in kexec.c.
+ */
+int arch_kexec_apply_relocations_add(const Elf64_Ehdr *ehdr,
+ Elf64_Shdr *sechdrs, unsigned int relsec)
+{
+ unsigned int i;
+ Elf64_Rela *rel;
+ Elf64_Sym *sym;
+ void *location;
+ Elf64_Shdr *section, *symtabsec;
+ unsigned long address, sec_base, value;
+ const char *strtab, *name, *shstrtab;
+
+ /*
+ * ->sh_offset has been modified to keep the pointer to section
+ * contents in memory
+ */
+ rel = (void *)sechdrs[relsec].sh_offset;
+
+ /* Section to which relocations apply */
+ section = &sechdrs[sechdrs[relsec].sh_info];
+
+ pr_debug("Applying relocate section %u to %u\n", relsec,
+ sechdrs[relsec].sh_info);
+
+ /* Associated symbol table */
+ symtabsec = &sechdrs[sechdrs[relsec].sh_link];
+
+ /* String table */
+ if (symtabsec->sh_link >= ehdr->e_shnum) {
+ /* Invalid strtab section number */
+ pr_err("Invalid string table section index %d\n",
+ symtabsec->sh_link);
+ return -ENOEXEC;
+ }
+
+ strtab = (char *)sechdrs[symtabsec->sh_link].sh_offset;
+
+ /* section header string table */
+ shstrtab = (char *)sechdrs[ehdr->e_shstrndx].sh_offset;
+
+ for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) {
+
+ /*
+ * rel[i].r_offset contains byte offset from beginning
+ * of section to the storage unit affected.
+ *
+ * This is location to update (->sh_offset). This is temporary
+ * buffer where section is currently loaded. This will finally
+ * be loaded to a different address later, pointed to by
+ * ->sh_addr. kexec takes care of moving it
+ * (kexec_load_segment()).
+ */
+ location = (void *)(section->sh_offset + rel[i].r_offset);
+
+ /* Final address of the location */
+ address = section->sh_addr + rel[i].r_offset;
+
+ /*
+ * rel[i].r_info contains information about symbol table index
+ * w.r.t which relocation must be made and type of relocation
+ * to apply. ELF64_R_SYM() and ELF64_R_TYPE() macros get
+ * these respectively.
+ */
+ sym = (Elf64_Sym *)symtabsec->sh_offset +
+ ELF64_R_SYM(rel[i].r_info);
+
+ if (sym->st_name)
+ name = strtab + sym->st_name;
+ else
+ name = shstrtab + sechdrs[sym->st_shndx].sh_name;
+
+ pr_debug("Symbol: %s info: %02x shndx: %02x value=%llx size: %llx\n",
+ name, sym->st_info, sym->st_shndx, sym->st_value,
+ sym->st_size);
+
+ if (sym->st_shndx == SHN_UNDEF) {
+ pr_err("Undefined symbol: %s\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_COMMON) {
+ pr_err("symbol '%s' in common section\n", name);
+ return -ENOEXEC;
+ }
+
+ if (sym->st_shndx == SHN_ABS)
+ sec_base = 0;
+ else if (sym->st_shndx >= ehdr->e_shnum) {
+ pr_err("Invalid section %d for symbol %s\n",
+ sym->st_shndx, name);
+ return -ENOEXEC;
+ } else
+ sec_base = sechdrs[sym->st_shndx].sh_addr;
+
+ value = sym->st_value;
+ value += sec_base;
+ value += rel[i].r_addend;
+
+ switch (ELF64_R_TYPE(rel[i].r_info)) {
+ case R_X86_64_NONE:
+ break;
+ case R_X86_64_64:
+ *(u64 *)location = value;
+ break;
+ case R_X86_64_32:
+ *(u32 *)location = value;
+ if (value != *(u32 *)location)
+ goto overflow;
+ break;
+ case R_X86_64_32S:
+ *(s32 *)location = value;
+ if ((s64)value != *(s32 *)location)
+ goto overflow;
+ break;
+ case R_X86_64_PC32:
+ value -= (u64)address;
+ *(u32 *)location = value;
+ break;
+ default:
+ pr_err("Unknown rela relocation: %llu\n",
+ ELF64_R_TYPE(rel[i].r_info));
+ return -ENOEXEC;
+ }
+ }
+ return 0;
+
+overflow:
+ pr_err("Overflow in relocation type %d value 0x%lx\n",
+ (int)ELF64_R_TYPE(rel[i].r_info), value);
+ return -ENOEXEC;
+}
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 1185fe7a7f47..9ade5cfb5a4c 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -273,7 +273,7 @@ static int mmu_audit_set(const char *val, const struct kernel_param *kp)
int ret;
unsigned long enable;
- ret = strict_strtoul(val, 10, &enable);
+ ret = kstrtoul(val, 10, &enable);
if (ret < 0)
return -EINVAL;
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index ed161c6e278b..3968d67d366b 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -1479,7 +1479,7 @@ static ssize_t ptc_proc_write(struct file *file, const char __user *user,
return count;
}
- if (strict_strtol(optstr, 10, &input_arg) < 0) {
+ if (kstrtol(optstr, 10, &input_arg) < 0) {
printk(KERN_DEBUG "%s is invalid\n", optstr);
return -EINVAL;
}
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
new file mode 100644
index 000000000000..7fde9ee438a4
--- /dev/null
+++ b/arch/x86/purgatory/Makefile
@@ -0,0 +1,30 @@
+purgatory-y := purgatory.o stack.o setup-x86_$(BITS).o sha256.o entry64.o string.o
+
+targets += $(purgatory-y)
+PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+
+LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
+targets += purgatory.ro
+
+# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That
+# in turn leaves some undefined symbols like __fentry__ in purgatory and not
+# sure how to relocate those. Like kexec-tools, use custom flags.
+
+KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -c -MD -Os -mcmodel=large
+
+$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+
+targets += kexec-purgatory.c
+
+quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = cat $(obj)/purgatory.ro | $(objtree)/scripts/basic/bin2c kexec_purgatory > $(obj)/kexec-purgatory.c
+
+$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
+ $(call if_changed,bin2c)
+
+
+# No loaders for 32bits yet.
+ifeq ($(CONFIG_X86_64),y)
+ obj-$(CONFIG_KEXEC) += kexec-purgatory.o
+endif
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
new file mode 100644
index 000000000000..d1a4291d3568
--- /dev/null
+++ b/arch/x86/purgatory/entry64.S
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
+ * Copyright (C) 2014 Red Hat Inc.
+
+ * Author(s): Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This code has been taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+ .text
+ .balign 16
+ .code64
+ .globl entry64, entry64_regs
+
+
+entry64:
+ /* Setup a gdt that should be preserved */
+ lgdt gdt(%rip)
+
+ /* load the data segments */
+ movl $0x18, %eax /* data segment */
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /* Setup new stack */
+ leaq stack_init(%rip), %rsp
+ pushq $0x10 /* CS */
+ leaq new_cs_exit(%rip), %rax
+ pushq %rax
+ lretq
+new_cs_exit:
+
+ /* Load the registers */
+ movq rax(%rip), %rax
+ movq rbx(%rip), %rbx
+ movq rcx(%rip), %rcx
+ movq rdx(%rip), %rdx
+ movq rsi(%rip), %rsi
+ movq rdi(%rip), %rdi
+ movq rsp(%rip), %rsp
+ movq rbp(%rip), %rbp
+ movq r8(%rip), %r8
+ movq r9(%rip), %r9
+ movq r10(%rip), %r10
+ movq r11(%rip), %r11
+ movq r12(%rip), %r12
+ movq r13(%rip), %r13
+ movq r14(%rip), %r14
+ movq r15(%rip), %r15
+
+ /* Jump to the new code... */
+ jmpq *rip(%rip)
+
+ .section ".rodata"
+ .balign 4
+entry64_regs:
+rax: .quad 0x0
+rcx: .quad 0x0
+rdx: .quad 0x0
+rbx: .quad 0x0
+rsp: .quad 0x0
+rbp: .quad 0x0
+rsi: .quad 0x0
+rdi: .quad 0x0
+r8: .quad 0x0
+r9: .quad 0x0
+r10: .quad 0x0
+r11: .quad 0x0
+r12: .quad 0x0
+r13: .quad 0x0
+r14: .quad 0x0
+r15: .quad 0x0
+rip: .quad 0x0
+ .size entry64_regs, . - entry64_regs
+
+ /* GDT */
+ .section ".rodata"
+ .balign 16
+gdt:
+ /* 0x00 unusable segment
+ * 0x08 unused
+ * so use them as gdt ptr
+ */
+ .word gdt_end - gdt - 1
+ .quad gdt
+ .word 0, 0, 0
+
+ /* 0x10 4GB flat code segment */
+ .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
+
+ /* 0x18 4GB flat data segment */
+ .word 0xFFFF, 0x0000, 0x9200, 0x00CF
+gdt_end:
+stack: .quad 0, 0
+stack_init:
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
new file mode 100644
index 000000000000..25e068ba3382
--- /dev/null
+++ b/arch/x86/purgatory/purgatory.c
@@ -0,0 +1,72 @@
+/*
+ * purgatory: Runs between two kernels
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include "sha256.h"
+#include "../boot/string.h"
+
+struct sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+unsigned long backup_dest = 0;
+unsigned long backup_src = 0;
+unsigned long backup_sz = 0;
+
+u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 };
+
+struct sha_region sha_regions[16] = {};
+
+/*
+ * On x86, second kernel requries first 640K of memory to boot. Copy
+ * first 640K to a backup region in reserved memory range so that second
+ * kernel can use first 640K.
+ */
+static int copy_backup_region(void)
+{
+ if (backup_dest)
+ memcpy((void *)backup_dest, (void *)backup_src, backup_sz);
+
+ return 0;
+}
+
+int verify_sha256_digest(void)
+{
+ struct sha_region *ptr, *end;
+ u8 digest[SHA256_DIGEST_SIZE];
+ struct sha256_state sctx;
+
+ sha256_init(&sctx);
+ end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])];
+ for (ptr = sha_regions; ptr < end; ptr++)
+ sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len);
+
+ sha256_final(&sctx, digest);
+
+ if (memcmp(digest, sha256_digest, sizeof(digest)))
+ return 1;
+
+ return 0;
+}
+
+void purgatory(void)
+{
+ int ret;
+
+ ret = verify_sha256_digest();
+ if (ret) {
+ /* loop forever */
+ for (;;)
+ ;
+ }
+ copy_backup_region();
+}
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
new file mode 100644
index 000000000000..fe3c91ba1bd0
--- /dev/null
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -0,0 +1,58 @@
+/*
+ * purgatory: setup code
+ *
+ * Copyright (C) 2003,2004 Eric Biederman (ebiederm@xmission.com)
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * This code has been taken from kexec-tools.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+ .text
+ .globl purgatory_start
+ .balign 16
+purgatory_start:
+ .code64
+
+ /* Load a gdt so I know what the segment registers are */
+ lgdt gdt(%rip)
+
+ /* load the data segments */
+ movl $0x18, %eax /* data segment */
+ movl %eax, %ds
+ movl %eax, %es
+ movl %eax, %ss
+ movl %eax, %fs
+ movl %eax, %gs
+
+ /* Setup a stack */
+ leaq lstack_end(%rip), %rsp
+
+ /* Call the C code */
+ call purgatory
+ jmp entry64
+
+ .section ".rodata"
+ .balign 16
+gdt: /* 0x00 unusable segment
+ * 0x08 unused
+ * so use them as the gdt ptr
+ */
+ .word gdt_end - gdt - 1
+ .quad gdt
+ .word 0, 0, 0
+
+ /* 0x10 4GB flat code segment */
+ .word 0xFFFF, 0x0000, 0x9A00, 0x00AF
+
+ /* 0x18 4GB flat data segment */
+ .word 0xFFFF, 0x0000, 0x9200, 0x00CF
+gdt_end:
+
+ .bss
+ .balign 4096
+lstack:
+ .skip 4096
+lstack_end:
diff --git a/arch/x86/purgatory/sha256.c b/arch/x86/purgatory/sha256.c
new file mode 100644
index 000000000000..548ca675a14a
--- /dev/null
+++ b/arch/x86/purgatory/sha256.c
@@ -0,0 +1,283 @@
+/*
+ * SHA-256, as specified in
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ *
+ * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ *
+ * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
+ * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ * Copyright (c) 2014 Red Hat Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ */
+
+#include <linux/bitops.h>
+#include <asm/byteorder.h>
+#include "sha256.h"
+#include "../boot/string.h"
+
+static inline u32 Ch(u32 x, u32 y, u32 z)
+{
+ return z ^ (x & (y ^ z));
+}
+
+static inline u32 Maj(u32 x, u32 y, u32 z)
+{
+ return (x & y) | (z & (x | y));
+}
+
+#define e0(x) (ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22))
+#define e1(x) (ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25))
+#define s0(x) (ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3))
+#define s1(x) (ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10))
+
+static inline void LOAD_OP(int I, u32 *W, const u8 *input)
+{
+ W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
+}
+
+static inline void BLEND_OP(int I, u32 *W)
+{
+ W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
+}
+
+static void sha256_transform(u32 *state, const u8 *input)
+{
+ u32 a, b, c, d, e, f, g, h, t1, t2;
+ u32 W[64];
+ int i;
+
+ /* load the input */
+ for (i = 0; i < 16; i++)
+ LOAD_OP(i, W, input);
+
+ /* now blend */
+ for (i = 16; i < 64; i++)
+ BLEND_OP(i, W);
+
+ /* load the state into our registers */
+ a = state[0]; b = state[1]; c = state[2]; d = state[3];
+ e = state[4]; f = state[5]; g = state[6]; h = state[7];
+
+ /* now iterate */
+ t1 = h + e1(e) + Ch(e, f, g) + 0x428a2f98 + W[0];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x71374491 + W[1];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xb5c0fbcf + W[2];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xe9b5dba5 + W[3];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x3956c25b + W[4];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x59f111f1 + W[5];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x923f82a4 + W[6];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xab1c5ed5 + W[7];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1 + t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xd807aa98 + W[8];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1 + t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x12835b01 + W[9];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1 + t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x243185be + W[10];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1 + t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x550c7dc3 + W[11];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1 + t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x72be5d74 + W[12];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1 + t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x80deb1fe + W[13];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1 + t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1 + t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
+ t2 = e0(a) + Maj(a, b, c); d += t1; h = t1+t2;
+ t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
+ t2 = e0(h) + Maj(h, a, b); c += t1; g = t1+t2;
+ t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
+ t2 = e0(g) + Maj(g, h, a); b += t1; f = t1+t2;
+ t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
+ t2 = e0(f) + Maj(f, g, h); a += t1; e = t1+t2;
+ t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
+ t2 = e0(e) + Maj(e, f, g); h += t1; d = t1+t2;
+ t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
+ t2 = e0(d) + Maj(d, e, f); g += t1; c = t1+t2;
+ t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
+ t2 = e0(c) + Maj(c, d, e); f += t1; b = t1+t2;
+ t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
+ t2 = e0(b) + Maj(b, c, d); e += t1; a = t1+t2;
+
+ state[0] += a; state[1] += b; state[2] += c; state[3] += d;
+ state[4] += e; state[5] += f; state[6] += g; state[7] += h;
+
+ /* clear any sensitive info... */
+ a = b = c = d = e = f = g = h = t1 = t2 = 0;
+ memset(W, 0, 64 * sizeof(u32));
+}
+
+int sha256_init(struct sha256_state *sctx)
+{
+ sctx->state[0] = SHA256_H0;
+ sctx->state[1] = SHA256_H1;
+ sctx->state[2] = SHA256_H2;
+ sctx->state[3] = SHA256_H3;
+ sctx->state[4] = SHA256_H4;
+ sctx->state[5] = SHA256_H5;
+ sctx->state[6] = SHA256_H6;
+ sctx->state[7] = SHA256_H7;
+ sctx->count = 0;
+
+ return 0;
+}
+
+int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+ unsigned int partial, done;
+ const u8 *src;
+
+ partial = sctx->count & 0x3f;
+ sctx->count += len;
+ done = 0;
+ src = data;
+
+ if ((partial + len) > 63) {
+ if (partial) {
+ done = -partial;
+ memcpy(sctx->buf + partial, data, done + 64);
+ src = sctx->buf;
+ }
+
+ do {
+ sha256_transform(sctx->state, src);
+ done += 64;
+ src = data + done;
+ } while (done + 63 < len);
+
+ partial = 0;
+ }
+ memcpy(sctx->buf + partial, src, len - done);
+
+ return 0;
+}
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+ __be32 *dst = (__be32 *)out;
+ __be64 bits;
+ unsigned int index, pad_len;
+ int i;
+ static const u8 padding[64] = { 0x80, };
+
+ /* Save number of bits */
+ bits = cpu_to_be64(sctx->count << 3);
+
+ /* Pad out to 56 mod 64. */
+ index = sctx->count & 0x3f;
+ pad_len = (index < 56) ? (56 - index) : ((64+56) - index);
+ sha256_update(sctx, padding, pad_len);
+
+ /* Append length (before padding) */
+ sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
+
+ /* Store state in digest */
+ for (i = 0; i < 8; i++)
+ dst[i] = cpu_to_be32(sctx->state[i]);
+
+ /* Zeroize sensitive information. */
+ memset(sctx, 0, sizeof(*sctx));
+
+ return 0;
+}
diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h
new file mode 100644
index 000000000000..bd15a4127735
--- /dev/null
+++ b/arch/x86/purgatory/sha256.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author: Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#ifndef SHA256_H
+#define SHA256_H
+
+
+#include <linux/types.h>
+#include <crypto/sha.h>
+
+extern int sha256_init(struct sha256_state *sctx);
+extern int sha256_update(struct sha256_state *sctx, const u8 *input,
+ unsigned int length);
+extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+
+#endif /* SHA256_H */
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
new file mode 100644
index 000000000000..3cefba1fefc8
--- /dev/null
+++ b/arch/x86/purgatory/stack.S
@@ -0,0 +1,19 @@
+/*
+ * purgatory: stack
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+ /* A stack for the loaded kernel.
+ * Seperate and in the data section so it can be prepopulated.
+ */
+ .data
+ .balign 4096
+ .globl stack, stack_end
+
+stack:
+ .skip 4096
+stack_end:
diff --git a/arch/x86/purgatory/string.c b/arch/x86/purgatory/string.c
new file mode 100644
index 000000000000..d886b1fa36f0
--- /dev/null
+++ b/arch/x86/purgatory/string.c
@@ -0,0 +1,13 @@
+/*
+ * Simple string functions.
+ *
+ * Copyright (C) 2014 Red Hat Inc.
+ *
+ * Author:
+ * Vivek Goyal <vgoyal@redhat.com>
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2. See the file COPYING for more details.
+ */
+
+#include "../boot/string.c"
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index d1b4a119d4a5..028b78168d85 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -362,3 +362,4 @@
353 i386 renameat2 sys_renameat2
354 i386 seccomp sys_seccomp
355 i386 getrandom sys_getrandom
+356 i386 memfd_create sys_memfd_create
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 252c804bb1aa..35dd922727b9 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -325,6 +325,8 @@
316 common renameat2 sys_renameat2
317 common seccomp sys_seccomp
318 common getrandom sys_getrandom
+319 common memfd_create sys_memfd_create
+320 common kexec_file_load sys_kexec_file_load
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0feee2fd5077..25a1022dd793 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -216,6 +216,5 @@ extern long elf_aux_hwcap;
#define ELF_HWCAP (elf_aux_hwcap)
#define SET_PERSONALITY(ex) do ; while(0)
-#define __HAVE_ARCH_GATE_AREA 1
#endif
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index c6492e75797b..f8fecaddcc0d 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -9,18 +9,3 @@ const char *arch_vma_name(struct vm_area_struct *vma)
return NULL;
}
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
diff --git a/arch/x86/vdso/vdso2c.h b/arch/x86/vdso/vdso2c.h
index fd57829b30d8..0224987556ce 100644
--- a/arch/x86/vdso/vdso2c.h
+++ b/arch/x86/vdso/vdso2c.h
@@ -109,16 +109,18 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
/* Validate mapping addresses. */
for (i = 0; i < sizeof(special_pages) / sizeof(special_pages[0]); i++) {
- if (!syms[i])
+ INT_BITS symval = syms[special_pages[i]];
+
+ if (!symval)
continue; /* The mapping isn't used; ignore it. */
- if (syms[i] % 4096)
+ if (symval % 4096)
fail("%s must be a multiple of 4096\n",
required_syms[i].name);
- if (syms[sym_vvar_start] > syms[i] + 4096)
- fail("%s underruns begin_vvar\n",
+ if (symval + 4096 < syms[sym_vvar_start])
+ fail("%s underruns vvar_start\n",
required_syms[i].name);
- if (syms[i] + 4096 > 0)
+ if (symval + 4096 > 0)
fail("%s is on the wrong side of the vdso text\n",
required_syms[i].name);
}
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index e4f7781ee162..e904c270573b 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -115,23 +115,6 @@ static __init int ia32_binfmt_init(void)
return 0;
}
__initcall(ia32_binfmt_init);
-#endif
-
-#else /* CONFIG_X86_32 */
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
- return NULL;
-}
-
-int in_gate_area(struct mm_struct *mm, unsigned long addr)
-{
- return 0;
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
+#endif /* CONFIG_SYSCTL */
#endif /* CONFIG_X86_64 */
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index aa6be2698669..c39702bc279d 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -533,14 +533,13 @@ static void he_init_tx_lbfp(struct he_dev *he_dev)
static int he_init_tpdrq(struct he_dev *he_dev)
{
- he_dev->tpdrq_base = pci_alloc_consistent(he_dev->pci_dev,
- CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq), &he_dev->tpdrq_phys);
+ he_dev->tpdrq_base = pci_zalloc_consistent(he_dev->pci_dev,
+ CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq),
+ &he_dev->tpdrq_phys);
if (he_dev->tpdrq_base == NULL) {
hprintk("failed to alloc tpdrq\n");
return -ENOMEM;
}
- memset(he_dev->tpdrq_base, 0,
- CONFIG_TPDRQ_SIZE * sizeof(struct he_tpdrq));
he_dev->tpdrq_tail = he_dev->tpdrq_base;
he_dev->tpdrq_head = he_dev->tpdrq_base;
@@ -804,13 +803,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
goto out_free_rbpl_virt;
}
- he_dev->rbpl_base = pci_alloc_consistent(he_dev->pci_dev,
- CONFIG_RBPL_SIZE * sizeof(struct he_rbp), &he_dev->rbpl_phys);
+ he_dev->rbpl_base = pci_zalloc_consistent(he_dev->pci_dev,
+ CONFIG_RBPL_SIZE * sizeof(struct he_rbp),
+ &he_dev->rbpl_phys);
if (he_dev->rbpl_base == NULL) {
hprintk("failed to alloc rbpl_base\n");
goto out_destroy_rbpl_pool;
}
- memset(he_dev->rbpl_base, 0, CONFIG_RBPL_SIZE * sizeof(struct he_rbp));
INIT_LIST_HEAD(&he_dev->rbpl_outstanding);
@@ -843,13 +842,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
/* rx buffer ready queue */
- he_dev->rbrq_base = pci_alloc_consistent(he_dev->pci_dev,
- CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq), &he_dev->rbrq_phys);
+ he_dev->rbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
+ CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq),
+ &he_dev->rbrq_phys);
if (he_dev->rbrq_base == NULL) {
hprintk("failed to allocate rbrq\n");
goto out_free_rbpl;
}
- memset(he_dev->rbrq_base, 0, CONFIG_RBRQ_SIZE * sizeof(struct he_rbrq));
he_dev->rbrq_head = he_dev->rbrq_base;
he_writel(he_dev, he_dev->rbrq_phys, G0_RBRQ_ST + (group * 16));
@@ -867,13 +866,13 @@ static int he_init_group(struct he_dev *he_dev, int group)
/* tx buffer ready queue */
- he_dev->tbrq_base = pci_alloc_consistent(he_dev->pci_dev,
- CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq), &he_dev->tbrq_phys);
+ he_dev->tbrq_base = pci_zalloc_consistent(he_dev->pci_dev,
+ CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq),
+ &he_dev->tbrq_phys);
if (he_dev->tbrq_base == NULL) {
hprintk("failed to allocate tbrq\n");
goto out_free_rbpq_base;
}
- memset(he_dev->tbrq_base, 0, CONFIG_TBRQ_SIZE * sizeof(struct he_tbrq));
he_dev->tbrq_head = he_dev->tbrq_base;
@@ -1460,13 +1459,13 @@ static int he_start(struct atm_dev *dev)
/* host status page */
- he_dev->hsp = pci_alloc_consistent(he_dev->pci_dev,
- sizeof(struct he_hsp), &he_dev->hsp_phys);
+ he_dev->hsp = pci_zalloc_consistent(he_dev->pci_dev,
+ sizeof(struct he_hsp),
+ &he_dev->hsp_phys);
if (he_dev->hsp == NULL) {
hprintk("failed to allocate host status page\n");
return -ENOMEM;
}
- memset(he_dev->hsp, 0, sizeof(struct he_hsp));
he_writel(he_dev, he_dev->hsp_phys, HSP_BA);
/* initialize framer */
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index b621f56a36be..2b24ed056728 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -641,13 +641,11 @@ alloc_scq(struct idt77252_dev *card, int class)
scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
if (!scq)
return NULL;
- scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE,
- &scq->paddr);
+ scq->base = pci_zalloc_consistent(card->pcidev, SCQ_SIZE, &scq->paddr);
if (scq->base == NULL) {
kfree(scq);
return NULL;
}
- memset(scq->base, 0, SCQ_SIZE);
scq->next = scq->base;
scq->last = scq->base + (SCQ_ENTRIES - 1);
@@ -972,13 +970,12 @@ init_rsq(struct idt77252_dev *card)
{
struct rsq_entry *rsqe;
- card->rsq.base = pci_alloc_consistent(card->pcidev, RSQSIZE,
- &card->rsq.paddr);
+ card->rsq.base = pci_zalloc_consistent(card->pcidev, RSQSIZE,
+ &card->rsq.paddr);
if (card->rsq.base == NULL) {
printk("%s: can't allocate RSQ.\n", card->name);
return -1;
}
- memset(card->rsq.base, 0, RSQSIZE);
card->rsq.last = card->rsq.base + RSQ_NUM_ENTRIES - 1;
card->rsq.next = card->rsq.last;
@@ -3400,14 +3397,14 @@ static int init_card(struct atm_dev *dev)
writel(0, SAR_REG_GP);
/* Initialize RAW Cell Handle Register */
- card->raw_cell_hnd = pci_alloc_consistent(card->pcidev, 2 * sizeof(u32),
- &card->raw_cell_paddr);
+ card->raw_cell_hnd = pci_zalloc_consistent(card->pcidev,
+ 2 * sizeof(u32),
+ &card->raw_cell_paddr);
if (!card->raw_cell_hnd) {
printk("%s: memory allocation failure.\n", card->name);
deinit_card(card);
return -1;
}
- memset(card->raw_cell_hnd, 0, 2 * sizeof(u32));
writel(card->raw_cell_paddr, SAR_REG_RAWHND);
IPRINTK("%s: raw cell handle is at 0x%p.\n", card->name,
card->raw_cell_hnd);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 125d84505738..811e11c82f32 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -6741,11 +6741,11 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
ErrorCode = -ENOMEM;
if (DataTransferLength > 0)
{
- DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
- DataTransferLength, &DataTransferBufferDMA);
+ DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
+ DataTransferLength,
+ &DataTransferBufferDMA);
if (DataTransferBuffer == NULL)
break;
- memset(DataTransferBuffer, 0, DataTransferLength);
}
else if (DataTransferLength < 0)
{
@@ -6877,11 +6877,11 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
ErrorCode = -ENOMEM;
if (DataTransferLength > 0)
{
- DataTransferBuffer = pci_alloc_consistent(Controller->PCIDevice,
- DataTransferLength, &DataTransferBufferDMA);
+ DataTransferBuffer = pci_zalloc_consistent(Controller->PCIDevice,
+ DataTransferLength,
+ &DataTransferBufferDMA);
if (DataTransferBuffer == NULL)
break;
- memset(DataTransferBuffer, 0, DataTransferLength);
}
else if (DataTransferLength < 0)
{
@@ -6899,14 +6899,14 @@ static long DAC960_gam_ioctl(struct file *file, unsigned int Request,
RequestSenseLength = UserCommand.RequestSenseLength;
if (RequestSenseLength > 0)
{
- RequestSenseBuffer = pci_alloc_consistent(Controller->PCIDevice,
- RequestSenseLength, &RequestSenseBufferDMA);
+ RequestSenseBuffer = pci_zalloc_consistent(Controller->PCIDevice,
+ RequestSenseLength,
+ &RequestSenseBufferDMA);
if (RequestSenseBuffer == NULL)
{
ErrorCode = -ENOMEM;
goto Failure2;
}
- memset(RequestSenseBuffer, 0, RequestSenseLength);
}
spin_lock_irqsave(&Controller->queue_lock, flags);
while ((Command = DAC960_AllocateCommand(Controller)) == NULL)
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index 4595c22f33f7..ff20f192b0f6 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -1014,24 +1014,21 @@ static CommandList_struct *cmd_special_alloc(ctlr_info_t *h)
u64bit temp64;
dma_addr_t cmd_dma_handle, err_dma_handle;
- c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
- sizeof(CommandList_struct), &cmd_dma_handle);
+ c = pci_zalloc_consistent(h->pdev, sizeof(CommandList_struct),
+ &cmd_dma_handle);
if (c == NULL)
return NULL;
- memset(c, 0, sizeof(CommandList_struct));
c->cmdindex = -1;
- c->err_info = (ErrorInfo_struct *)
- pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
- &err_dma_handle);
+ c->err_info = pci_zalloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
+ &err_dma_handle);
if (c->err_info == NULL) {
pci_free_consistent(h->pdev,
sizeof(CommandList_struct), c, cmd_dma_handle);
return NULL;
}
- memset(c->err_info, 0, sizeof(ErrorInfo_struct));
INIT_LIST_HEAD(&c->list);
c->busaddr = (__u32) cmd_dma_handle;
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index 608532d3f8c9..f0a089df85cc 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -4112,16 +4112,14 @@ static int skd_cons_skcomp(struct skd_device *skdev)
skdev->name, __func__, __LINE__,
nbytes, SKD_N_COMPLETION_ENTRY);
- skcomp = pci_alloc_consistent(skdev->pdev, nbytes,
- &skdev->cq_dma_address);
+ skcomp = pci_zalloc_consistent(skdev->pdev, nbytes,
+ &skdev->cq_dma_address);
if (skcomp == NULL) {
rc = -ENOMEM;
goto err_out;
}
- memset(skcomp, 0, nbytes);
-
skdev->skcomp_table = skcomp;
skdev->skerr_table = (struct fit_comp_error_info *)((char *)skcomp +
sizeof(*skcomp) *
@@ -4304,15 +4302,14 @@ static int skd_cons_skspcl(struct skd_device *skdev)
nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
+ skspcl->msg_buf =
+ pci_zalloc_consistent(skdev->pdev, nbytes,
+ &skspcl->mb_dma_address);
if (skspcl->msg_buf == NULL) {
rc = -ENOMEM;
goto err_out;
}
- memset(skspcl->msg_buf, 0, nbytes);
-
skspcl->req.sg = kzalloc(sizeof(struct scatterlist) *
SKD_N_SG_PER_SPECIAL, GFP_KERNEL);
if (skspcl->req.sg == NULL) {
@@ -4353,25 +4350,21 @@ static int skd_cons_sksb(struct skd_device *skdev)
nbytes = SKD_N_INTERNAL_BYTES;
- skspcl->data_buf = pci_alloc_consistent(skdev->pdev, nbytes,
- &skspcl->db_dma_address);
+ skspcl->data_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
+ &skspcl->db_dma_address);
if (skspcl->data_buf == NULL) {
rc = -ENOMEM;
goto err_out;
}
- memset(skspcl->data_buf, 0, nbytes);
-
nbytes = SKD_N_SPECIAL_FITMSG_BYTES;
- skspcl->msg_buf = pci_alloc_consistent(skdev->pdev, nbytes,
- &skspcl->mb_dma_address);
+ skspcl->msg_buf = pci_zalloc_consistent(skdev->pdev, nbytes,
+ &skspcl->mb_dma_address);
if (skspcl->msg_buf == NULL) {
rc = -ENOMEM;
goto err_out;
}
- memset(skspcl->msg_buf, 0, nbytes);
-
skspcl->req.sksg_list = skd_cons_sg_list(skdev, 1,
&skspcl->req.sksg_dma_address);
if (skspcl->req.sksg_list == NULL) {
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 12fea3e22348..8d2a7728434d 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -2617,14 +2617,13 @@ static int hifn_probe(struct pci_dev *pdev, const struct pci_device_id *id)
}
}
- dev->desc_virt = pci_alloc_consistent(pdev, sizeof(struct hifn_dma),
- &dev->desc_dma);
+ dev->desc_virt = pci_zalloc_consistent(pdev, sizeof(struct hifn_dma),
+ &dev->desc_dma);
if (!dev->desc_virt) {
dprintk("Failed to allocate descriptor rings.\n");
err = -ENOMEM;
goto err_out_unmap_bars;
}
- memset(dev->desc_virt, 0, sizeof(struct hifn_dma));
dev->pdev = pdev;
dev->irq = pdev->irq;
diff --git a/drivers/firmware/efi/runtime-map.c b/drivers/firmware/efi/runtime-map.c
index 97cdd16a2169..018c29a26615 100644
--- a/drivers/firmware/efi/runtime-map.c
+++ b/drivers/firmware/efi/runtime-map.c
@@ -138,6 +138,27 @@ add_sysfs_runtime_map_entry(struct kobject *kobj, int nr)
return entry;
}
+int efi_get_runtime_map_size(void)
+{
+ return nr_efi_runtime_map * efi_memdesc_size;
+}
+
+int efi_get_runtime_map_desc_size(void)
+{
+ return efi_memdesc_size;
+}
+
+int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+ size_t sz = efi_get_runtime_map_size();
+
+ if (sz > bufsz)
+ sz = bufsz;
+
+ memcpy(buf, efi_runtime_map, sz);
+ return 0;
+}
+
void efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size)
{
efi_runtime_map = map;
diff --git a/drivers/gpio/gpio-zevio.c b/drivers/gpio/gpio-zevio.c
index 54e54e4cc6c4..cd71e5769a76 100644
--- a/drivers/gpio/gpio-zevio.c
+++ b/drivers/gpio/gpio-zevio.c
@@ -18,6 +18,10 @@
#include <linux/slab.h>
#include <linux/gpio.h>
+#ifndef IOMEM
+#define IOMEM(x) ((void __force __iomem *)(x))
+#endif
+
/*
* Memory layout:
* This chip has four gpio sections, each controls 8 GPIOs.
diff --git a/drivers/gpu/drm/i810/i810_dma.c b/drivers/gpu/drm/i810/i810_dma.c
index e88bac1d781f..bae897de9468 100644
--- a/drivers/gpu/drm/i810/i810_dma.c
+++ b/drivers/gpu/drm/i810/i810_dma.c
@@ -393,15 +393,14 @@ static int i810_dma_initialize(struct drm_device *dev,
/* Program Hardware Status Page */
dev_priv->hw_status_page =
- pci_alloc_consistent(dev->pdev, PAGE_SIZE,
- &dev_priv->dma_status_page);
+ pci_zalloc_consistent(dev->pdev, PAGE_SIZE,
+ &dev_priv->dma_status_page);
if (!dev_priv->hw_status_page) {
dev->dev_private = (void *)dev_priv;
i810_dma_cleanup(dev);
DRM_ERROR("Can not allocate hardware status page\n");
return -ENOMEM;
}
- memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
DRM_DEBUG("hw status page @ %p\n", dev_priv->hw_status_page);
I810_WRITE(0x02080, dev_priv->dma_status_page);
diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c
index 00400c352c1a..766a71ccefed 100644
--- a/drivers/infiniband/hw/amso1100/c2.c
+++ b/drivers/infiniband/hw/amso1100/c2.c
@@ -604,16 +604,14 @@ static int c2_up(struct net_device *netdev)
tx_size = c2_port->tx_ring.count * sizeof(struct c2_tx_desc);
c2_port->mem_size = tx_size + rx_size;
- c2_port->mem = pci_alloc_consistent(c2dev->pcidev, c2_port->mem_size,
- &c2_port->dma);
+ c2_port->mem = pci_zalloc_consistent(c2dev->pcidev, c2_port->mem_size,
+ &c2_port->dma);
if (c2_port->mem == NULL) {
pr_debug("Unable to allocate memory for "
"host descriptor rings\n");
return -ENOMEM;
}
- memset(c2_port->mem, 0, c2_port->mem_size);
-
/* Create the Rx host descriptor ring */
if ((ret =
c2_rx_ring_alloc(&c2_port->rx_ring, c2_port->mem, c2_port->dma,
diff --git a/drivers/infiniband/hw/nes/nes_hw.c b/drivers/infiniband/hw/nes/nes_hw.c
index 90200245c5eb..02120d340d50 100644
--- a/drivers/infiniband/hw/nes/nes_hw.c
+++ b/drivers/infiniband/hw/nes/nes_hw.c
@@ -1003,13 +1003,13 @@ int nes_init_cqp(struct nes_device *nesdev)
(sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
sizeof(struct nes_hw_cqp_qp_context);
- nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
- &nesdev->cqp_pbase);
+ nesdev->cqp_vbase = pci_zalloc_consistent(nesdev->pcidev,
+ nesdev->cqp_mem_size,
+ &nesdev->cqp_pbase);
if (!nesdev->cqp_vbase) {
nes_debug(NES_DBG_INIT, "Unable to allocate memory for host descriptor rings\n");
return -ENOMEM;
}
- memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
/* Allocate a twice the number of CQP requests as the SQ size */
nesdev->nes_cqp_requests = kzalloc(sizeof(struct nes_cqp_request) *
@@ -1691,13 +1691,13 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
(NES_NIC_WQ_SIZE * 2 * sizeof(struct nes_hw_nic_cqe)) +
sizeof(struct nes_hw_nic_qp_context);
- nesvnic->nic_vbase = pci_alloc_consistent(nesdev->pcidev, nesvnic->nic_mem_size,
- &nesvnic->nic_pbase);
+ nesvnic->nic_vbase = pci_zalloc_consistent(nesdev->pcidev,
+ nesvnic->nic_mem_size,
+ &nesvnic->nic_pbase);
if (!nesvnic->nic_vbase) {
nes_debug(NES_DBG_INIT, "Unable to allocate memory for NIC host descriptor rings\n");
return -ENOMEM;
}
- memset(nesvnic->nic_vbase, 0, nesvnic->nic_mem_size);
nes_debug(NES_DBG_INIT, "Allocated NIC QP structures at %p (phys = %016lX), size = %u.\n",
nesvnic->nic_vbase, (unsigned long)nesvnic->nic_pbase, nesvnic->nic_mem_size);
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 218dd3574285..fef067c959fc 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -1616,8 +1616,8 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
entries, nescq->cq_mem_size, nescq->hw_cq.cq_number);
/* allocate the physical buffer space */
- mem = pci_alloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
- &nescq->hw_cq.cq_pbase);
+ mem = pci_zalloc_consistent(nesdev->pcidev, nescq->cq_mem_size,
+ &nescq->hw_cq.cq_pbase);
if (!mem) {
printk(KERN_ERR PFX "Unable to allocate pci memory for cq\n");
nes_free_resource(nesadapter, nesadapter->allocated_cqs, cq_num);
@@ -1625,7 +1625,6 @@ static struct ib_cq *nes_create_cq(struct ib_device *ibdev, int entries,
return ERR_PTR(-ENOMEM);
}
- memset(mem, 0, nescq->cq_mem_size);
nescq->hw_cq.cq_vbase = mem;
nescq->hw_cq.cq_head = 0;
nes_debug(NES_DBG_CQ, "CQ%u virtual address @ %p, phys = 0x%08X\n",
diff --git a/drivers/media/common/saa7146/saa7146_core.c b/drivers/media/common/saa7146/saa7146_core.c
index 34b0d0ddeef3..97afee672d07 100644
--- a/drivers/media/common/saa7146/saa7146_core.c
+++ b/drivers/media/common/saa7146/saa7146_core.c
@@ -421,23 +421,20 @@ static int saa7146_init_one(struct pci_dev *pci, const struct pci_device_id *ent
err = -ENOMEM;
/* get memory for various stuff */
- dev->d_rps0.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM,
- &dev->d_rps0.dma_handle);
+ dev->d_rps0.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
+ &dev->d_rps0.dma_handle);
if (!dev->d_rps0.cpu_addr)
goto err_free_irq;
- memset(dev->d_rps0.cpu_addr, 0x0, SAA7146_RPS_MEM);
- dev->d_rps1.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM,
- &dev->d_rps1.dma_handle);
+ dev->d_rps1.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
+ &dev->d_rps1.dma_handle);
if (!dev->d_rps1.cpu_addr)
goto err_free_rps0;
- memset(dev->d_rps1.cpu_addr, 0x0, SAA7146_RPS_MEM);
- dev->d_i2c.cpu_addr = pci_alloc_consistent(pci, SAA7146_RPS_MEM,
- &dev->d_i2c.dma_handle);
+ dev->d_i2c.cpu_addr = pci_zalloc_consistent(pci, SAA7146_RPS_MEM,
+ &dev->d_i2c.dma_handle);
if (!dev->d_i2c.cpu_addr)
goto err_free_rps1;
- memset(dev->d_i2c.cpu_addr, 0x0, SAA7146_RPS_MEM);
/* the rest + print status message */
diff --git a/drivers/media/common/saa7146/saa7146_fops.c b/drivers/media/common/saa7146/saa7146_fops.c
index d9e1d6395ed9..6c47f3fe9b0f 100644
--- a/drivers/media/common/saa7146/saa7146_fops.c
+++ b/drivers/media/common/saa7146/saa7146_fops.c
@@ -520,14 +520,15 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
configuration data) */
dev->ext_vv_data = ext_vv;
- vv->d_clipping.cpu_addr = pci_alloc_consistent(dev->pci, SAA7146_CLIPPING_MEM, &vv->d_clipping.dma_handle);
+ vv->d_clipping.cpu_addr =
+ pci_zalloc_consistent(dev->pci, SAA7146_CLIPPING_MEM,
+ &vv->d_clipping.dma_handle);
if( NULL == vv->d_clipping.cpu_addr ) {
ERR("out of memory. aborting.\n");
kfree(vv);
v4l2_ctrl_handler_free(hdl);
return -1;
}
- memset(vv->d_clipping.cpu_addr, 0x0, SAA7146_CLIPPING_MEM);
saa7146_video_uops.init(dev,vv);
if (dev->ext_vv_data->capabilities & V4L2_CAP_VBI_CAPTURE)
diff --git a/drivers/media/pci/bt8xx/bt878.c b/drivers/media/pci/bt8xx/bt878.c
index d0c281f41a0a..11765835d7b2 100644
--- a/drivers/media/pci/bt8xx/bt878.c
+++ b/drivers/media/pci/bt8xx/bt878.c
@@ -101,28 +101,20 @@ static int bt878_mem_alloc(struct bt878 *bt)
if (!bt->buf_cpu) {
bt->buf_size = 128 * 1024;
- bt->buf_cpu =
- pci_alloc_consistent(bt->dev, bt->buf_size,
- &bt->buf_dma);
-
+ bt->buf_cpu = pci_zalloc_consistent(bt->dev, bt->buf_size,
+ &bt->buf_dma);
if (!bt->buf_cpu)
return -ENOMEM;
-
- memset(bt->buf_cpu, 0, bt->buf_size);
}
if (!bt->risc_cpu) {
bt->risc_size = PAGE_SIZE;
- bt->risc_cpu =
- pci_alloc_consistent(bt->dev, bt->risc_size,
- &bt->risc_dma);
-
+ bt->risc_cpu = pci_zalloc_consistent(bt->dev, bt->risc_size,
+ &bt->risc_dma);
if (!bt->risc_cpu) {
bt878_mem_free(bt);
return -ENOMEM;
}
-
- memset(bt->risc_cpu, 0, bt->risc_size);
}
return 0;
diff --git a/drivers/media/pci/ngene/ngene-core.c b/drivers/media/pci/ngene/ngene-core.c
index 826228c3800e..4930b55fd5f4 100644
--- a/drivers/media/pci/ngene/ngene-core.c
+++ b/drivers/media/pci/ngene/ngene-core.c
@@ -1075,12 +1075,11 @@ static int AllocCommonBuffers(struct ngene *dev)
dev->ngenetohost = dev->FWInterfaceBuffer + 256;
dev->EventBuffer = dev->FWInterfaceBuffer + 512;
- dev->OverflowBuffer = pci_alloc_consistent(dev->pci_dev,
- OVERFLOW_BUFFER_SIZE,
- &dev->PAOverflowBuffer);
+ dev->OverflowBuffer = pci_zalloc_consistent(dev->pci_dev,
+ OVERFLOW_BUFFER_SIZE,
+ &dev->PAOverflowBuffer);
if (!dev->OverflowBuffer)
return -ENOMEM;
- memset(dev->OverflowBuffer, 0, OVERFLOW_BUFFER_SIZE);
for (i = STREAM_VIDEOIN1; i < MAX_STREAM; i++) {
int type = dev->card_info->io_type[i];
diff --git a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
index f166ffc9800a..cef7a00099ea 100644
--- a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
+++ b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
@@ -803,11 +803,9 @@ static int ttusb_alloc_iso_urbs(struct ttusb *ttusb)
{
int i;
- ttusb->iso_buffer = pci_alloc_consistent(NULL,
- ISO_FRAME_SIZE *
- FRAMES_PER_ISO_BUF *
- ISO_BUF_COUNT,
- &ttusb->iso_dma_handle);
+ ttusb->iso_buffer = pci_zalloc_consistent(NULL,
+ ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF * ISO_BUF_COUNT,
+ &ttusb->iso_dma_handle);
if (!ttusb->iso_buffer) {
dprintk("%s: pci_alloc_consistent - not enough memory\n",
@@ -815,9 +813,6 @@ static int ttusb_alloc_iso_urbs(struct ttusb *ttusb)
return -ENOMEM;
}
- memset(ttusb->iso_buffer, 0,
- ISO_FRAME_SIZE * FRAMES_PER_ISO_BUF * ISO_BUF_COUNT);
-
for (i = 0; i < ISO_BUF_COUNT; i++) {
struct urb *urb;
diff --git a/drivers/media/usb/ttusb-dec/ttusb_dec.c b/drivers/media/usb/ttusb-dec/ttusb_dec.c
index 29724af9b9ab..15ab584cf265 100644
--- a/drivers/media/usb/ttusb-dec/ttusb_dec.c
+++ b/drivers/media/usb/ttusb-dec/ttusb_dec.c
@@ -1151,11 +1151,9 @@ static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec)
dprintk("%s\n", __func__);
- dec->iso_buffer = pci_alloc_consistent(NULL,
- ISO_FRAME_SIZE *
- (FRAMES_PER_ISO_BUF *
- ISO_BUF_COUNT),
- &dec->iso_dma_handle);
+ dec->iso_buffer = pci_zalloc_consistent(NULL,
+ ISO_FRAME_SIZE * (FRAMES_PER_ISO_BUF * ISO_BUF_COUNT),
+ &dec->iso_dma_handle);
if (!dec->iso_buffer) {
dprintk("%s: pci_alloc_consistent - not enough memory\n",
@@ -1163,9 +1161,6 @@ static int ttusb_dec_alloc_iso_urbs(struct ttusb_dec *dec)
return -ENOMEM;
}
- memset(dec->iso_buffer, 0,
- ISO_FRAME_SIZE * (FRAMES_PER_ISO_BUF * ISO_BUF_COUNT));
-
for (i = 0; i < ISO_BUF_COUNT; i++) {
struct urb *urb;
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index e7cc9174e364..4a8fdc4721d5 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -481,37 +481,32 @@ static void pcnet32_realloc_tx_ring(struct net_device *dev,
dma_addr_t *new_dma_addr_list;
struct pcnet32_tx_head *new_tx_ring;
struct sk_buff **new_skb_list;
+ unsigned int entries = BIT(size);
pcnet32_purge_tx_ring(dev);
- new_tx_ring = pci_alloc_consistent(lp->pci_dev,
- sizeof(struct pcnet32_tx_head) *
- (1 << size),
- &new_ring_dma_addr);
- if (new_tx_ring == NULL) {
- netif_err(lp, drv, dev, "Consistent memory allocation failed\n");
+ new_tx_ring =
+ pci_zalloc_consistent(lp->pci_dev,
+ sizeof(struct pcnet32_tx_head) * entries,
+ &new_ring_dma_addr);
+ if (new_tx_ring == NULL)
return;
- }
- memset(new_tx_ring, 0, sizeof(struct pcnet32_tx_head) * (1 << size));
- new_dma_addr_list = kcalloc(1 << size, sizeof(dma_addr_t),
- GFP_ATOMIC);
+ new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC);
if (!new_dma_addr_list)
goto free_new_tx_ring;
- new_skb_list = kcalloc(1 << size, sizeof(struct sk_buff *),
- GFP_ATOMIC);
+ new_skb_list = kcalloc(entries, sizeof(struct sk_buff *), GFP_ATOMIC);
if (!new_skb_list)
goto free_new_lists;
kfree(lp->tx_skbuff);
kfree(lp->tx_dma_addr);
pci_free_consistent(lp->pci_dev,
- sizeof(struct pcnet32_tx_head) *
- lp->tx_ring_size, lp->tx_ring,
- lp->tx_ring_dma_addr);
+ sizeof(struct pcnet32_tx_head) * lp->tx_ring_size,
+ lp->tx_ring, lp->tx_ring_dma_addr);
- lp->tx_ring_size = (1 << size);
+ lp->tx_ring_size = entries;
lp->tx_mod_mask = lp->tx_ring_size - 1;
lp->tx_len_bits = (size << 12);
lp->tx_ring = new_tx_ring;
@@ -524,8 +519,7 @@ free_new_lists:
kfree(new_dma_addr_list);
free_new_tx_ring:
pci_free_consistent(lp->pci_dev,
- sizeof(struct pcnet32_tx_head) *
- (1 << size),
+ sizeof(struct pcnet32_tx_head) * entries,
new_tx_ring,
new_ring_dma_addr);
}
@@ -549,17 +543,14 @@ static void pcnet32_realloc_rx_ring(struct net_device *dev,
struct pcnet32_rx_head *new_rx_ring;
struct sk_buff **new_skb_list;
int new, overlap;
- unsigned int entries = 1 << size;
+ unsigned int entries = BIT(size);
- new_rx_ring = pci_alloc_consistent(lp->pci_dev,
- sizeof(struct pcnet32_rx_head) *
- entries,
- &new_ring_dma_addr);
- if (new_rx_ring == NULL) {
- netif_err(lp, drv, dev, "Consistent memory allocation failed\n");
+ new_rx_ring =
+ pci_zalloc_consistent(lp->pci_dev,
+ sizeof(struct pcnet32_rx_head) * entries,
+ &new_ring_dma_addr);
+ if (new_rx_ring == NULL)
return;
- }
- memset(new_rx_ring, 0, sizeof(struct pcnet32_rx_head) * entries);
new_dma_addr_list = kcalloc(entries, sizeof(dma_addr_t), GFP_ATOMIC);
if (!new_dma_addr_list)
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 4345332533ad..316e0c3fe048 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -831,17 +831,14 @@ static int atl1e_setup_ring_resources(struct atl1e_adapter *adapter)
/* real ring DMA buffer */
size = adapter->ring_size;
- adapter->ring_vir_addr = pci_alloc_consistent(pdev,
- adapter->ring_size, &adapter->ring_dma);
-
+ adapter->ring_vir_addr = pci_zalloc_consistent(pdev, adapter->ring_size,
+ &adapter->ring_dma);
if (adapter->ring_vir_addr == NULL) {
netdev_err(adapter->netdev,
"pci_alloc_consistent failed, size = D%d\n", size);
return -ENOMEM;
}
- memset(adapter->ring_vir_addr, 0, adapter->ring_size);
-
rx_page_desc = rx_ring->rx_page_desc;
/* Init TPD Ring */
diff --git a/drivers/net/ethernet/cisco/enic/vnic_dev.c b/drivers/net/ethernet/cisco/enic/vnic_dev.c
index 5abc496bcf29..37472ce4fac3 100644
--- a/drivers/net/ethernet/cisco/enic/vnic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/vnic_dev.c
@@ -432,14 +432,12 @@ int vnic_dev_fw_info(struct vnic_dev *vdev,
int err = 0;
if (!vdev->fw_info) {
- vdev->fw_info = pci_alloc_consistent(vdev->pdev,
- sizeof(struct vnic_devcmd_fw_info),
- &vdev->fw_info_pa);
+ vdev->fw_info = pci_zalloc_consistent(vdev->pdev,
+ sizeof(struct vnic_devcmd_fw_info),
+ &vdev->fw_info_pa);
if (!vdev->fw_info)
return -ENOMEM;
- memset(vdev->fw_info, 0, sizeof(struct vnic_devcmd_fw_info));
-
a0 = vdev->fw_info_pa;
a1 = sizeof(struct vnic_devcmd_fw_info);
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 69693384b58c..59915144aabb 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -1622,11 +1622,10 @@ static int sky2_alloc_buffers(struct sky2_port *sky2)
if (!sky2->tx_ring)
goto nomem;
- sky2->rx_le = pci_alloc_consistent(hw->pdev, RX_LE_BYTES,
- &sky2->rx_le_map);
+ sky2->rx_le = pci_zalloc_consistent(hw->pdev, RX_LE_BYTES,
+ &sky2->rx_le_map);
if (!sky2->rx_le)
goto nomem;
- memset(sky2->rx_le, 0, RX_LE_BYTES);
sky2->rx_ring = kcalloc(sky2->rx_pending, sizeof(struct rx_ring_info),
GFP_KERNEL);
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 064a48d0c368..cd5f106306d9 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -4409,14 +4409,13 @@ static int ksz_alloc_desc(struct dev_info *adapter)
DESC_ALIGNMENT;
adapter->desc_pool.alloc_virt =
- pci_alloc_consistent(
- adapter->pdev, adapter->desc_pool.alloc_size,
- &adapter->desc_pool.dma_addr);
+ pci_zalloc_consistent(adapter->pdev,
+ adapter->desc_pool.alloc_size,
+ &adapter->desc_pool.dma_addr);
if (adapter->desc_pool.alloc_virt == NULL) {
adapter->desc_pool.alloc_size = 0;
return 1;
}
- memset(adapter->desc_pool.alloc_virt, 0, adapter->desc_pool.alloc_size);
/* Align to the next cache line boundary. */
offset = (((ulong) adapter->desc_pool.alloc_virt % DESC_ALIGNMENT) ?
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
index 6f6be57f4690..b8d5270359cd 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_ctx.c
@@ -129,14 +129,12 @@ netxen_get_minidump_template(struct netxen_adapter *adapter)
return NX_RCODE_INVALID_ARGS;
}
- addr = pci_alloc_consistent(adapter->pdev, size, &md_template_addr);
-
+ addr = pci_zalloc_consistent(adapter->pdev, size, &md_template_addr);
if (!addr) {
dev_err(&adapter->pdev->dev, "Unable to allocate dmable memory for template.\n");
return -ENOMEM;
}
- memset(addr, 0, size);
memset(&cmd, 0, sizeof(cmd));
memset(&cmd.rsp, 1, sizeof(struct _cdrp_cmd));
cmd.req.cmd = NX_CDRP_CMD_GET_TEMP_HDR;
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index b40050e03a56..d836ace52277 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2727,23 +2727,22 @@ static void ql_free_shadow_space(struct ql_adapter *qdev)
static int ql_alloc_shadow_space(struct ql_adapter *qdev)
{
qdev->rx_ring_shadow_reg_area =
- pci_alloc_consistent(qdev->pdev,
- PAGE_SIZE, &qdev->rx_ring_shadow_reg_dma);
+ pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
+ &qdev->rx_ring_shadow_reg_dma);
if (qdev->rx_ring_shadow_reg_area == NULL) {
netif_err(qdev, ifup, qdev->ndev,
"Allocation of RX shadow space failed.\n");
return -ENOMEM;
}
- memset(qdev->rx_ring_shadow_reg_area, 0, PAGE_SIZE);
+
qdev->tx_ring_shadow_reg_area =
- pci_alloc_consistent(qdev->pdev, PAGE_SIZE,
- &qdev->tx_ring_shadow_reg_dma);
+ pci_zalloc_consistent(qdev->pdev, PAGE_SIZE,
+ &qdev->tx_ring_shadow_reg_dma);
if (qdev->tx_ring_shadow_reg_area == NULL) {
netif_err(qdev, ifup, qdev->ndev,
"Allocation of TX shadow space failed.\n");
goto err_wqp_sh_area;
}
- memset(qdev->tx_ring_shadow_reg_area, 0, PAGE_SIZE);
return 0;
err_wqp_sh_area:
diff --git a/drivers/net/irda/vlsi_ir.c b/drivers/net/irda/vlsi_ir.c
index 485006604bbc..58ef59469dd0 100644
--- a/drivers/net/irda/vlsi_ir.c
+++ b/drivers/net/irda/vlsi_ir.c
@@ -485,13 +485,13 @@ static int vlsi_create_hwif(vlsi_irda_dev_t *idev)
idev->virtaddr = NULL;
idev->busaddr = 0;
- ringarea = pci_alloc_consistent(idev->pdev, HW_RING_AREA_SIZE, &idev->busaddr);
+ ringarea = pci_zalloc_consistent(idev->pdev, HW_RING_AREA_SIZE,
+ &idev->busaddr);
if (!ringarea) {
IRDA_ERROR("%s: insufficient memory for descriptor rings\n",
__func__);
goto out;
}
- memset(ringarea, 0, HW_RING_AREA_SIZE);
hwmap = (struct ring_descr_hw *)ringarea;
idev->rx_ring = vlsi_alloc_ring(idev->pdev, hwmap, ringsize[1],
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index dfc6dfc56d52..1ab8e500fb77 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -3449,8 +3449,9 @@ static int ipw2100_msg_allocate(struct ipw2100_priv *priv)
return -ENOMEM;
for (i = 0; i < IPW_COMMAND_POOL_SIZE; i++) {
- v = pci_alloc_consistent(priv->pci_dev,
- sizeof(struct ipw2100_cmd_header), &p);
+ v = pci_zalloc_consistent(priv->pci_dev,
+ sizeof(struct ipw2100_cmd_header),
+ &p);
if (!v) {
printk(KERN_ERR DRV_NAME ": "
"%s: PCI alloc failed for msg "
@@ -3459,8 +3460,6 @@ static int ipw2100_msg_allocate(struct ipw2100_priv *priv)
break;
}
- memset(v, 0, sizeof(struct ipw2100_cmd_header));
-
priv->msg_buffers[i].type = COMMAND;
priv->msg_buffers[i].info.c_struct.cmd =
(struct ipw2100_cmd_header *)v;
@@ -4336,16 +4335,12 @@ static int status_queue_allocate(struct ipw2100_priv *priv, int entries)
IPW_DEBUG_INFO("enter\n");
q->size = entries * sizeof(struct ipw2100_status);
- q->drv =
- (struct ipw2100_status *)pci_alloc_consistent(priv->pci_dev,
- q->size, &q->nic);
+ q->drv = pci_zalloc_consistent(priv->pci_dev, q->size, &q->nic);
if (!q->drv) {
IPW_DEBUG_WARNING("Can not allocate status queue.\n");
return -ENOMEM;
}
- memset(q->drv, 0, q->size);
-
IPW_DEBUG_INFO("exit\n");
return 0;
@@ -4374,13 +4369,12 @@ static int bd_queue_allocate(struct ipw2100_priv *priv,
q->entries = entries;
q->size = entries * sizeof(struct ipw2100_bd);
- q->drv = pci_alloc_consistent(priv->pci_dev, q->size, &q->nic);
+ q->drv = pci_zalloc_consistent(priv->pci_dev, q->size, &q->nic);
if (!q->drv) {
IPW_DEBUG_INFO
("can't allocate shared memory for buffer descriptors\n");
return -ENOMEM;
}
- memset(q->drv, 0, q->size);
IPW_DEBUG_INFO("exit\n");
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index 9a3d4d6724f7..fc6cb215e761 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -1159,12 +1159,11 @@ static int mwl8k_rxq_init(struct ieee80211_hw *hw, int index)
size = MWL8K_RX_DESCS * priv->rxd_ops->rxd_size;
- rxq->rxd = pci_alloc_consistent(priv->pdev, size, &rxq->rxd_dma);
+ rxq->rxd = pci_zalloc_consistent(priv->pdev, size, &rxq->rxd_dma);
if (rxq->rxd == NULL) {
wiphy_err(hw->wiphy, "failed to alloc RX descriptors\n");
return -ENOMEM;
}
- memset(rxq->rxd, 0, size);
rxq->buf = kcalloc(MWL8K_RX_DESCS, sizeof(*rxq->buf), GFP_KERNEL);
if (rxq->buf == NULL) {
@@ -1451,12 +1450,11 @@ static int mwl8k_txq_init(struct ieee80211_hw *hw, int index)
size = MWL8K_TX_DESCS * sizeof(struct mwl8k_tx_desc);
- txq->txd = pci_alloc_consistent(priv->pdev, size, &txq->txd_dma);
+ txq->txd = pci_zalloc_consistent(priv->pdev, size, &txq->txd_dma);
if (txq->txd == NULL) {
wiphy_err(hw->wiphy, "failed to alloc TX descriptors\n");
return -ENOMEM;
}
- memset(txq->txd, 0, size);
txq->skb = kcalloc(MWL8K_TX_DESCS, sizeof(*txq->skb), GFP_KERNEL);
if (txq->skb == NULL) {
diff --git a/drivers/net/wireless/rtl818x/rtl8180/dev.c b/drivers/net/wireless/rtl818x/rtl8180/dev.c
index 4b904f708184..fcc45e5bf50a 100644
--- a/drivers/net/wireless/rtl818x/rtl8180/dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180/dev.c
@@ -972,16 +972,13 @@ static int rtl8180_init_rx_ring(struct ieee80211_hw *dev)
else
priv->rx_ring_sz = sizeof(struct rtl8180_rx_desc);
- priv->rx_ring = pci_alloc_consistent(priv->pdev,
- priv->rx_ring_sz * 32,
- &priv->rx_ring_dma);
-
+ priv->rx_ring = pci_zalloc_consistent(priv->pdev, priv->rx_ring_sz * 32,
+ &priv->rx_ring_dma);
if (!priv->rx_ring || (unsigned long)priv->rx_ring & 0xFF) {
wiphy_err(dev->wiphy, "Cannot allocate RX ring\n");
return -ENOMEM;
}
- memset(priv->rx_ring, 0, priv->rx_ring_sz * 32);
priv->rx_idx = 0;
for (i = 0; i < 32; i++) {
@@ -1040,14 +1037,14 @@ static int rtl8180_init_tx_ring(struct ieee80211_hw *dev,
dma_addr_t dma;
int i;
- ring = pci_alloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
+ ring = pci_zalloc_consistent(priv->pdev, sizeof(*ring) * entries,
+ &dma);
if (!ring || (unsigned long)ring & 0xFF) {
wiphy_err(dev->wiphy, "Cannot allocate TX ring (prio = %d)\n",
prio);
return -ENOMEM;
}
- memset(ring, 0, sizeof(*ring)*entries);
priv->tx_ring[prio].desc = ring;
priv->tx_ring[prio].dma = dma;
priv->tx_ring[prio].idx = 0;
diff --git a/drivers/net/wireless/rtlwifi/pci.c b/drivers/net/wireless/rtlwifi/pci.c
index dae55257f0e8..67d1ee6edcad 100644
--- a/drivers/net/wireless/rtlwifi/pci.c
+++ b/drivers/net/wireless/rtlwifi/pci.c
@@ -1092,16 +1092,14 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
u32 nextdescaddress;
int i;
- ring = pci_alloc_consistent(rtlpci->pdev,
- sizeof(*ring) * entries, &dma);
-
+ ring = pci_zalloc_consistent(rtlpci->pdev, sizeof(*ring) * entries,
+ &dma);
if (!ring || (unsigned long)ring & 0xFF) {
RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG,
"Cannot allocate TX ring (prio = %d)\n", prio);
return -ENOMEM;
}
- memset(ring, 0, sizeof(*ring) * entries);
rtlpci->tx_ring[prio].desc = ring;
rtlpci->tx_ring[prio].dma = dma;
rtlpci->tx_ring[prio].idx = 0;
@@ -1139,10 +1137,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw)
for (rx_queue_idx = 0; rx_queue_idx < RTL_PCI_MAX_RX_QUEUE;
rx_queue_idx++) {
rtlpci->rx_ring[rx_queue_idx].desc =
- pci_alloc_consistent(rtlpci->pdev,
- sizeof(*rtlpci->rx_ring[rx_queue_idx].
- desc) * rtlpci->rxringcount,
- &rtlpci->rx_ring[rx_queue_idx].dma);
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*rtlpci->rx_ring[rx_queue_idx].desc) * rtlpci->rxringcount,
+ &rtlpci->rx_ring[rx_queue_idx].dma);
if (!rtlpci->rx_ring[rx_queue_idx].desc ||
(unsigned long)rtlpci->rx_ring[rx_queue_idx].desc & 0xFF) {
@@ -1151,10 +1148,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw)
return -ENOMEM;
}
- memset(rtlpci->rx_ring[rx_queue_idx].desc, 0,
- sizeof(*rtlpci->rx_ring[rx_queue_idx].desc) *
- rtlpci->rxringcount);
-
rtlpci->rx_ring[rx_queue_idx].idx = 0;
/* If amsdu_8k is disabled, set buffersize to 4096. This
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 4de346017e9f..6da6cec9a651 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -683,14 +683,13 @@ static int twl_allocate_memory(TW_Device_Extension *tw_dev, int size, int which)
unsigned long *cpu_addr;
int retval = 1;
- cpu_addr = pci_alloc_consistent(tw_dev->tw_pci_dev, size*TW_Q_LENGTH, &dma_handle);
+ cpu_addr = pci_zalloc_consistent(tw_dev->tw_pci_dev, size * TW_Q_LENGTH,
+ &dma_handle);
if (!cpu_addr) {
TW_PRINTK(tw_dev->host, TW_DRIVER, 0x5, "Memory allocation failed");
goto out;
}
- memset(cpu_addr, 0, size*TW_Q_LENGTH);
-
for (i = 0; i < TW_Q_LENGTH; i++) {
switch(which) {
case 0:
diff --git a/drivers/scsi/a100u2w.c b/drivers/scsi/a100u2w.c
index 522570d297ca..7e33a61c1ba4 100644
--- a/drivers/scsi/a100u2w.c
+++ b/drivers/scsi/a100u2w.c
@@ -1125,23 +1125,19 @@ static int inia100_probe_one(struct pci_dev *pdev,
/* Get total memory needed for SCB */
sz = ORC_MAXQUEUE * sizeof(struct orc_scb);
- host->scb_virt = pci_alloc_consistent(pdev, sz,
- &host->scb_phys);
+ host->scb_virt = pci_zalloc_consistent(pdev, sz, &host->scb_phys);
if (!host->scb_virt) {
printk("inia100: SCB memory allocation error\n");
goto out_host_put;
}
- memset(host->scb_virt, 0, sz);
/* Get total memory needed for ESCB */
sz = ORC_MAXQUEUE * sizeof(struct orc_extended_scb);
- host->escb_virt = pci_alloc_consistent(pdev, sz,
- &host->escb_phys);
+ host->escb_virt = pci_zalloc_consistent(pdev, sz, &host->escb_phys);
if (!host->escb_virt) {
printk("inia100: ESCB memory allocation error\n");
goto out_free_scb_array;
}
- memset(host->escb_virt, 0, sz);
biosaddr = host->BIOScfg;
biosaddr = (biosaddr << 4);
diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 56467df3d6de..eb3e3e619155 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -3538,10 +3538,9 @@ static int be_queue_alloc(struct beiscsi_hba *phba, struct be_queue_info *q,
q->len = len;
q->entry_size = entry_size;
mem->size = len * entry_size;
- mem->va = pci_alloc_consistent(phba->pcidev, mem->size, &mem->dma);
+ mem->va = pci_zalloc_consistent(phba->pcidev, mem->size, &mem->dma);
if (!mem->va)
return -ENOMEM;
- memset(mem->va, 0, mem->size);
return 0;
}
@@ -4320,9 +4319,9 @@ static int beiscsi_get_boot_info(struct beiscsi_hba *phba)
"BM_%d : No boot session\n");
return ret;
}
- nonemb_cmd.va = pci_alloc_consistent(phba->ctrl.pdev,
- sizeof(*session_resp),
- &nonemb_cmd.dma);
+ nonemb_cmd.va = pci_zalloc_consistent(phba->ctrl.pdev,
+ sizeof(*session_resp),
+ &nonemb_cmd.dma);
if (nonemb_cmd.va == NULL) {
beiscsi_log(phba, KERN_ERR,
BEISCSI_LOG_INIT | BEISCSI_LOG_CONFIG,
@@ -4332,7 +4331,6 @@ static int beiscsi_get_boot_info(struct beiscsi_hba *phba)
return -ENOMEM;
}
- memset(nonemb_cmd.va, 0, sizeof(*session_resp));
tag = mgmt_get_session_info(phba, s_handle,
&nonemb_cmd);
if (!tag) {
diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index a3e56487616c..665afcb74a56 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c
@@ -900,13 +900,12 @@ free_cmd:
static int mgmt_alloc_cmd_data(struct beiscsi_hba *phba, struct be_dma_mem *cmd,
int iscsi_cmd, int size)
{
- cmd->va = pci_alloc_consistent(phba->ctrl.pdev, size, &cmd->dma);
+ cmd->va = pci_zalloc_consistent(phba->ctrl.pdev, size, &cmd->dma);
if (!cmd->va) {
beiscsi_log(phba, KERN_ERR, BEISCSI_LOG_CONFIG,
"BG_%d : Failed to allocate memory for if info\n");
return -ENOMEM;
}
- memset(cmd->va, 0, size);
cmd->size = size;
be_cmd_hdr_prepare(cmd->va, CMD_SUBSYSTEM_ISCSI, iscsi_cmd, size);
return 0;
diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
index 4255ce264abf..773da14cfa14 100644
--- a/drivers/scsi/csiostor/csio_wr.c
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -232,7 +232,7 @@ csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
q = wrm->q_arr[free_idx];
- q->vstart = pci_alloc_consistent(hw->pdev, qsz, &q->pstart);
+ q->vstart = pci_zalloc_consistent(hw->pdev, qsz, &q->pstart);
if (!q->vstart) {
csio_err(hw,
"Failed to allocate DMA memory for "
@@ -240,12 +240,6 @@ csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
return -1;
}
- /*
- * We need to zero out the contents, importantly for ingress,
- * since we start with a generatiom bit of 1 for ingress.
- */
- memset(q->vstart, 0, qsz);
-
q->type = type;
q->owner = owner;
q->pidx = q->cidx = q->inc_idx = 0;
diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index 03372cff38f3..813dd5c998e4 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -1238,8 +1238,8 @@ static int port_detect(unsigned long port_base, unsigned int j,
struct eata_config *cf;
dma_addr_t cf_dma_addr;
- cf = pci_alloc_consistent(pdev, sizeof(struct eata_config),
- &cf_dma_addr);
+ cf = pci_zalloc_consistent(pdev, sizeof(struct eata_config),
+ &cf_dma_addr);
if (!cf) {
printk
@@ -1249,7 +1249,6 @@ static int port_detect(unsigned long port_base, unsigned int j,
}
/* Set board configuration */
- memset((char *)cf, 0, sizeof(struct eata_config));
cf->len = (ushort) H2DEV16((ushort) 510);
cf->ocena = 1;
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 8545d1826725..6b35d0dfe64c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -4732,23 +4732,21 @@ static struct CommandList *cmd_special_alloc(struct ctlr_info *h)
union u64bit temp64;
dma_addr_t cmd_dma_handle, err_dma_handle;
- c = pci_alloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
+ c = pci_zalloc_consistent(h->pdev, sizeof(*c), &cmd_dma_handle);
if (c == NULL)
return NULL;
- memset(c, 0, sizeof(*c));
c->cmd_type = CMD_SCSI;
c->cmdindex = -1;
- c->err_info = pci_alloc_consistent(h->pdev, sizeof(*c->err_info),
- &err_dma_handle);
+ c->err_info = pci_zalloc_consistent(h->pdev, sizeof(*c->err_info),
+ &err_dma_handle);
if (c->err_info == NULL) {
pci_free_consistent(h->pdev,
sizeof(*c), c, cmd_dma_handle);
return NULL;
}
- memset(c->err_info, 0, sizeof(*c->err_info));
INIT_LIST_HEAD(&c->list);
c->busaddr = (u32) cmd_dma_handle;
diff --git a/drivers/scsi/megaraid/megaraid_mbox.c b/drivers/scsi/megaraid/megaraid_mbox.c
index e2237a97cb9d..531dce419c18 100644
--- a/drivers/scsi/megaraid/megaraid_mbox.c
+++ b/drivers/scsi/megaraid/megaraid_mbox.c
@@ -998,8 +998,9 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
* Allocate the common 16-byte aligned memory for the handshake
* mailbox.
*/
- raid_dev->una_mbox64 = pci_alloc_consistent(adapter->pdev,
- sizeof(mbox64_t), &raid_dev->una_mbox64_dma);
+ raid_dev->una_mbox64 = pci_zalloc_consistent(adapter->pdev,
+ sizeof(mbox64_t),
+ &raid_dev->una_mbox64_dma);
if (!raid_dev->una_mbox64) {
con_log(CL_ANN, (KERN_WARNING
@@ -1007,7 +1008,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
__LINE__));
return -1;
}
- memset(raid_dev->una_mbox64, 0, sizeof(mbox64_t));
/*
* Align the mailbox at 16-byte boundary
@@ -1026,8 +1026,8 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
align;
// Allocate memory for commands issued internally
- adapter->ibuf = pci_alloc_consistent(pdev, MBOX_IBUF_SIZE,
- &adapter->ibuf_dma_h);
+ adapter->ibuf = pci_zalloc_consistent(pdev, MBOX_IBUF_SIZE,
+ &adapter->ibuf_dma_h);
if (!adapter->ibuf) {
con_log(CL_ANN, (KERN_WARNING
@@ -1036,7 +1036,6 @@ megaraid_alloc_cmd_packets(adapter_t *adapter)
goto out_free_common_mbox;
}
- memset(adapter->ibuf, 0, MBOX_IBUF_SIZE);
// Allocate memory for our SCSI Command Blocks and their associated
// memory
@@ -2972,8 +2971,8 @@ megaraid_mbox_product_info(adapter_t *adapter)
* Issue an ENQUIRY3 command to find out certain adapter parameters,
* e.g., max channels, max commands etc.
*/
- pinfo = pci_alloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
- &pinfo_dma_h);
+ pinfo = pci_zalloc_consistent(adapter->pdev, sizeof(mraid_pinfo_t),
+ &pinfo_dma_h);
if (pinfo == NULL) {
con_log(CL_ANN, (KERN_WARNING
@@ -2982,7 +2981,6 @@ megaraid_mbox_product_info(adapter_t *adapter)
return -1;
}
- memset(pinfo, 0, sizeof(mraid_pinfo_t));
mbox->xferaddr = (uint32_t)adapter->ibuf_dma_h;
memset((void *)adapter->ibuf, 0, MBOX_IBUF_SIZE);
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 112799b131a9..22a04e37b70a 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2038,9 +2038,9 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
if (initial) {
instance->hb_host_mem =
- pci_alloc_consistent(instance->pdev,
- sizeof(struct MR_CTRL_HB_HOST_MEM),
- &instance->hb_host_mem_h);
+ pci_zalloc_consistent(instance->pdev,
+ sizeof(struct MR_CTRL_HB_HOST_MEM),
+ &instance->hb_host_mem_h);
if (!instance->hb_host_mem) {
printk(KERN_DEBUG "megasas: SR-IOV: Couldn't allocate"
" memory for heartbeat host memory for "
@@ -2048,8 +2048,6 @@ int megasas_sriov_start_heartbeat(struct megasas_instance *instance,
retval = -ENOMEM;
goto out;
}
- memset(instance->hb_host_mem, 0,
- sizeof(struct MR_CTRL_HB_HOST_MEM));
}
memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
diff --git a/drivers/scsi/mesh.c b/drivers/scsi/mesh.c
index 7a6160f172ce..57a95e2c3442 100644
--- a/drivers/scsi/mesh.c
+++ b/drivers/scsi/mesh.c
@@ -1915,14 +1915,12 @@ static int mesh_probe(struct macio_dev *mdev, const struct of_device_id *match)
/* We use the PCI APIs for now until the generic one gets fixed
* enough or until we get some macio-specific versions
*/
- dma_cmd_space = pci_alloc_consistent(macio_get_pci_dev(mdev),
- ms->dma_cmd_size,
- &dma_cmd_bus);
+ dma_cmd_space = pci_zalloc_consistent(macio_get_pci_dev(mdev),
+ ms->dma_cmd_size, &dma_cmd_bus);
if (dma_cmd_space == NULL) {
printk(KERN_ERR "mesh: can't allocate DMA table\n");
goto out_unmap;
}
- memset(dma_cmd_space, 0, ms->dma_cmd_size);
ms->dma_cmds = (struct dbdma_cmd *) DBDMA_ALIGN(dma_cmd_space);
ms->dma_cmd_space = dma_cmd_space;
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index edbee8dc62c9..3e716b2f611a 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -142,8 +142,8 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba,
case RESOURCE_UNCACHED_MEMORY:
size = round_up(size, 8);
- res->virt_addr = pci_alloc_consistent(mhba->pdev, size,
- &res->bus_addr);
+ res->virt_addr = pci_zalloc_consistent(mhba->pdev, size,
+ &res->bus_addr);
if (!res->virt_addr) {
dev_err(&mhba->pdev->dev,
"unable to allocate consistent mem,"
@@ -151,7 +151,6 @@ static struct mvumi_res *mvumi_alloc_mem_resource(struct mvumi_hba *mhba,
kfree(res);
return NULL;
}
- memset(res->virt_addr, 0, size);
break;
default:
@@ -258,12 +257,10 @@ static int mvumi_internal_cmd_sgl(struct mvumi_hba *mhba, struct mvumi_cmd *cmd,
if (size == 0)
return 0;
- virt_addr = pci_alloc_consistent(mhba->pdev, size, &phy_addr);
+ virt_addr = pci_zalloc_consistent(mhba->pdev, size, &phy_addr);
if (!virt_addr)
return -1;
- memset(virt_addr, 0, size);
-
m_sg = (struct mvumi_sgl *) &cmd->frame->payload[0];
cmd->frame->sg_counts = 1;
cmd->data_buf = virt_addr;
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 34cea8291772..76570e6a547d 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -116,13 +116,12 @@ int pm8001_mem_alloc(struct pci_dev *pdev, void **virt_addr,
u64 align_offset = 0;
if (align)
align_offset = (dma_addr_t)align - 1;
- mem_virt_alloc =
- pci_alloc_consistent(pdev, mem_size + align, &mem_dma_handle);
+ mem_virt_alloc = pci_zalloc_consistent(pdev, mem_size + align,
+ &mem_dma_handle);
if (!mem_virt_alloc) {
pm8001_printk("memory allocation error\n");
return -1;
}
- memset((void *)mem_virt_alloc, 0, mem_size+align);
*pphys_addr = mem_dma_handle;
phys_align = (*pphys_addr + align_offset) & ~align_offset;
*virt_addr = (void *)mem_virt_alloc + phys_align - *pphys_addr;
diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index 017f8b9554e5..6f3275d020a0 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c
@@ -4213,9 +4213,9 @@ static ssize_t pmcraid_store_log_level(
{
struct Scsi_Host *shost;
struct pmcraid_instance *pinstance;
- unsigned long val;
+ u8 val;
- if (strict_strtoul(buf, 10, &val))
+ if (kstrtou8(buf, 10, &val))
return -EINVAL;
/* log-level should be from 0 to 2 */
if (val > 2)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 406b3038bbad..8b4105a22ac2 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -910,9 +910,9 @@ sdev_store_queue_ramp_up_period(struct device *dev,
const char *buf, size_t count)
{
struct scsi_device *sdev = to_scsi_device(dev);
- unsigned long period;
+ unsigned int period;
- if (strict_strtoul(buf, 10, &period))
+ if (kstrtouint(buf, 10, &period))
return -EINVAL;
sdev->queue_ramp_up_period = msecs_to_jiffies(period);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 2920e406030a..5729cf678765 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -2065,20 +2065,16 @@ static short rtl8192_alloc_rx_desc_ring(struct net_device *dev)
int i, rx_queue_idx;
for (rx_queue_idx = 0; rx_queue_idx < MAX_RX_QUEUE; rx_queue_idx++) {
- priv->rx_ring[rx_queue_idx] = pci_alloc_consistent(priv->pdev,
- sizeof(*priv->rx_ring[rx_queue_idx]) *
- priv->rxringcount,
- &priv->rx_ring_dma[rx_queue_idx]);
-
+ priv->rx_ring[rx_queue_idx] =
+ pci_zalloc_consistent(priv->pdev,
+ sizeof(*priv->rx_ring[rx_queue_idx]) * priv->rxringcount,
+ &priv->rx_ring_dma[rx_queue_idx]);
if (!priv->rx_ring[rx_queue_idx] ||
(unsigned long)priv->rx_ring[rx_queue_idx] & 0xFF) {
RT_TRACE(COMP_ERR, "Cannot allocate RX ring\n");
return -ENOMEM;
}
- memset(priv->rx_ring[rx_queue_idx], 0,
- sizeof(*priv->rx_ring[rx_queue_idx]) *
- priv->rxringcount);
priv->rx_idx[rx_queue_idx] = 0;
for (i = 0; i < priv->rxringcount; i++) {
@@ -2118,14 +2114,13 @@ static int rtl8192_alloc_tx_desc_ring(struct net_device *dev,
dma_addr_t dma;
int i;
- ring = pci_alloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
+ ring = pci_zalloc_consistent(priv->pdev, sizeof(*ring) * entries, &dma);
if (!ring || (unsigned long)ring & 0xFF) {
RT_TRACE(COMP_ERR, "Cannot allocate TX ring (prio = %d)\n",
prio);
return -ENOMEM;
}
- memset(ring, 0, sizeof(*ring)*entries);
priv->tx_ring[prio].desc = ring;
priv->tx_ring[prio].dma = dma;
priv->tx_ring[prio].idx = 0;
diff --git a/drivers/staging/rtl8192ee/pci.c b/drivers/staging/rtl8192ee/pci.c
index f3abbcc9f3ba..0215aef1eacc 100644
--- a/drivers/staging/rtl8192ee/pci.c
+++ b/drivers/staging/rtl8192ee/pci.c
@@ -1224,10 +1224,10 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
/* alloc tx buffer desc for new trx flow*/
if (rtlpriv->use_new_trx_flow) {
- buffer_desc = pci_alloc_consistent(rtlpci->pdev,
- sizeof(*buffer_desc) * entries,
- &buffer_desc_dma);
-
+ buffer_desc =
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*buffer_desc) * entries,
+ &buffer_desc_dma);
if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
("Cannot allocate TX ring (prio = %d)\n",
@@ -1235,7 +1235,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
return -ENOMEM;
}
- memset(buffer_desc, 0, sizeof(*buffer_desc) * entries);
rtlpci->tx_ring[prio].buffer_desc = buffer_desc;
rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma;
@@ -1245,16 +1244,14 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
}
/* alloc dma for this ring */
- desc = pci_alloc_consistent(rtlpci->pdev,
- sizeof(*desc) * entries, &desc_dma);
-
+ desc = pci_zalloc_consistent(rtlpci->pdev, sizeof(*desc) * entries,
+ &desc_dma);
if (!desc || (unsigned long)desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
("Cannot allocate TX ring (prio = %d)\n", prio));
return -ENOMEM;
}
- memset(desc, 0, sizeof(*desc) * entries);
rtlpci->tx_ring[prio].desc = desc;
rtlpci->tx_ring[prio].dma = desc_dma;
@@ -1290,11 +1287,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
struct rtl_rx_buffer_desc *entry = NULL;
/* alloc dma for this ring */
rtlpci->rx_ring[rxring_idx].buffer_desc =
- pci_alloc_consistent(rtlpci->pdev,
- sizeof(*rtlpci->rx_ring[rxring_idx].
- buffer_desc) *
- rtlpci->rxringcount,
- &rtlpci->rx_ring[rxring_idx].dma);
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) * rtlpci->rxringcount,
+ &rtlpci->rx_ring[rxring_idx].dma);
if (!rtlpci->rx_ring[rxring_idx].buffer_desc ||
(unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1302,10 +1297,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
return -ENOMEM;
}
- memset(rtlpci->rx_ring[rxring_idx].buffer_desc, 0,
- sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) *
- rtlpci->rxringcount);
-
/* init every desc in this ring */
rtlpci->rx_ring[rxring_idx].idx = 0;
@@ -1320,19 +1311,15 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
u8 tmp_one = 1;
/* alloc dma for this ring */
rtlpci->rx_ring[rxring_idx].desc =
- pci_alloc_consistent(rtlpci->pdev,
- sizeof(*rtlpci->rx_ring[rxring_idx].
- desc) * rtlpci->rxringcount,
- &rtlpci->rx_ring[rxring_idx].dma);
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*rtlpci->rx_ring[rxring_idx].desc) * rtlpci->rxringcount,
+ &rtlpci->rx_ring[rxring_idx].dma);
if (!rtlpci->rx_ring[rxring_idx].desc ||
(unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
("Cannot allocate RX ring\n"));
return -ENOMEM;
}
- memset(rtlpci->rx_ring[rxring_idx].desc, 0,
- sizeof(*rtlpci->rx_ring[rxring_idx].desc) *
- rtlpci->rxringcount);
/* init every desc in this ring */
rtlpci->rx_ring[rxring_idx].idx = 0;
diff --git a/drivers/staging/rtl8821ae/pci.c b/drivers/staging/rtl8821ae/pci.c
index f9847d1fbdeb..26d7b2fc852a 100644
--- a/drivers/staging/rtl8821ae/pci.c
+++ b/drivers/staging/rtl8821ae/pci.c
@@ -1248,9 +1248,10 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
/* alloc tx buffer desc for new trx flow*/
if (rtlpriv->use_new_trx_flow) {
- buffer_desc = pci_alloc_consistent(rtlpci->pdev,
- sizeof(*buffer_desc) * entries,
- &buffer_desc_dma);
+ buffer_desc =
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*buffer_desc) * entries,
+ &buffer_desc_dma);
if (!buffer_desc || (unsigned long)buffer_desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1259,7 +1260,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
return -ENOMEM;
}
- memset(buffer_desc, 0, sizeof(*buffer_desc) * entries);
rtlpci->tx_ring[prio].buffer_desc = buffer_desc;
rtlpci->tx_ring[prio].buffer_desc_dma = buffer_desc_dma;
@@ -1270,8 +1270,8 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
}
/* alloc dma for this ring */
- desc = pci_alloc_consistent(rtlpci->pdev,
- sizeof(*desc) * entries, &desc_dma);
+ desc = pci_zalloc_consistent(rtlpci->pdev, sizeof(*desc) * entries,
+ &desc_dma);
if (!desc || (unsigned long)desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1279,7 +1279,6 @@ static int _rtl_pci_init_tx_ring(struct ieee80211_hw *hw,
return -ENOMEM;
}
- memset(desc, 0, sizeof(*desc) * entries);
rtlpci->tx_ring[prio].desc = desc;
rtlpci->tx_ring[prio].dma = desc_dma;
@@ -1316,21 +1315,15 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
struct rtl_rx_buffer_desc *entry = NULL;
/* alloc dma for this ring */
rtlpci->rx_ring[rxring_idx].buffer_desc =
- pci_alloc_consistent(rtlpci->pdev,
- sizeof(*rtlpci->rx_ring[rxring_idx].
- buffer_desc) *
- rtlpci->rxringcount,
- &rtlpci->rx_ring[rxring_idx].dma);
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) * rtlpci->rxringcount,
+ &rtlpci->rx_ring[rxring_idx].dma);
if (!rtlpci->rx_ring[rxring_idx].buffer_desc ||
(unsigned long)rtlpci->rx_ring[rxring_idx].buffer_desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG, ("Cannot allocate RX ring\n"));
return -ENOMEM;
}
- memset(rtlpci->rx_ring[rxring_idx].buffer_desc, 0,
- sizeof(*rtlpci->rx_ring[rxring_idx].buffer_desc) *
- rtlpci->rxringcount);
-
/* init every desc in this ring */
rtlpci->rx_ring[rxring_idx].idx = 0;
for (i = 0; i < rtlpci->rxringcount; i++) {
@@ -1344,10 +1337,9 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
u8 tmp_one = 1;
/* alloc dma for this ring */
rtlpci->rx_ring[rxring_idx].desc =
- pci_alloc_consistent(rtlpci->pdev,
- sizeof(*rtlpci->rx_ring[rxring_idx].
- desc) * rtlpci->rxringcount,
- &rtlpci->rx_ring[rxring_idx].dma);
+ pci_zalloc_consistent(rtlpci->pdev,
+ sizeof(*rtlpci->rx_ring[rxring_idx].desc) * rtlpci->rxringcount,
+ &rtlpci->rx_ring[rxring_idx].dma);
if (!rtlpci->rx_ring[rxring_idx].desc ||
(unsigned long)rtlpci->rx_ring[rxring_idx].desc & 0xFF) {
RT_TRACE(COMP_ERR, DBG_EMERG,
@@ -1355,10 +1347,6 @@ static int _rtl_pci_init_rx_ring(struct ieee80211_hw *hw, int rxring_idx)
return -ENOMEM;
}
- memset(rtlpci->rx_ring[rxring_idx].desc, 0,
- sizeof(*rtlpci->rx_ring[rxring_idx].desc) *
- rtlpci->rxringcount);
-
/* init every desc in this ring */
rtlpci->rx_ring[rxring_idx].idx = 0;
for (i = 0; i < rtlpci->rxringcount; i++) {
diff --git a/drivers/staging/slicoss/slicoss.c b/drivers/staging/slicoss/slicoss.c
index 50ece291fc6a..f35fa3dfe22c 100644
--- a/drivers/staging/slicoss/slicoss.c
+++ b/drivers/staging/slicoss/slicoss.c
@@ -1191,18 +1191,15 @@ static int slic_rspqueue_init(struct adapter *adapter)
rspq->num_pages = SLIC_RSPQ_PAGES_GB;
for (i = 0; i < rspq->num_pages; i++) {
- rspq->vaddr[i] = pci_alloc_consistent(adapter->pcidev,
- PAGE_SIZE,
- &rspq->paddr[i]);
+ rspq->vaddr[i] = pci_zalloc_consistent(adapter->pcidev,
+ PAGE_SIZE,
+ &rspq->paddr[i]);
if (!rspq->vaddr[i]) {
dev_err(&adapter->pcidev->dev,
"pci_alloc_consistent failed\n");
slic_rspqueue_free(adapter);
return -ENOMEM;
}
- /* FIXME:
- * do we really need this assertions (4K PAGE_SIZE aligned addr)? */
- memset(rspq->vaddr[i], 0, PAGE_SIZE);
if (paddrh == 0) {
slic_reg32_write(&slic_regs->slic_rbar,
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index c78d06eff7ea..0b583a37f5b3 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -1111,25 +1111,17 @@ static bool device_init_rings(PSDevice pDevice)
void *vir_pool;
/*allocate all RD/TD rings a single pool*/
- vir_pool = pci_alloc_consistent(pDevice->pcid,
- pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
- pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
- pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
- pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc),
- &pDevice->pool_dma);
-
+ vir_pool = pci_zalloc_consistent(pDevice->pcid,
+ pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
+ pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
+ pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
+ pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc),
+ &pDevice->pool_dma);
if (vir_pool == NULL) {
DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s : allocate desc dma memory failed\n", pDevice->dev->name);
return false;
}
- memset(vir_pool, 0,
- pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc) +
- pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc) +
- pDevice->sOpts.nTxDescs[0] * sizeof(STxDesc) +
- pDevice->sOpts.nTxDescs[1] * sizeof(STxDesc)
- );
-
pDevice->aRD0Ring = vir_pool;
pDevice->aRD1Ring = vir_pool +
pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc);
@@ -1138,13 +1130,12 @@ static bool device_init_rings(PSDevice pDevice)
pDevice->rd1_pool_dma = pDevice->rd0_pool_dma +
pDevice->sOpts.nRxDescs0 * sizeof(SRxDesc);
- pDevice->tx0_bufs = pci_alloc_consistent(pDevice->pcid,
- pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
- pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
- CB_BEACON_BUF_SIZE +
- CB_MAX_BUF_SIZE,
- &pDevice->tx_bufs_dma0);
-
+ pDevice->tx0_bufs = pci_zalloc_consistent(pDevice->pcid,
+ pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
+ pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
+ CB_BEACON_BUF_SIZE +
+ CB_MAX_BUF_SIZE,
+ &pDevice->tx_bufs_dma0);
if (pDevice->tx0_bufs == NULL) {
DBG_PRT(MSG_LEVEL_ERR, KERN_ERR "%s: allocate buf dma memory failed\n", pDevice->dev->name);
pci_free_consistent(pDevice->pcid,
@@ -1157,13 +1148,6 @@ static bool device_init_rings(PSDevice pDevice)
return false;
}
- memset(pDevice->tx0_bufs, 0,
- pDevice->sOpts.nTxDescs[0] * PKT_BUF_SZ +
- pDevice->sOpts.nTxDescs[1] * PKT_BUF_SZ +
- CB_BEACON_BUF_SIZE +
- CB_MAX_BUF_SIZE
- );
-
pDevice->td0_pool_dma = pDevice->rd1_pool_dma +
pDevice->sOpts.nRxDescs1 * sizeof(SRxDesc);
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index ba1dbcdf4609..0e8c39b6ccd4 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -3383,12 +3383,11 @@ static int alloc_desc(struct slgt_info *info)
unsigned int pbufs;
/* allocate memory to hold descriptor lists */
- info->bufs = pci_alloc_consistent(info->pdev, DESC_LIST_SIZE, &info->bufs_dma_addr);
+ info->bufs = pci_zalloc_consistent(info->pdev, DESC_LIST_SIZE,
+ &info->bufs_dma_addr);
if (info->bufs == NULL)
return -ENOMEM;
- memset(info->bufs, 0, DESC_LIST_SIZE);
-
info->rbufs = (struct slgt_desc*)info->bufs;
info->tbufs = ((struct slgt_desc*)info->bufs) + info->rbuf_count;
diff --git a/drivers/vme/bridges/vme_ca91cx42.c b/drivers/vme/bridges/vme_ca91cx42.c
index bfb2d3f06738..18078ecbfcc6 100644
--- a/drivers/vme/bridges/vme_ca91cx42.c
+++ b/drivers/vme/bridges/vme_ca91cx42.c
@@ -1555,16 +1555,14 @@ static int ca91cx42_crcsr_init(struct vme_bridge *ca91cx42_bridge,
}
/* Allocate mem for CR/CSR image */
- bridge->crcsr_kernel = pci_alloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
- &bridge->crcsr_bus);
+ bridge->crcsr_kernel = pci_zalloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
+ &bridge->crcsr_bus);
if (bridge->crcsr_kernel == NULL) {
dev_err(&pdev->dev, "Failed to allocate memory for CR/CSR "
"image\n");
return -ENOMEM;
}
- memset(bridge->crcsr_kernel, 0, VME_CRCSR_BUF_SIZE);
-
crcsr_addr = slot * (512 * 1024);
iowrite32(bridge->crcsr_bus - crcsr_addr, bridge->base + VCSR_TO);
diff --git a/drivers/vme/bridges/vme_tsi148.c b/drivers/vme/bridges/vme_tsi148.c
index 61e706c0e00c..e07cfa8001bb 100644
--- a/drivers/vme/bridges/vme_tsi148.c
+++ b/drivers/vme/bridges/vme_tsi148.c
@@ -2275,16 +2275,14 @@ static int tsi148_crcsr_init(struct vme_bridge *tsi148_bridge,
bridge = tsi148_bridge->driver_priv;
/* Allocate mem for CR/CSR image */
- bridge->crcsr_kernel = pci_alloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
- &bridge->crcsr_bus);
+ bridge->crcsr_kernel = pci_zalloc_consistent(pdev, VME_CRCSR_BUF_SIZE,
+ &bridge->crcsr_bus);
if (bridge->crcsr_kernel == NULL) {
dev_err(tsi148_bridge->parent, "Failed to allocate memory for "
"CR/CSR image\n");
return -ENOMEM;
}
- memset(bridge->crcsr_kernel, 0, VME_CRCSR_BUF_SIZE);
-
reg_split(bridge->crcsr_bus, &crcsr_bus_high, &crcsr_bus_low);
iowrite32be(crcsr_bus_high, bridge->base + TSI148_LCSR_CROU);
diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c
index 47249a30eae3..20f766afa4c7 100644
--- a/drivers/w1/w1_int.c
+++ b/drivers/w1/w1_int.c
@@ -91,8 +91,7 @@ static struct w1_master *w1_alloc_dev(u32 id, int slave_count, int slave_ttl,
err = device_register(&dev->dev);
if (err) {
pr_err("Failed to register master device. err=%d\n", err);
- memset(dev, 0, sizeof(struct w1_master));
- kfree(dev);
+ put_device(&dev->dev);
dev = NULL;
}
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index f704458ea5f5..3e0b6fcb0d2b 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -30,7 +30,7 @@ struct plock_op {
struct plock_xop {
struct plock_op xop;
- void *callback;
+ int (*callback)(struct file_lock *fl, int result);
void *fl;
void *file;
struct file_lock flc;
@@ -144,23 +144,23 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
send_op(op);
- if (xop->callback == NULL) {
- rv = wait_event_killable(recv_wq, (op->done != 0));
- if (rv == -ERESTARTSYS) {
- log_debug(ls, "dlm_posix_lock: wait killed %llx",
- (unsigned long long)number);
- spin_lock(&ops_lock);
- list_del(&op->list);
- spin_unlock(&ops_lock);
- kfree(xop);
- do_unlock_close(ls, number, file, fl);
- goto out;
- }
- } else {
+ if (xop->callback) {
rv = FILE_LOCK_DEFERRED;
goto out;
}
+ rv = wait_event_killable(recv_wq, (op->done != 0));
+ if (rv == -ERESTARTSYS) {
+ log_debug(ls, "dlm_posix_lock: wait killed %llx",
+ (unsigned long long)number);
+ spin_lock(&ops_lock);
+ list_del(&op->list);
+ spin_unlock(&ops_lock);
+ kfree(xop);
+ do_unlock_close(ls, number, file, fl);
+ goto out;
+ }
+
spin_lock(&ops_lock);
if (!list_empty(&op->list)) {
log_error(ls, "dlm_posix_lock: op on list %llx",
@@ -190,7 +190,7 @@ static int dlm_plock_callback(struct plock_op *op)
struct file *file;
struct file_lock *fl;
struct file_lock *flc;
- int (*notify)(void *, void *, int) = NULL;
+ int (*notify)(struct file_lock *fl, int result) = NULL;
struct plock_xop *xop = (struct plock_xop *)op;
int rv = 0;
@@ -209,7 +209,7 @@ static int dlm_plock_callback(struct plock_op *op)
notify = xop->callback;
if (op->info.rv) {
- notify(fl, NULL, op->info.rv);
+ notify(fl, op->info.rv);
goto out;
}
@@ -228,7 +228,7 @@ static int dlm_plock_callback(struct plock_op *op)
(unsigned long long)op->info.number, file, fl);
}
- rv = notify(fl, NULL, 0);
+ rv = notify(fl, 0);
if (rv) {
/* XXX: We need to cancel the fs lock here: */
log_print("dlm_plock_callback: lock granted after lock request "
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..e26bc9a22ac9 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -303,6 +303,31 @@ static int fat_bmap_cluster(struct inode *inode, int cluster)
return dclus;
}
+static int fat_get_mapped_cluster(struct inode *inode, sector_t sector,
+ sector_t last_block,
+ unsigned long *mapped_blocks, sector_t *bmap)
+{
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int cluster, offset;
+
+ cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
+ offset = sector & (sbi->sec_per_clus - 1);
+ cluster = fat_bmap_cluster(inode, cluster);
+
+ if (cluster < 0)
+ return cluster;
+
+ else if (cluster) {
+ *bmap = fat_clus_to_blknr(sbi, cluster) + offset;
+ *mapped_blocks = sbi->sec_per_clus - offset;
+ if (*mapped_blocks > last_block - sector)
+ *mapped_blocks = last_block - sector;
+ }
+
+ return 0;
+}
+
int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
unsigned long *mapped_blocks, int create)
{
@@ -311,7 +336,6 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
const unsigned long blocksize = sb->s_blocksize;
const unsigned char blocksize_bits = sb->s_blocksize_bits;
sector_t last_block;
- int cluster, offset;
*phys = 0;
*mapped_blocks = 0;
@@ -329,25 +353,39 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
return 0;
/*
- * ->mmu_private can access on only allocation path.
- * (caller must hold ->i_mutex)
+ * Both ->mmu_private and ->i_disksize can access
+ * on only allocation path. (caller must hold ->i_mutex)
*/
- last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
>> blocksize_bits;
if (sector >= last_block)
return 0;
}
- cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
- offset = sector & (sbi->sec_per_clus - 1);
- cluster = fat_bmap_cluster(inode, cluster);
- if (cluster < 0)
- return cluster;
- else if (cluster) {
- *phys = fat_clus_to_blknr(sbi, cluster) + offset;
- *mapped_blocks = sbi->sec_per_clus - offset;
- if (*mapped_blocks > last_block - sector)
- *mapped_blocks = last_block - sector;
- }
- return 0;
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ phys);
+}
+
+int fat_bmap2(struct inode *inode, sector_t sector,
+ unsigned long *mapped_blocks, struct buffer_head *bh_result,
+ int create, sector_t *bmap)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ BUG_ON(create != 0);
+
+ *bmap = 0;
+ *mapped_blocks = 0;
+
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+
+ if (sector >= last_block)
+ return 0;
+
+ return fat_get_mapped_cluster(inode, sector, last_block, mapped_blocks,
+ bmap);
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e0c4ba39a377..13b7202bd651 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -119,7 +119,8 @@ struct msdos_inode_info {
unsigned int cache_valid_id;
/* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
- loff_t mmu_private; /* physically allocated size */
+ loff_t mmu_private; /* physically allocated size (initialized) */
+ loff_t i_disksize; /* physically allocated size (uninitialized) */
int i_start; /* first cluster or 0 */
int i_logstart; /* logical first cluster */
@@ -290,6 +291,9 @@ extern int fat_get_cluster(struct inode *inode, int cluster,
int *fclus, int *dclus);
extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
unsigned long *mapped_blocks, int create);
+extern int fat_bmap2(struct inode *inode, sector_t sector,
+ unsigned long *mapped_blocks,
+ struct buffer_head *bh_result, int create, sector_t *bmap);
/* fat/dir.c */
extern const struct file_operations fat_dir_operations;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 85f79a89e747..92e9e753b554 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,12 @@
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+#include <linux/falloc.h>
#include "fat.h"
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len);
+
static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
@@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = {
#endif
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
+ .fallocate = fat_fallocate,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +225,75 @@ out:
return err;
}
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ int cluster;
+ int nr_cluster; /* Number of clusters to be allocated */
+ loff_t mm_bytes; /* Number of bytes to be allocated for file */
+ struct inode *inode = file->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int err = 0;
+
+ /* No support for hole punch or other fallocate flags. */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /* No support for dir */
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+ if ((offset + len) <= MSDOS_I(inode)->i_disksize)
+ goto error;
+
+ err = inode_newsize_ok(inode, (len + offset));
+ if (err)
+ goto error;
+
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ /* First compute the number of clusters to be allocated */
+ mm_bytes = offset + len - round_up(MSDOS_I(inode)->i_disksize,
+ sbi->cluster_size);
+ nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+ sbi->cluster_bits;
+
+ /* Start the allocation.We are not zeroing out the clusters */
+ while (nr_cluster-- > 0) {
+ err = fat_alloc_clusters(inode, &cluster, 1);
+ if (err) {
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_alloc_clusters() error");
+ goto error;
+ }
+ err = fat_chain_add(inode, cluster, 1);
+ if (err) {
+ fat_free_clusters(inode, cluster);
+ goto error;
+ }
+ MSDOS_I(inode)->i_disksize += sbi->cluster_size;
+ }
+ } else {
+ /* This is just an expanding truncate */
+ err = fat_cont_expand(inode, (offset + len));
+ if (err)
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_cont_expand() error");
+ }
+
+error:
+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
/* Free all clusters after the skip'th cluster. */
static int fat_free(struct inode *inode, int skip)
{
@@ -300,8 +374,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
* This protects against truncating a file bigger than it was then
* trying to write into the hole.
*/
- if (MSDOS_I(inode)->mmu_private > offset)
+ if (MSDOS_I(inode)->i_disksize > offset) {
MSDOS_I(inode)->mmu_private = offset;
+ MSDOS_I(inode)->i_disksize = offset;
+ }
nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 756aead10d96..c3b86c58ad88 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -116,6 +116,25 @@ static int fat_add_cluster(struct inode *inode)
return err;
}
+static void check_fallocated_region(struct inode *inode, sector_t iblock,
+ unsigned long *max_blocks, struct buffer_head *bh_result)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block, disk_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ >> blocksize_bits;
+ disk_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+ if (iblock >= last_block && iblock <= disk_block) {
+ MSDOS_I(inode)->mmu_private += *max_blocks << blocksize_bits;
+ set_buffer_new(bh_result);
+ }
+
+}
+
static inline int __fat_get_block(struct inode *inode, sector_t iblock,
unsigned long *max_blocks,
struct buffer_head *bh_result, int create)
@@ -130,8 +149,11 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
if (err)
return err;
if (phys) {
- map_bh(bh_result, sb, phys);
*max_blocks = min(mapped_blocks, *max_blocks);
+ if (create)
+ check_fallocated_region(inode, iblock, max_blocks,
+ bh_result);
+ map_bh(bh_result, sb, phys);
return 0;
}
if (!create)
@@ -155,6 +177,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+ MSDOS_I(inode)->i_disksize = MSDOS_I(inode)->mmu_private;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
@@ -269,6 +292,13 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
loff_t size = offset + count;
if (MSDOS_I(inode)->mmu_private < size)
return 0;
+
+ /*
+ * In case of writing in fallocated region, return 0 and
+ * fallback to buffered write.
+ */
+ if (MSDOS_I(inode)->i_disksize > MSDOS_I(inode)->mmu_private)
+ return 0;
}
/*
@@ -282,13 +312,36 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
return ret;
}
+static int fat_get_block_bmap(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct super_block *sb = inode->i_sb;
+ unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
+ int err;
+ sector_t bmap;
+ unsigned long mapped_blocks;
+
+ err = fat_bmap2(inode, iblock, &mapped_blocks, bh_result, create,
+ &bmap);
+ if (err)
+ return err;
+
+ if (bmap) {
+ map_bh(bh_result, sb, bmap);
+ max_blocks = min(mapped_blocks, max_blocks);
+ }
+
+ bh_result->b_size = max_blocks << sb->s_blocksize_bits;
+ return 0;
+}
+
static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
{
sector_t blocknr;
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
- blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ blocknr = generic_block_bmap(mapping, block, fat_get_block_bmap);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
@@ -469,7 +522,6 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
error = fat_calc_dir_size(inode);
if (error < 0)
return error;
- MSDOS_I(inode)->mmu_private = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
} else { /* not a directory */
@@ -484,8 +536,12 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_op = &fat_file_inode_operations;
inode->i_fop = &fat_file_operations;
inode->i_mapping->a_ops = &fat_aops;
- MSDOS_I(inode)->mmu_private = inode->i_size;
}
+
+ MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = round_up(inode->i_size,
+ inode->i_sb->s_blocksize);
+
if (de->attr & ATTR_SYS) {
if (sbi->options.sys_immutable)
inode->i_flags |= S_IMMUTABLE;
@@ -550,12 +606,34 @@ out:
EXPORT_SYMBOL_GPL(fat_build_inode);
+static int __fat_write_inode(struct inode *inode, int wait);
static void fat_evict_inode(struct inode *inode)
{
truncate_inode_pages_final(&inode->i_data);
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
+ } else {
+ /* Release unwritten fallocated blocks on inode eviction. */
+ if (MSDOS_I(inode)->i_disksize >
+ round_up(MSDOS_I(inode)->mmu_private,
+ inode->i_sb->s_blocksize)) {
+ int err;
+ fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+ /* Fallocate results in updating the i_start/iogstart
+ * for the zero byte file. So, make it return to
+ * original state during evict and commit it to avoid
+ * any corruption on the next access to the cluster
+ * chain for the file.
+ */
+ err = __fat_write_inode(inode, inode_needs_sync(inode));
+ if (err) {
+ fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+ "update on disk inode for unused fallocated "
+ "blocks, inode could be corrupted. Please run "
+ "fsck");
+ }
+ }
}
invalidate_inode_buffers(inode);
clear_inode(inode);
@@ -1293,6 +1371,7 @@ static int fat_read_root(struct inode *inode)
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 72c82f69b01b..22d1c3df61ac 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -21,6 +21,7 @@
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
#include <linux/user_namespace.h>
+#include <linux/shmem_fs.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
@@ -336,6 +337,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_GETPIPE_SZ:
err = pipe_fcntl(filp, cmd, arg);
break;
+ case F_ADD_SEALS:
+ case F_GET_SEALS:
+ err = shmem_fcntl(filp, cmd, arg);
+ break;
default:
break;
}
diff --git a/fs/inode.c b/fs/inode.c
index 5938f3928944..26753ba7b6d6 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -165,6 +165,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
mapping->a_ops = &empty_aops;
mapping->host = inode;
mapping->flags = 0;
+ atomic_set(&mapping->i_mmap_writable, 0);
mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
mapping->private_data = NULL;
mapping->backing_dev_info = &default_backing_dev_info;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index ab798a88ec1d..2a6170133c1d 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -667,22 +667,16 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l
* deferred rpc for GETLK and SETLK.
*/
static void
-nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf,
- int result)
+nlmsvc_update_deferred_block(struct nlm_block *block, int result)
{
block->b_flags |= B_GOT_CALLBACK;
if (result == 0)
block->b_granted = 1;
else
block->b_flags |= B_TIMED_OUT;
- if (conf) {
- if (block->b_fl)
- __locks_copy_lock(block->b_fl, conf);
- }
}
-static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
- int result)
+static int nlmsvc_grant_deferred(struct file_lock *fl, int result)
{
struct nlm_block *block;
int rc = -ENOENT;
@@ -697,7 +691,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf,
rc = -ENOLCK;
break;
}
- nlmsvc_update_deferred_block(block, conf, result);
+ nlmsvc_update_deferred_block(block, result);
} else if (result == 0)
block->b_granted = 1;
diff --git a/include/asm-generic/pci-dma-compat.h b/include/asm-generic/pci-dma-compat.h
index 1437b7da09b2..c110843fc53b 100644
--- a/include/asm-generic/pci-dma-compat.h
+++ b/include/asm-generic/pci-dma-compat.h
@@ -19,6 +19,14 @@ pci_alloc_consistent(struct pci_dev *hwdev, size_t size,
return dma_alloc_coherent(hwdev == NULL ? NULL : &hwdev->dev, size, dma_handle, GFP_ATOMIC);
}
+static inline void *
+pci_zalloc_consistent(struct pci_dev *hwdev, size_t size,
+ dma_addr_t *dma_handle)
+{
+ return dma_zalloc_coherent(hwdev == NULL ? NULL : &hwdev->dev,
+ size, dma_handle, GFP_ATOMIC);
+}
+
static inline void
pci_free_consistent(struct pci_dev *hwdev, size_t size,
void *vaddr, dma_addr_t dma_handle)
diff --git a/include/linux/efi.h b/include/linux/efi.h
index efc681fd5895..45cb4ffdea62 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1156,6 +1156,9 @@ int efivars_sysfs_init(void);
#ifdef CONFIG_EFI_RUNTIME_MAP
int efi_runtime_map_init(struct kobject *);
void efi_runtime_map_setup(void *, int, u32);
+int efi_get_runtime_map_size(void);
+int efi_get_runtime_map_desc_size(void);
+int efi_runtime_map_copy(void *buf, size_t bufsz);
#else
static inline int efi_runtime_map_init(struct kobject *kobj)
{
@@ -1164,6 +1167,22 @@ static inline int efi_runtime_map_init(struct kobject *kobj)
static inline void
efi_runtime_map_setup(void *map, int nr_entries, u32 desc_size) {}
+
+static inline int efi_get_runtime_map_size(void)
+{
+ return 0;
+}
+
+static inline int efi_get_runtime_map_desc_size(void)
+{
+ return 0;
+}
+
+static inline int efi_runtime_map_copy(void *buf, size_t bufsz)
+{
+ return 0;
+}
+
#endif
/* prototypes shared between arch specific and generic stub code */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 1ab6c6913040..0deda9e00b5c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1,7 +1,6 @@
#ifndef _LINUX_FS_H
#define _LINUX_FS_H
-
#include <linux/linkage.h>
#include <linux/wait.h>
#include <linux/kdev_t.h>
@@ -50,12 +49,12 @@ struct seq_file;
struct workqueue_struct;
struct iov_iter;
-extern void __init inode_init(void);
-extern void __init inode_init_early(void);
-extern void __init files_init(unsigned long);
+void __init inode_init(void);
+void __init inode_init_early(void);
+void __init files_init(unsigned long);
extern struct files_stat_struct files_stat;
-extern unsigned long get_max_files(void);
+unsigned long get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
@@ -64,9 +63,9 @@ extern int sysctl_protected_hardlinks;
struct buffer_head;
typedef int (get_block_t)(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+ struct buffer_head *bh_result, int create);
typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
- ssize_t bytes, void *private);
+ ssize_t bytes, void *private);
#define MAY_EXEC 0x00000001
#define MAY_WRITE 0x00000002
@@ -192,8 +191,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
#define READ 0
#define WRITE RW_MASK
#define READA RWA_MASK
-#define KERNEL_READ (READ|REQ_KERNEL)
-#define KERNEL_WRITE (WRITE|REQ_KERNEL)
+#define KERNEL_READ (READ | REQ_KERNEL)
+#define KERNEL_WRITE (WRITE | REQ_KERNEL)
#define READ_SYNC (READ | REQ_SYNC)
#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE)
@@ -256,25 +255,25 @@ struct iattr {
*/
#include <linux/quota.h>
-/**
+/**
* enum positive_aop_returns - aop return codes with specific semantics
*
* @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
- * completed, that the page is still locked, and
- * should be considered active. The VM uses this hint
- * to return the page to the active list -- it won't
- * be a candidate for writeback again in the near
- * future. Other callers must be careful to unlock
- * the page if they get this return. Returned by
- * writepage();
+ * completed, that the page is still locked, and
+ * should be considered active. The VM uses this hint
+ * to return the page to the active list -- it won't
+ * be a candidate for writeback again in the near
+ * future. Other callers must be careful to unlock
+ * the page if they get this return. Returned by
+ * writepage();
*
* @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
- * unlocked it and the page might have been truncated.
- * The caller should back up to acquiring a new page and
- * trying again. The aop will be taking reasonable
- * precautions not to livelock. If the caller held a page
- * reference, it should drop it before retrying. Returned
- * by readpage().
+ * unlocked it and the page might have been truncated.
+ * The caller should back up to acquiring a new page and
+ * trying again. The aop will be taking reasonable
+ * precautions not to livelock. If the caller held a page
+ * reference, it should drop it before retrying. Returned
+ * by readpage().
*
* address_space_operation functions return these large constants to indicate
* special semantics to the caller. These are much larger than the bytes in a
@@ -320,7 +319,7 @@ typedef struct {
} read_descriptor_t;
typedef int (*read_actor_t)(read_descriptor_t *, struct page *,
- unsigned long, unsigned long);
+ unsigned long, unsigned long);
struct address_space_operations {
int (*writepage)(struct page *page, struct writeback_control *wbc);
@@ -333,38 +332,38 @@ struct address_space_operations {
int (*set_page_dirty)(struct page *page);
int (*readpages)(struct file *filp, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages);
+ struct list_head *pages, unsigned nr_pages);
int (*write_begin)(struct file *, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata);
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
int (*write_end)(struct file *, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
/* Unfortunately this kludge is needed for FIBMAP. Don't use it */
sector_t (*bmap)(struct address_space *, sector_t);
- void (*invalidatepage) (struct page *, unsigned int, unsigned int);
- int (*releasepage) (struct page *, gfp_t);
+ void (*invalidatepage)(struct page *, unsigned int, unsigned int);
+ int (*releasepage)(struct page *, gfp_t);
void (*freepage)(struct page *);
ssize_t (*direct_IO)(int, struct kiocb *, struct iov_iter *iter, loff_t offset);
int (*get_xip_mem)(struct address_space *, pgoff_t, int,
- void **, unsigned long *);
+ void **, unsigned long *);
/*
* migrate the contents of a page to the specified target. If
* migrate_mode is MIGRATE_ASYNC, it must not block.
*/
- int (*migratepage) (struct address_space *,
- struct page *, struct page *, enum migrate_mode);
- int (*launder_page) (struct page *);
- int (*is_partially_uptodate) (struct page *, unsigned long,
- unsigned long);
- void (*is_dirty_writeback) (struct page *, bool *, bool *);
+ int (*migratepage)(struct address_space *,
+ struct page *, struct page *, enum migrate_mode);
+ int (*launder_page)(struct page *);
+ int (*is_partially_uptodate)(struct page *, unsigned long,
+ unsigned long);
+ void (*is_dirty_writeback)(struct page *, bool *, bool *);
int (*error_remove_page)(struct address_space *, struct page *);
/* swapfile support */
int (*swap_activate)(struct swap_info_struct *sis, struct file *file,
- sector_t *span);
+ sector_t *span);
void (*swap_deactivate)(struct file *file);
};
@@ -375,19 +374,19 @@ extern const struct address_space_operations empty_aops;
* to write into the pagecache.
*/
int pagecache_write_begin(struct file *, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata);
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
int pagecache_write_end(struct file *, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
struct backing_dev_info;
struct address_space {
struct inode *host; /* owner: inode, block_device */
struct radix_tree_root page_tree; /* radix tree of all pages */
spinlock_t tree_lock; /* and lock protecting it */
- unsigned int i_mmap_writable;/* count VM_SHARED mappings */
+ atomic_t i_mmap_writable;/* count VM_SHARED mappings */
struct rb_root i_mmap; /* tree of private and shared mappings */
struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
struct mutex i_mmap_mutex; /* protect tree, count, list */
@@ -402,35 +401,35 @@ struct address_space {
struct list_head private_list; /* ditto */
void *private_data; /* ditto */
} __attribute__((aligned(sizeof(long))));
- /*
- * On most architectures that alignment is already the case; but
- * must be enforced here for CRIS, to let the least significant bit
- * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
- */
+/*
+ * On most architectures that alignment is already the case; but
+ * must be enforced here for CRIS, to let the least significant bit
+ * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON.
+ */
struct request_queue;
struct block_device {
dev_t bd_dev; /* not a kdev_t - it's a search key */
int bd_openers;
- struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
+ struct inode *bd_inode; /* will die */
+ struct super_block *bd_super;
struct mutex bd_mutex; /* open/close mutex */
struct list_head bd_inodes;
- void * bd_claiming;
- void * bd_holder;
+ void *bd_claiming;
+ void *bd_holder;
int bd_holders;
bool bd_write_holder;
#ifdef CONFIG_SYSFS
struct list_head bd_holder_disks;
#endif
- struct block_device * bd_contains;
+ struct block_device *bd_contains;
unsigned bd_block_size;
- struct hd_struct * bd_part;
+ struct hd_struct *bd_part;
/* number of times partitions within this device have been opened. */
unsigned bd_part_count;
int bd_invalidated;
- struct gendisk * bd_disk;
- struct request_queue * bd_queue;
+ struct gendisk *bd_disk;
+ struct request_queue *bd_queue;
struct list_head bd_list;
/*
* Private data. You must have bd_claim'ed the block_device
@@ -461,8 +460,8 @@ int mapping_tagged(struct address_space *mapping, int tag);
*/
static inline int mapping_mapped(struct address_space *mapping)
{
- return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
- !list_empty(&mapping->i_mmap_nonlinear);
+ return !RB_EMPTY_ROOT(&mapping->i_mmap) ||
+ !list_empty(&mapping->i_mmap_nonlinear);
}
/*
@@ -470,16 +469,41 @@ static inline int mapping_mapped(struct address_space *mapping)
* Note that i_mmap_writable counts all VM_SHARED vmas: do_mmap_pgoff
* marks vma as VM_SHARED if it is shared, and the file was opened for
* writing i.e. vma may be mprotected writable even if now readonly.
+ *
+ * If i_mmap_writable is negative, no new writable mappings are allowed. You
+ * can only deny writable mappings, if none exists right now.
*/
static inline int mapping_writably_mapped(struct address_space *mapping)
{
- return mapping->i_mmap_writable != 0;
+ return atomic_read(&mapping->i_mmap_writable) > 0;
+}
+
+static inline int mapping_map_writable(struct address_space *mapping)
+{
+ return atomic_inc_unless_negative(&mapping->i_mmap_writable) ?
+ 0 : -EPERM;
+}
+
+static inline void mapping_unmap_writable(struct address_space *mapping)
+{
+ atomic_dec(&mapping->i_mmap_writable);
+}
+
+static inline int mapping_deny_writable(struct address_space *mapping)
+{
+ return atomic_dec_unless_positive(&mapping->i_mmap_writable) ?
+ 0 : -EBUSY;
+}
+
+static inline void mapping_allow_writable(struct address_space *mapping)
+{
+ atomic_inc(&mapping->i_mmap_writable);
}
/*
* Use sequence counter to get consistent i_size on 32-bit processors.
*/
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
#include <linux/seqlock.h>
#define __NEED_I_SIZE_ORDERED
#define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
@@ -609,8 +633,7 @@ static inline int inode_unhashed(struct inode *inode)
* The locking order between these classes is
* parent -> child -> normal -> xattr -> second non-directory
*/
-enum inode_i_mutex_lock_class
-{
+enum inode_i_mutex_lock_class {
I_MUTEX_NORMAL,
I_MUTEX_PARENT,
I_MUTEX_CHILD,
@@ -633,7 +656,7 @@ void unlock_two_nondirectories(struct inode *, struct inode*);
*/
static inline loff_t i_size_read(const struct inode *inode)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
loff_t i_size;
unsigned int seq;
@@ -642,7 +665,7 @@ static inline loff_t i_size_read(const struct inode *inode)
i_size = inode->i_size;
} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
return i_size;
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
loff_t i_size;
preempt_disable();
@@ -661,13 +684,13 @@ static inline loff_t i_size_read(const struct inode *inode)
*/
static inline void i_size_write(struct inode *inode, loff_t i_size)
{
-#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
preempt_disable();
write_seqcount_begin(&inode->i_size_seqcount);
inode->i_size = i_size;
write_seqcount_end(&inode->i_size_seqcount);
preempt_enable();
-#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+#elif BITS_PER_LONG == 32 && defined(CONFIG_PREEMPT)
preempt_disable();
inode->i_size = i_size;
preempt_enable();
@@ -711,7 +734,7 @@ static inline unsigned imajor(const struct inode *inode)
return MAJOR(inode->i_rdev);
}
-extern struct block_device *I_BDEV(struct inode *inode);
+struct block_device *I_BDEV(struct inode *inode);
struct fown_struct {
rwlock_t lock; /* protects pid, uid, euid fields */
@@ -747,7 +770,7 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index)
struct file {
union {
struct llist_node fu_llist;
- struct rcu_head fu_rcuhead;
+ struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
#define f_dentry f_path.dentry
@@ -760,7 +783,7 @@ struct file {
*/
spinlock_t f_lock;
atomic_long_t f_count;
- unsigned int f_flags;
+ unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
@@ -800,12 +823,12 @@ static inline struct file *get_file(struct file *f)
#define MAX_NON_LFS ((1UL<<31) - 1)
-/* Page cache limit. The filesystems should put that into their s_maxbytes
- limits, otherwise bad things can happen in VM. */
-#if BITS_PER_LONG==32
-#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-#elif BITS_PER_LONG==64
-#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL)
+/* Page cache limit. The filesystems should put that into their s_maxbytes
+ limits, otherwise bad things can happen in VM. */
+#if BITS_PER_LONG == 32
+#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG - 1)) - 1)
+#elif BITS_PER_LONG == 64
+#define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL)
#endif
#define FL_POSIX 1
@@ -836,17 +859,17 @@ static inline struct file *get_file(struct file *f)
typedef void *fl_owner_t;
struct file_lock_operations {
- void (*fl_copy_lock)(struct file_lock *, struct file_lock *);
- void (*fl_release_private)(struct file_lock *);
+ void (*fl_copy_lock)(struct file_lock *dst, struct file_lock *fl);
+ void (*fl_release_private)(struct file_lock *fl);
};
struct lock_manager_operations {
- int (*lm_compare_owner)(struct file_lock *, struct file_lock *);
- unsigned long (*lm_owner_key)(struct file_lock *);
- void (*lm_notify)(struct file_lock *); /* unblock callback */
- int (*lm_grant)(struct file_lock *, struct file_lock *, int);
- void (*lm_break)(struct file_lock *);
- int (*lm_change)(struct file_lock **, int);
+ int (*lm_compare_owner)(struct file_lock *fl1, struct file_lock *fl2);
+ unsigned long (*lm_owner_key)(struct file_lock *fl);
+ void (*lm_notify)(struct file_lock *fl); /* unblock callback */
+ int (*lm_grant)(struct file_lock *fl, int result);
+ void (*lm_break)(struct file_lock *fl);
+ int (*lm_change)(struct file_lock **fl, int type);
};
struct lock_manager {
@@ -894,7 +917,7 @@ struct file_lock {
loff_t fl_start;
loff_t fl_end;
- struct fasync_struct * fl_fasync; /* for lease break notifications */
+ struct fasync_struct *fl_fasync; /* for lease break notifications */
/* for lease breaks: */
unsigned long fl_break_time;
unsigned long fl_downgrade_time;
@@ -920,46 +943,47 @@ struct file_lock {
#include <linux/fcntl.h>
-extern void send_sigio(struct fown_struct *fown, int fd, int band);
+void send_sigio(struct fown_struct *fown, int fd, int band);
#ifdef CONFIG_FILE_LOCKING
-extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
-extern int fcntl_setlk(unsigned int, struct file *, unsigned int,
- struct flock __user *);
+int fcntl_getlk(struct file *, unsigned int, struct flock __user *);
+int fcntl_setlk(unsigned int, struct file *, unsigned int,
+ struct flock __user *);
#if BITS_PER_LONG == 32
-extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
-extern int fcntl_setlk64(unsigned int, struct file *, unsigned int,
- struct flock64 __user *);
+int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *);
+int fcntl_setlk64(unsigned int, struct file *, unsigned int,
+ struct flock64 __user *);
#endif
-extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
-extern int fcntl_getlease(struct file *filp);
+int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_getlease(struct file *filp);
/* fs/locks.c */
void locks_free_lock(struct file_lock *fl);
-extern void locks_init_lock(struct file_lock *);
-extern struct file_lock * locks_alloc_lock(void);
-extern void locks_copy_lock(struct file_lock *, struct file_lock *);
-extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
-extern void locks_remove_posix(struct file *, fl_owner_t);
-extern void locks_remove_file(struct file *);
-extern void locks_release_private(struct file_lock *);
-extern void posix_test_lock(struct file *, struct file_lock *);
-extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
-extern int posix_lock_file_wait(struct file *, struct file_lock *);
-extern int posix_unblock_lock(struct file_lock *);
-extern int vfs_test_lock(struct file *, struct file_lock *);
-extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
-extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
-extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
-extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
-extern void lease_get_mtime(struct inode *, struct timespec *time);
-extern int generic_setlease(struct file *, long, struct file_lock **);
-extern int vfs_setlease(struct file *, long, struct file_lock **);
-extern int lease_modify(struct file_lock **, int);
-extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
-extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
+void locks_init_lock(struct file_lock *);
+struct file_lock *locks_alloc_lock(void);
+void locks_copy_lock(struct file_lock *, struct file_lock *);
+void __locks_copy_lock(struct file_lock *, const struct file_lock *);
+void locks_remove_posix(struct file *, fl_owner_t);
+void locks_remove_file(struct file *);
+void locks_release_private(struct file_lock *);
+void posix_test_lock(struct file *, struct file_lock *);
+int posix_lock_file(struct file *, struct file_lock *, struct file_lock *);
+int posix_lock_file_wait(struct file *, struct file_lock *);
+int posix_unblock_lock(struct file_lock *);
+int vfs_test_lock(struct file *, struct file_lock *);
+int vfs_lock_file(struct file *, unsigned int, struct file_lock *,
+ struct file_lock *);
+int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
+int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
+int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
+void lease_get_mtime(struct inode *, struct timespec *time);
+int generic_setlease(struct file *, long, struct file_lock **);
+int vfs_setlease(struct file *, long, struct file_lock **);
+int lease_modify(struct file_lock **, int);
+int lock_may_read(struct inode *, loff_t start, unsigned long count);
+int lock_may_write(struct inode *, loff_t start, unsigned long count);
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, unsigned int cmd,
struct flock __user *user)
@@ -998,32 +1022,26 @@ static inline int fcntl_getlease(struct file *filp)
static inline void locks_init_lock(struct file_lock *fl)
{
- return;
}
static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- return;
}
static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
{
- return;
}
static inline void locks_remove_posix(struct file *filp, fl_owner_t owner)
{
- return;
}
static inline void locks_remove_file(struct file *filp)
{
- return;
}
static inline void posix_test_lock(struct file *filp, struct file_lock *fl)
{
- return;
}
static inline int posix_lock_file(struct file *filp, struct file_lock *fl,
@@ -1064,18 +1082,18 @@ static inline int flock_lock_file_wait(struct file *filp,
return -ENOLCK;
}
-static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
+static inline int __break_lease(struct inode *inode, unsigned int mode,
+ unsigned int type)
{
return 0;
}
static inline void lease_get_mtime(struct inode *inode, struct timespec *time)
{
- return;
}
static inline int generic_setlease(struct file *filp, long arg,
- struct file_lock **flp)
+ struct file_lock **flp)
{
return -EINVAL;
}
@@ -1104,7 +1122,6 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
}
#endif /* !CONFIG_FILE_LOCKING */
-
struct fasync_struct {
spinlock_t fa_lock;
int magic;
@@ -1117,20 +1134,22 @@ struct fasync_struct {
#define FASYNC_MAGIC 0x4601
/* SMP safe fasync helpers: */
-extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
-extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
-extern int fasync_remove_entry(struct file *, struct fasync_struct **);
-extern struct fasync_struct *fasync_alloc(void);
-extern void fasync_free(struct fasync_struct *);
+int fasync_helper(int, struct file *, int, struct fasync_struct **);
+struct fasync_struct *fasync_insert_entry(int, struct file *,
+ struct fasync_struct **,
+ struct fasync_struct *);
+int fasync_remove_entry(struct file *, struct fasync_struct **);
+struct fasync_struct *fasync_alloc(void);
+void fasync_free(struct fasync_struct *);
/* can be called from interrupts */
-extern void kill_fasync(struct fasync_struct **, int, int);
+void kill_fasync(struct fasync_struct **, int, int);
-extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
-extern void f_delown(struct file *filp);
-extern pid_t f_getown(struct file *filp);
-extern int send_sigurg(struct fown_struct *fown);
+int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
+int f_setown(struct file *filp, unsigned long arg, int force);
+void f_delown(struct file *filp);
+pid_t f_getown(struct file *filp);
+int send_sigurg(struct fown_struct *fown);
struct mm_struct;
@@ -1208,7 +1227,7 @@ struct super_block {
char s_id[32]; /* Informational name */
u8 s_uuid[16]; /* UUID */
- void *s_fs_info; /* Filesystem private info */
+ void *s_fs_info; /* Filesystem private info */
unsigned int s_max_links;
fmode_t s_mode;
@@ -1260,7 +1279,7 @@ struct super_block {
struct rcu_head rcu;
};
-extern struct timespec current_fs_time(struct super_block *sb);
+struct timespec current_fs_time(struct super_block *sb);
/*
* Snapshotting support.
@@ -1376,31 +1395,31 @@ static inline void sb_start_intwrite(struct super_block *sb)
__sb_start_write(sb, SB_FREEZE_FS, true);
}
-
-extern bool inode_owner_or_capable(const struct inode *inode);
+bool inode_owner_or_capable(const struct inode *inode);
/*
* VFS helper functions..
*/
-extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
-extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
-extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-extern int vfs_symlink(struct inode *, struct dentry *, const char *);
-extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
-extern int vfs_rmdir(struct inode *, struct dentry *);
-extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+int vfs_create(struct inode *, struct dentry *, umode_t, bool);
+int vfs_mkdir(struct inode *, struct dentry *, umode_t);
+int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+int vfs_symlink(struct inode *, struct dentry *, const char *);
+int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
+int vfs_rmdir(struct inode *, struct dentry *);
+int vfs_unlink(struct inode *, struct dentry *, struct inode **);
+int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *,
+ struct inode **, unsigned int);
/*
* VFS dentry helper functions.
*/
-extern void dentry_unhash(struct dentry *dentry);
+void dentry_unhash(struct dentry *dentry);
/*
* VFS file helper functions.
*/
-extern void inode_init_owner(struct inode *inode, const struct inode *dir,
- umode_t mode);
+void inode_init_owner(struct inode *inode, const struct inode *dir,
+ umode_t mode);
/*
* VFS FS_IOC_FIEMAP helper definitions.
*/
@@ -1409,7 +1428,7 @@ struct fiemap_extent_info {
unsigned int fi_extents_mapped; /* Number of mapped extents */
unsigned int fi_extents_max; /* Size of fiemap_extent array */
struct fiemap_extent __user *fi_extents_start; /* Start of
- fiemap_extent array */
+ fiemap_extent array */
};
int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical,
u64 phys, u64 len, u32 flags);
@@ -1455,31 +1474,38 @@ struct iov_iter;
struct file_operations {
struct module *owner;
- loff_t (*llseek) (struct file *, loff_t, int);
- ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
- ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
- ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t);
- ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
- ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
- int (*iterate) (struct file *, struct dir_context *);
- unsigned int (*poll) (struct file *, struct poll_table_struct *);
- long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
- long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
- int (*mmap) (struct file *, struct vm_area_struct *);
- int (*open) (struct inode *, struct file *);
- int (*flush) (struct file *, fl_owner_t id);
- int (*release) (struct inode *, struct file *);
- int (*fsync) (struct file *, loff_t, loff_t, int datasync);
- int (*aio_fsync) (struct kiocb *, int datasync);
- int (*fasync) (int, struct file *, int);
- int (*lock) (struct file *, int, struct file_lock *);
- ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
- unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+ loff_t (*llseek)(struct file *, loff_t, int);
+ ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
+ ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
+ ssize_t (*aio_read)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+ ssize_t (*aio_write)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+ ssize_t (*read_iter)(struct kiocb *, struct iov_iter *);
+ ssize_t (*write_iter)(struct kiocb *, struct iov_iter *);
+ int (*iterate)(struct file *, struct dir_context *);
+ unsigned int (*poll)(struct file *, struct poll_table_struct *);
+ long (*unlocked_ioctl)(struct file *, unsigned int, unsigned long);
+ long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
+ int (*mmap)(struct file *, struct vm_area_struct *);
+ int (*open)(struct inode *, struct file *);
+ int (*flush)(struct file *, fl_owner_t id);
+ int (*release)(struct inode *, struct file *);
+ int (*fsync)(struct file *, loff_t, loff_t, int datasync);
+ int (*aio_fsync)(struct kiocb *, int datasync);
+ int (*fasync)(int, struct file *, int);
+ int (*lock)(struct file *, int, struct file_lock *);
+ ssize_t (*sendpage)(struct file *, struct page *, int, size_t,
+ loff_t *, int);
+ unsigned long (*get_unmapped_area)(struct file *, unsigned long,
+ unsigned long, unsigned long,
+ unsigned long);
int (*check_flags)(int);
- int (*flock) (struct file *, int, struct file_lock *);
- ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
- ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+ int (*flock)(struct file *, int, struct file_lock *);
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
+ loff_t *, size_t, unsigned int);
+ ssize_t (*splice_read)(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
@@ -1487,76 +1513,79 @@ struct file_operations {
};
struct inode_operations {
- struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- int (*permission) (struct inode *, int);
+ struct dentry *(*lookup)(struct inode *, struct dentry *, unsigned int);
+ void *(*follow_link)(struct dentry *, struct nameidata *);
+ int (*permission)(struct inode *, int);
struct posix_acl * (*get_acl)(struct inode *, int);
- int (*readlink) (struct dentry *, char __user *,int);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
-
- int (*create) (struct inode *,struct dentry *, umode_t, bool);
- int (*link) (struct dentry *,struct inode *,struct dentry *);
- int (*unlink) (struct inode *,struct dentry *);
- int (*symlink) (struct inode *,struct dentry *,const char *);
- int (*mkdir) (struct inode *,struct dentry *,umode_t);
- int (*rmdir) (struct inode *,struct dentry *);
- int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
- int (*rename) (struct inode *, struct dentry *,
- struct inode *, struct dentry *);
- int (*rename2) (struct inode *, struct dentry *,
- struct inode *, struct dentry *, unsigned int);
- int (*setattr) (struct dentry *, struct iattr *);
- int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
- int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
- ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
- ssize_t (*listxattr) (struct dentry *, char *, size_t);
- int (*removexattr) (struct dentry *, const char *);
+ int (*readlink)(struct dentry *, char __user *, int);
+ void (*put_link)(struct dentry *, struct nameidata *, void *);
+
+ int (*create)(struct inode *, struct dentry *, umode_t, bool);
+ int (*link)(struct dentry *, struct inode *, struct dentry *);
+ int (*unlink)(struct inode *, struct dentry *);
+ int (*symlink)(struct inode *, struct dentry *, const char *);
+ int (*mkdir)(struct inode *, struct dentry *, umode_t);
+ int (*rmdir)(struct inode *, struct dentry *);
+ int (*mknod)(struct inode *, struct dentry *, umode_t, dev_t);
+ int (*rename)(struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
+ int (*rename2)(struct inode *, struct dentry *,
+ struct inode *, struct dentry *, unsigned int);
+ int (*setattr)(struct dentry *, struct iattr *);
+ int (*getattr)(struct vfsmount *mnt, struct dentry *, struct kstat *);
+ int (*setxattr)(struct dentry *, const char *, const void *, size_t,
+ int);
+ ssize_t (*getxattr)(struct dentry *, const char *, void *, size_t);
+ ssize_t (*listxattr)(struct dentry *, char *, size_t);
+ int (*removexattr)(struct dentry *, const char *);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
int (*update_time)(struct inode *, struct timespec *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode, int *opened);
- int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+ int (*tmpfile)(struct inode *, struct dentry *, umode_t);
int (*set_acl)(struct inode *, struct posix_acl *, int);
} ____cacheline_aligned;
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
+ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
unsigned long nr_segs, unsigned long fast_segs,
struct iovec *fast_pointer,
struct iovec **ret_pointer);
-extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
-extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
-extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
- unsigned long, loff_t *);
-extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
- unsigned long, loff_t *);
+ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
+ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
+ssize_t vfs_readv(struct file *, const struct iovec __user *, unsigned long,
+ loff_t *);
+ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long,
+ loff_t *);
struct super_operations {
- struct inode *(*alloc_inode)(struct super_block *sb);
+ struct inode *(*alloc_inode)(struct super_block *sb);
void (*destroy_inode)(struct inode *);
- void (*dirty_inode) (struct inode *, int flags);
- int (*write_inode) (struct inode *, struct writeback_control *wbc);
- int (*drop_inode) (struct inode *);
- void (*evict_inode) (struct inode *);
- void (*put_super) (struct super_block *);
+ void (*dirty_inode)(struct inode *, int flags);
+ int (*write_inode)(struct inode *, struct writeback_control *wbc);
+ int (*drop_inode)(struct inode *);
+ void (*evict_inode)(struct inode *);
+ void (*put_super)(struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_fs) (struct super_block *);
- int (*unfreeze_fs) (struct super_block *);
- int (*statfs) (struct dentry *, struct kstatfs *);
- int (*remount_fs) (struct super_block *, int *, char *);
- void (*umount_begin) (struct super_block *);
+ int (*freeze_fs)(struct super_block *);
+ int (*unfreeze_fs)(struct super_block *);
+ int (*statfs)(struct dentry *, struct kstatfs *);
+ int (*remount_fs)(struct super_block *, int *, char *);
+ void (*umount_begin)(struct super_block *);
int (*show_options)(struct seq_file *, struct dentry *);
int (*show_devname)(struct seq_file *, struct dentry *);
int (*show_path)(struct seq_file *, struct dentry *);
int (*show_stats)(struct seq_file *, struct dentry *);
#ifdef CONFIG_QUOTA
- ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
- ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
+ ssize_t (*quota_read)(struct super_block *, int, char *, size_t,
+ loff_t);
+ ssize_t (*quota_write)(struct super_block *, int, const char *, size_t,
+ loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
long (*nr_cached_objects)(struct super_block *, int);
@@ -1596,12 +1625,12 @@ struct super_operations {
#define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg))
#define IS_RDONLY(inode) ((inode)->i_sb->s_flags & MS_RDONLY)
-#define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \
- ((inode)->i_flags & S_SYNC))
-#define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \
- ((inode)->i_flags & (S_SYNC|S_DIRSYNC)))
+#define IS_SYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS) || \
+ ((inode)->i_flags & S_SYNC))
+#define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS | MS_DIRSYNC) || \
+ ((inode)->i_flags & (S_SYNC | S_DIRSYNC)))
#define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK)
-#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME)
+#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY | MS_NOATIME)
#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION)
#define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA)
@@ -1687,7 +1716,7 @@ struct super_operations {
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
-extern void __mark_inode_dirty(struct inode *, int);
+void __mark_inode_dirty(struct inode *, int);
static inline void mark_inode_dirty(struct inode *inode)
{
__mark_inode_dirty(inode, I_DIRTY);
@@ -1698,10 +1727,10 @@ static inline void mark_inode_dirty_sync(struct inode *inode)
__mark_inode_dirty(inode, I_DIRTY_SYNC);
}
-extern void inc_nlink(struct inode *inode);
-extern void drop_nlink(struct inode *inode);
-extern void clear_nlink(struct inode *inode);
-extern void set_nlink(struct inode *inode, unsigned int nlink);
+void inc_nlink(struct inode *inode);
+void drop_nlink(struct inode *inode);
+void clear_nlink(struct inode *inode);
+void set_nlink(struct inode *inode, unsigned int nlink);
static inline void inode_inc_link_count(struct inode *inode)
{
@@ -1725,9 +1754,9 @@ static inline void inode_dec_link_count(struct inode *inode)
static inline void inode_inc_iversion(struct inode *inode)
{
- spin_lock(&inode->i_lock);
- inode->i_version++;
- spin_unlock(&inode->i_lock);
+ spin_lock(&inode->i_lock);
+ inode->i_version++;
+ spin_unlock(&inode->i_lock);
}
enum file_time_flags {
@@ -1737,7 +1766,7 @@ enum file_time_flags {
S_VERSION = 8,
};
-extern void touch_atime(const struct path *);
+void touch_atime(const struct path *);
static inline void file_accessed(struct file *file)
{
if (!(file->f_flags & O_NOATIME))
@@ -1750,17 +1779,17 @@ int sync_inode_metadata(struct inode *inode, int wait);
struct file_system_type {
const char *name;
int fs_flags;
-#define FS_REQUIRES_DEV 1
+#define FS_REQUIRES_DEV 1
#define FS_BINARY_MOUNTDATA 2
#define FS_HAS_SUBTYPE 4
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_USERNS_DEV_MOUNT 16 /* A userns mount does not imply MNT_NODEV */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
- struct dentry *(*mount) (struct file_system_type *, int,
- const char *, void *);
- void (*kill_sb) (struct super_block *);
+ struct dentry *(*mount)(struct file_system_type *, int,
+ const char *, void *);
+ void (*kill_sb)(struct super_block *);
struct module *owner;
- struct file_system_type * next;
+ struct file_system_type *next;
struct hlist_head fs_supers;
struct lock_class_key s_lock_key;
@@ -1775,18 +1804,19 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
-extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
- void *data, int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_bdev(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *data,
- int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_single(struct file_system_type *fs_type,
- int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_nodev(struct file_system_type *fs_type,
- int flags, void *data,
- int (*fill_super)(struct super_block *, void *, int));
-extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
+struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
+ void *data,
+ int (*fill_super)(struct super_block *, void *, int));
+struct dentry *mount_bdev(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data,
+ int (*fill_super)(struct super_block *, void *, int));
+struct dentry *mount_single(struct file_system_type *fs_type,
+ int flags, void *data,
+ int (*fill_super)(struct super_block *, void *, int));
+struct dentry *mount_nodev(struct file_system_type *fs_type,
+ int flags, void *data,
+ int (*fill_super)(struct super_block *, void *, int));
+struct dentry *mount_subtree(struct vfsmount *mnt, const char *path);
void generic_shutdown_super(struct super_block *sb);
void kill_block_super(struct super_block *sb);
void kill_anon_super(struct super_block *sb);
@@ -1797,56 +1827,56 @@ int set_anon_super(struct super_block *s, void *data);
int get_anon_bdev(dev_t *);
void free_anon_bdev(dev_t);
struct super_block *sget(struct file_system_type *type,
- int (*test)(struct super_block *,void *),
- int (*set)(struct super_block *,void *),
- int flags, void *data);
-extern struct dentry *mount_pseudo(struct file_system_type *, char *,
- const struct super_operations *ops,
- const struct dentry_operations *dops,
- unsigned long);
+ int (*test)(struct super_block *, void *),
+ int (*set)(struct super_block *, void *),
+ int flags, void *data);
+struct dentry *mount_pseudo(struct file_system_type *, char *,
+ const struct super_operations *ops,
+ const struct dentry_operations *dops,
+ unsigned long);
/* Alas, no aliases. Too much hassle with bringing module.h everywhere */
-#define fops_get(fops) \
+#define fops_get(fops) \
(((fops) && try_module_get((fops)->owner) ? (fops) : NULL))
-#define fops_put(fops) \
- do { if (fops) module_put((fops)->owner); } while(0)
+#define fops_put(fops) \
+ do { if (fops) module_put((fops)->owner); } while (0)
/*
* This one is to be used *ONLY* from ->open() instances.
* fops must be non-NULL, pinned down *and* module dependencies
* should be sufficient to pin the caller down as well.
*/
-#define replace_fops(f, fops) \
- do { \
- struct file *__file = (f); \
- fops_put(__file->f_op); \
- BUG_ON(!(__file->f_op = (fops))); \
- } while(0)
-
-extern int register_filesystem(struct file_system_type *);
-extern int unregister_filesystem(struct file_system_type *);
-extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
+#define replace_fops(f, fops) \
+ do { \
+ struct file *__file = (f); \
+ fops_put(__file->f_op); \
+ BUG_ON(!(__file->f_op = (fops))); \
+ } while (0)
+
+int register_filesystem(struct file_system_type *);
+int unregister_filesystem(struct file_system_type *);
+struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
#define kern_mount(type) kern_mount_data(type, NULL)
-extern void kern_unmount(struct vfsmount *mnt);
-extern int may_umount_tree(struct vfsmount *);
-extern int may_umount(struct vfsmount *);
-extern long do_mount(const char *, const char *, const char *, unsigned long, void *);
-extern struct vfsmount *collect_mounts(struct path *);
-extern void drop_collected_mounts(struct vfsmount *);
-extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
- struct vfsmount *);
-extern int vfs_statfs(struct path *, struct kstatfs *);
-extern int user_statfs(const char __user *, struct kstatfs *);
-extern int fd_statfs(int, struct kstatfs *);
-extern int vfs_ustat(dev_t, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
-extern bool our_mnt(struct vfsmount *mnt);
-extern bool fs_fully_visible(struct file_system_type *);
-
-extern int current_umask(void);
-
-extern void ihold(struct inode * inode);
-extern void iput(struct inode *);
+void kern_unmount(struct vfsmount *mnt);
+int may_umount_tree(struct vfsmount *);
+int may_umount(struct vfsmount *);
+long do_mount(const char *, const char *, const char *, unsigned long, void *);
+struct vfsmount *collect_mounts(struct path *);
+void drop_collected_mounts(struct vfsmount *);
+int iterate_mounts(int (*)(struct vfsmount *, void *), void *,
+ struct vfsmount *);
+int vfs_statfs(struct path *, struct kstatfs *);
+int user_statfs(const char __user *, struct kstatfs *);
+int fd_statfs(int, struct kstatfs *);
+int vfs_ustat(dev_t, struct kstatfs *);
+int freeze_super(struct super_block *super);
+int thaw_super(struct super_block *super);
+bool our_mnt(struct vfsmount *mnt);
+bool fs_fully_visible(struct file_system_type *);
+
+int current_umask(void);
+
+void ihold(struct inode *inode);
+void iput(struct inode *);
static inline struct inode *file_inode(struct file *f)
{
@@ -1862,8 +1892,8 @@ extern struct kobject *fs_kobj;
#define FLOCK_VERIFY_WRITE 2
#ifdef CONFIG_FILE_LOCKING
-extern int locks_mandatory_locked(struct file *);
-extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
+int locks_mandatory_locked(struct file *);
+int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t);
/*
* Candidates for mandatory locking have the setgid bit set
@@ -1893,8 +1923,8 @@ static inline int locks_verify_locked(struct file *file)
}
static inline int locks_verify_truncate(struct inode *inode,
- struct file *filp,
- loff_t size)
+ struct file *filp,
+ loff_t size)
{
if (inode->i_flock && mandatory_lock(inode))
return locks_mandatory_area(
@@ -1902,7 +1932,7 @@ static inline int locks_verify_truncate(struct inode *inode,
size < inode->i_size ? size : inode->i_size,
(size < inode->i_size ? inode->i_size - size
: size - inode->i_size)
- );
+ );
return 0;
}
@@ -1932,11 +1962,12 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
return 0;
}
-static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+static inline int try_break_deleg(struct inode *inode,
+ struct inode **delegated_inode)
{
int ret;
- ret = break_deleg(inode, O_WRONLY|O_NONBLOCK);
+ ret = break_deleg(inode, O_WRONLY | O_NONBLOCK);
if (ret == -EWOULDBLOCK && delegated_inode) {
*delegated_inode = inode;
ihold(inode);
@@ -1998,7 +2029,8 @@ static inline int break_deleg(struct inode *inode, unsigned int mode)
return 0;
}
-static inline int try_break_deleg(struct inode *inode, struct inode **delegated_inode)
+static inline int try_break_deleg(struct inode *inode,
+ struct inode **delegated_inode)
{
return 0;
}
@@ -2020,69 +2052,66 @@ struct filename {
bool separate; /* should "name" be freed? */
};
-extern long vfs_truncate(struct path *, loff_t);
-extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
- struct file *filp);
-extern int do_fallocate(struct file *file, int mode, loff_t offset,
- loff_t len);
-extern long do_sys_open(int dfd, const char __user *filename, int flags,
- umode_t mode);
-extern struct file *file_open_name(struct filename *, int, umode_t);
-extern struct file *filp_open(const char *, int, umode_t);
-extern struct file *file_open_root(struct dentry *, struct vfsmount *,
- const char *, int);
-extern struct file * dentry_open(const struct path *, int, const struct cred *);
-extern int filp_close(struct file *, fl_owner_t id);
-
-extern struct filename *getname(const char __user *);
-extern struct filename *getname_kernel(const char *);
+long vfs_truncate(struct path *, loff_t);
+int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
+ struct file *filp);
+int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len);
+long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode);
+struct file *file_open_name(struct filename *, int, umode_t);
+struct file *filp_open(const char *, int, umode_t);
+struct file *file_open_root(struct dentry *, struct vfsmount *,
+ const char *, int);
+struct file *dentry_open(const struct path *, int, const struct cred *);
+int filp_close(struct file *, fl_owner_t id);
+
+struct filename *getname(const char __user *);
+struct filename *getname_kernel(const char *);
enum {
FILE_CREATED = 1,
FILE_OPENED = 2
};
-extern int finish_open(struct file *file, struct dentry *dentry,
- int (*open)(struct inode *, struct file *),
- int *opened);
-extern int finish_no_open(struct file *file, struct dentry *dentry);
+int finish_open(struct file *file, struct dentry *dentry,
+ int (*open)(struct inode *, struct file *), int *opened);
+int finish_no_open(struct file *file, struct dentry *dentry);
/* fs/ioctl.c */
-extern int ioctl_preallocate(struct file *filp, void __user *argp);
+int ioctl_preallocate(struct file *filp, void __user *argp);
/* fs/dcache.c */
-extern void __init vfs_caches_init_early(void);
-extern void __init vfs_caches_init(unsigned long);
+void __init vfs_caches_init_early(void);
+void __init vfs_caches_init(unsigned long);
extern struct kmem_cache *names_cachep;
-extern void final_putname(struct filename *name);
+void final_putname(struct filename *name);
#define __getname() kmem_cache_alloc(names_cachep, GFP_KERNEL)
#define __putname(name) kmem_cache_free(names_cachep, (void *)(name))
#ifndef CONFIG_AUDITSYSCALL
#define putname(name) final_putname(name)
#else
-extern void putname(struct filename *name);
+void putname(struct filename *name);
#endif
#ifdef CONFIG_BLOCK
-extern int register_blkdev(unsigned int, const char *);
-extern void unregister_blkdev(unsigned int, const char *);
-extern struct block_device *bdget(dev_t);
-extern struct block_device *bdgrab(struct block_device *bdev);
-extern void bd_set_size(struct block_device *, loff_t size);
-extern void bd_forget(struct inode *inode);
-extern void bdput(struct block_device *);
-extern void invalidate_bdev(struct block_device *);
-extern void iterate_bdevs(void (*)(struct block_device *, void *), void *);
-extern int sync_blockdev(struct block_device *bdev);
-extern void kill_bdev(struct block_device *);
-extern struct super_block *freeze_bdev(struct block_device *);
-extern void emergency_thaw_all(void);
-extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
-extern int fsync_bdev(struct block_device *);
-extern int sb_is_blkdev_sb(struct super_block *sb);
+int register_blkdev(unsigned int, const char *);
+void unregister_blkdev(unsigned int, const char *);
+struct block_device *bdget(dev_t);
+struct block_device *bdgrab(struct block_device *bdev);
+void bd_set_size(struct block_device *, loff_t size);
+void bd_forget(struct inode *inode);
+void bdput(struct block_device *);
+void invalidate_bdev(struct block_device *);
+void iterate_bdevs(void (*)(struct block_device *, void *), void *);
+int sync_blockdev(struct block_device *bdev);
+void kill_bdev(struct block_device *);
+struct super_block *freeze_bdev(struct block_device *);
+void emergency_thaw_all(void);
+int thaw_bdev(struct block_device *bdev, struct super_block *sb);
+int fsync_bdev(struct block_device *);
+int sb_is_blkdev_sb(struct super_block *sb);
#else
static inline void bd_forget(struct inode *inode) {}
static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@ -2108,24 +2137,23 @@ static inline int sb_is_blkdev_sb(struct super_block *sb)
return 0;
}
#endif
-extern int sync_filesystem(struct super_block *);
+int sync_filesystem(struct super_block *);
extern const struct file_operations def_blk_fops;
extern const struct file_operations def_chr_fops;
extern const struct file_operations bad_sock_fops;
#ifdef CONFIG_BLOCK
-extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
-extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
-extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
-extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
-extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder);
-extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
- void *holder);
-extern void blkdev_put(struct block_device *bdev, fmode_t mode);
+int ioctl_by_bdev(struct block_device *, unsigned, unsigned long);
+int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long);
+long compat_blkdev_ioctl(struct file *, unsigned, unsigned long);
+int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+ void *holder);
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode,
+ void *holder);
+void blkdev_put(struct block_device *bdev, fmode_t mode);
#ifdef CONFIG_SYSFS
-extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
-extern void bd_unlink_disk_holder(struct block_device *bdev,
- struct gendisk *disk);
+int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk);
+void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk);
#else
static inline int bd_link_disk_holder(struct block_device *bdev,
struct gendisk *disk)
@@ -2141,15 +2169,15 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
/* fs/char_dev.c */
#define CHRDEV_MAJOR_HASH_SIZE 255
-extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
-extern int register_chrdev_region(dev_t, unsigned, const char *);
-extern int __register_chrdev(unsigned int major, unsigned int baseminor,
- unsigned int count, const char *name,
- const struct file_operations *fops);
-extern void __unregister_chrdev(unsigned int major, unsigned int baseminor,
- unsigned int count, const char *name);
-extern void unregister_chrdev_region(dev_t, unsigned);
-extern void chrdev_show(struct seq_file *,off_t);
+int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *);
+int register_chrdev_region(dev_t, unsigned, const char *);
+int __register_chrdev(unsigned int major, unsigned int baseminor,
+ unsigned int count, const char *name,
+ const struct file_operations *fops);
+void __unregister_chrdev(unsigned int major, unsigned int baseminor,
+ unsigned int count, const char *name);
+void unregister_chrdev_region(dev_t, unsigned);
+void chrdev_show(struct seq_file *, off_t);
static inline int register_chrdev(unsigned int major, const char *name,
const struct file_operations *fops)
@@ -2168,20 +2196,20 @@ static inline void unregister_chrdev(unsigned int major, const char *name)
#ifdef CONFIG_BLOCK
#define BLKDEV_MAJOR_HASH_SIZE 255
-extern const char *__bdevname(dev_t, char *buffer);
-extern const char *bdevname(struct block_device *bdev, char *buffer);
-extern struct block_device *lookup_bdev(const char *);
-extern void blkdev_show(struct seq_file *,off_t);
+const char *__bdevname(dev_t, char *buffer);
+const char *bdevname(struct block_device *bdev, char *buffer);
+struct block_device *lookup_bdev(const char *);
+void blkdev_show(struct seq_file *, off_t);
#else
#define BLKDEV_MAJOR_HASH_SIZE 0
#endif
-extern void init_special_inode(struct inode *, umode_t, dev_t);
+void init_special_inode(struct inode *, umode_t, dev_t);
/* Invalid inode operations -- fs/bad_inode.c */
-extern void make_bad_inode(struct inode *);
-extern int is_bad_inode(struct inode *);
+void make_bad_inode(struct inode *);
+int is_bad_inode(struct inode *);
#ifdef CONFIG_BLOCK
/*
@@ -2194,15 +2222,14 @@ extern int is_bad_inode(struct inode *);
*/
#define bio_data_dir(bio) ((bio)->bi_rw & 1)
-extern void check_disk_size_change(struct gendisk *disk,
- struct block_device *bdev);
-extern int revalidate_disk(struct gendisk *);
-extern int check_disk_change(struct block_device *);
-extern int __invalidate_device(struct block_device *, bool);
-extern int invalidate_partition(struct gendisk *, int);
+void check_disk_size_change(struct gendisk *disk, struct block_device *bdev);
+int revalidate_disk(struct gendisk *);
+int check_disk_change(struct block_device *);
+int __invalidate_device(struct block_device *, bool);
+int invalidate_partition(struct gendisk *, int);
#endif
unsigned long invalidate_mapping_pages(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
+ pgoff_t start, pgoff_t end);
static inline void invalidate_remote_inode(struct inode *inode)
{
@@ -2210,26 +2237,24 @@ static inline void invalidate_remote_inode(struct inode *inode)
S_ISLNK(inode->i_mode))
invalidate_mapping_pages(inode->i_mapping, 0, -1);
}
-extern int invalidate_inode_pages2(struct address_space *mapping);
-extern int invalidate_inode_pages2_range(struct address_space *mapping,
- pgoff_t start, pgoff_t end);
-extern int write_inode_now(struct inode *, int);
-extern int filemap_fdatawrite(struct address_space *);
-extern int filemap_flush(struct address_space *);
-extern int filemap_fdatawait(struct address_space *);
-extern int filemap_fdatawait_range(struct address_space *, loff_t lstart,
- loff_t lend);
-extern int filemap_write_and_wait(struct address_space *mapping);
-extern int filemap_write_and_wait_range(struct address_space *mapping,
- loff_t lstart, loff_t lend);
-extern int __filemap_fdatawrite_range(struct address_space *mapping,
- loff_t start, loff_t end, int sync_mode);
-extern int filemap_fdatawrite_range(struct address_space *mapping,
- loff_t start, loff_t end);
-
-extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
- int datasync);
-extern int vfs_fsync(struct file *file, int datasync);
+int invalidate_inode_pages2(struct address_space *mapping);
+int invalidate_inode_pages2_range(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
+int write_inode_now(struct inode *, int);
+int filemap_fdatawrite(struct address_space *);
+int filemap_flush(struct address_space *);
+int filemap_fdatawait(struct address_space *);
+int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
+int filemap_write_and_wait(struct address_space *mapping);
+int filemap_write_and_wait_range(struct address_space *mapping,
+ loff_t lstart, loff_t lend);
+int __filemap_fdatawrite_range(struct address_space *mapping,
+ loff_t start, loff_t end, int sync_mode);
+int filemap_fdatawrite_range(struct address_space *mapping,
+ loff_t start, loff_t end);
+
+int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync);
+int vfs_fsync(struct file *file, int datasync);
static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count)
{
if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host))
@@ -2237,14 +2262,14 @@ static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count
return vfs_fsync_range(file, pos, pos + count - 1,
(file->f_flags & __O_SYNC) ? 0 : 1);
}
-extern void emergency_sync(void);
-extern void emergency_remount(void);
+void emergency_sync(void);
+void emergency_remount(void);
#ifdef CONFIG_BLOCK
-extern sector_t bmap(struct inode *, sector_t);
+sector_t bmap(struct inode *, sector_t);
#endif
-extern int notify_change(struct dentry *, struct iattr *, struct inode **);
-extern int inode_permission(struct inode *, int);
-extern int generic_permission(struct inode *, int);
+int notify_change(struct dentry *, struct iattr *, struct inode **);
+int inode_permission(struct inode *, int);
+int generic_permission(struct inode *, int);
static inline bool execute_ok(struct inode *inode)
{
@@ -2297,7 +2322,7 @@ static inline int deny_write_access(struct file *file)
struct inode *inode = file_inode(file);
return atomic_dec_unless_positive(&inode->i_writecount) ? 0 : -ETXTBSY;
}
-static inline void put_write_access(struct inode * inode)
+static inline void put_write_access(struct inode *inode)
{
atomic_dec(&inode->i_writecount);
}
@@ -2324,145 +2349,156 @@ static inline void i_readcount_inc(struct inode *inode)
#else
static inline void i_readcount_dec(struct inode *inode)
{
- return;
}
static inline void i_readcount_inc(struct inode *inode)
{
- return;
}
#endif
-extern int do_pipe_flags(int *, int);
+int do_pipe_flags(int *, int);
+
+int kernel_read(struct file *, loff_t, char *, unsigned long);
+ssize_t kernel_write(struct file *, const char *, size_t, loff_t);
+struct file *open_exec(const char *);
-extern int kernel_read(struct file *, loff_t, char *, unsigned long);
-extern ssize_t kernel_write(struct file *, const char *, size_t, loff_t);
-extern struct file * open_exec(const char *);
-
/* fs/dcache.c -- generic fs support functions */
-extern int is_subdir(struct dentry *, struct dentry *);
-extern int path_is_under(struct path *, struct path *);
+int is_subdir(struct dentry *, struct dentry *);
+int path_is_under(struct path *, struct path *);
#include <linux/err.h>
/* needed for stackable file system support */
-extern loff_t default_llseek(struct file *file, loff_t offset, int whence);
+loff_t default_llseek(struct file *file, loff_t offset, int whence);
-extern loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
+loff_t vfs_llseek(struct file *file, loff_t offset, int whence);
-extern int inode_init_always(struct super_block *, struct inode *);
-extern void inode_init_once(struct inode *);
-extern void address_space_init_once(struct address_space *mapping);
-extern struct inode * igrab(struct inode *);
-extern ino_t iunique(struct super_block *, ino_t);
-extern int inode_needs_sync(struct inode *inode);
-extern int generic_delete_inode(struct inode *inode);
+int inode_init_always(struct super_block *, struct inode *);
+void inode_init_once(struct inode *);
+void address_space_init_once(struct address_space *mapping);
+struct inode *igrab(struct inode *);
+ino_t iunique(struct super_block *, ino_t);
+int inode_needs_sync(struct inode *inode);
+int generic_delete_inode(struct inode *inode);
static inline int generic_drop_inode(struct inode *inode)
{
return !inode->i_nlink || inode_unhashed(inode);
}
-extern struct inode *ilookup5_nowait(struct super_block *sb,
- unsigned long hashval, int (*test)(struct inode *, void *),
- void *data);
-extern struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
- int (*test)(struct inode *, void *), void *data);
-extern struct inode *ilookup(struct super_block *sb, unsigned long ino);
+struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval,
+ int (*test)(struct inode *, void *), void *data);
+struct inode *ilookup5(struct super_block *sb, unsigned long hashval,
+ int (*test)(struct inode *, void *), void *data);
+struct inode *ilookup(struct super_block *sb, unsigned long ino);
-extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *);
-extern struct inode * iget_locked(struct super_block *, unsigned long);
-extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
-extern int insert_inode_locked(struct inode *);
+struct inode *iget5_locked(struct super_block *, unsigned long,
+ int (*test)(struct inode *, void *),
+ int (*set)(struct inode *, void *), void *);
+struct inode *iget_locked(struct super_block *, unsigned long);
+int insert_inode_locked4(struct inode *, unsigned long,
+ int (*test)(struct inode *, void *), void *);
+int insert_inode_locked(struct inode *);
#ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern void lockdep_annotate_inode_mutex_key(struct inode *inode);
+void lockdep_annotate_inode_mutex_key(struct inode *inode);
#else
static inline void lockdep_annotate_inode_mutex_key(struct inode *inode) { };
#endif
-extern void unlock_new_inode(struct inode *);
-extern unsigned int get_next_ino(void);
-
-extern void __iget(struct inode * inode);
-extern void iget_failed(struct inode *);
-extern void clear_inode(struct inode *);
-extern void __destroy_inode(struct inode *);
-extern struct inode *new_inode_pseudo(struct super_block *sb);
-extern struct inode *new_inode(struct super_block *sb);
-extern void free_inode_nonrcu(struct inode *inode);
-extern int should_remove_suid(struct dentry *);
-extern int file_remove_suid(struct file *);
-
-extern void __insert_inode_hash(struct inode *, unsigned long hashval);
+void unlock_new_inode(struct inode *);
+unsigned int get_next_ino(void);
+
+void __iget(struct inode *inode);
+void iget_failed(struct inode *);
+void clear_inode(struct inode *);
+void __destroy_inode(struct inode *);
+struct inode *new_inode_pseudo(struct super_block *sb);
+struct inode *new_inode(struct super_block *sb);
+void free_inode_nonrcu(struct inode *inode);
+int should_remove_suid(struct dentry *);
+int file_remove_suid(struct file *);
+
+void __insert_inode_hash(struct inode *, unsigned long hashval);
static inline void insert_inode_hash(struct inode *inode)
{
__insert_inode_hash(inode, inode->i_ino);
}
-extern void __remove_inode_hash(struct inode *);
+void __remove_inode_hash(struct inode *);
static inline void remove_inode_hash(struct inode *inode)
{
if (!inode_unhashed(inode))
__remove_inode_hash(inode);
}
-extern void inode_sb_list_add(struct inode *inode);
+void inode_sb_list_add(struct inode *inode);
#ifdef CONFIG_BLOCK
-extern void submit_bio(int, struct bio *);
-extern int bdev_read_only(struct block_device *);
+void submit_bio(int, struct bio *);
+int bdev_read_only(struct block_device *);
#endif
-extern int set_blocksize(struct block_device *, int);
-extern int sb_set_blocksize(struct super_block *, int);
-extern int sb_min_blocksize(struct super_block *, int);
-
-extern int generic_file_mmap(struct file *, struct vm_area_struct *);
-extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
-extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr,
- unsigned long size, pgoff_t pgoff);
-int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk);
-extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
-extern ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
-extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
-extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
-extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
-extern ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos);
-extern ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos);
+int set_blocksize(struct block_device *, int);
+int sb_set_blocksize(struct super_block *, int);
+int sb_min_blocksize(struct super_block *, int);
+
+int generic_file_mmap(struct file *, struct vm_area_struct *);
+int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
+static inline int generic_file_remap_pages(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long size, pgoff_t pgoff)
+{
+ BUG();
+ return 0;
+}
+int generic_write_checks(struct file *file, loff_t *pos, size_t *count,
+ int isblk);
+ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
+ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
+ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
+ssize_t generic_file_direct_write(struct kiocb *, struct iov_iter *, loff_t);
+ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t);
+ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *ppos);
+ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len,
+ loff_t *ppos);
+ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *ppos);
+ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len,
+ loff_t *ppos);
/* fs/block_dev.c */
-extern ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
-extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync);
-extern void block_sync_page(struct page *page);
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from);
+int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync);
+void block_sync_page(struct page *page);
/* fs/splice.c */
-extern ssize_t generic_file_splice_read(struct file *, loff_t *,
- struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t default_file_splice_read(struct file *, loff_t *,
- struct pipe_inode_info *, size_t, unsigned int);
-extern ssize_t iter_file_splice_write(struct pipe_inode_info *,
- struct file *, loff_t *, size_t, unsigned int);
-extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
- struct file *out, loff_t *, size_t len, unsigned int flags);
-
-extern void
+ssize_t generic_file_splice_read(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t,
+ unsigned int);
+ssize_t default_file_splice_read(struct file *, loff_t *,
+ struct pipe_inode_info *, size_t,
+ unsigned int);
+ssize_t iter_file_splice_write(struct pipe_inode_info *,
+ struct file *, loff_t *, size_t, unsigned int);
+ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe,
+ struct file *out, loff_t *, size_t len,
+ unsigned int flags);
+
+void
file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping);
-extern loff_t noop_llseek(struct file *file, loff_t offset, int whence);
-extern loff_t no_llseek(struct file *file, loff_t offset, int whence);
-extern loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
-extern loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
-extern loff_t generic_file_llseek_size(struct file *file, loff_t offset,
- int whence, loff_t maxsize, loff_t eof);
-extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
- int whence, loff_t size);
-extern int generic_file_open(struct inode * inode, struct file * filp);
-extern int nonseekable_open(struct inode * inode, struct file * filp);
+loff_t noop_llseek(struct file *file, loff_t offset, int whence);
+loff_t no_llseek(struct file *file, loff_t offset, int whence);
+loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize);
+loff_t generic_file_llseek(struct file *file, loff_t offset, int whence);
+loff_t generic_file_llseek_size(struct file *file, loff_t offset,
+ int whence, loff_t maxsize, loff_t eof);
+loff_t fixed_size_llseek(struct file *file, loff_t offset,
+ int whence, loff_t size);
+int generic_file_open(struct inode *inode, struct file *filp);
+int nonseekable_open(struct inode *inode, struct file *filp);
#ifdef CONFIG_FS_XIP
-extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
- loff_t *ppos);
-extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
-extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
- size_t len, loff_t *ppos);
-extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *ppos);
+int xip_file_mmap(struct file *file, struct vm_area_struct *vma);
+ssize_t xip_file_write(struct file *filp, const char __user *buf, size_t len,
+ loff_t *ppos);
+int xip_truncate_page(struct address_space *mapping, loff_t from);
#else
static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
{
@@ -2488,13 +2524,15 @@ enum {
void dio_end_io(struct bio *bio, int error);
ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
- struct block_device *bdev, struct iov_iter *iter, loff_t offset,
- get_block_t get_block, dio_iodone_t end_io,
- dio_submit_t submit_io, int flags);
+ struct block_device *bdev, struct iov_iter *iter,
+ loff_t offset, get_block_t get_block,
+ dio_iodone_t end_io, dio_submit_t submit_io,
+ int flags);
static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
- struct inode *inode, struct iov_iter *iter, loff_t offset,
- get_block_t get_block)
+ struct inode *inode,
+ struct iov_iter *iter, loff_t offset,
+ get_block_t get_block)
{
return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iter,
offset, get_block, NULL, NULL,
@@ -2505,26 +2543,25 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
void inode_dio_wait(struct inode *inode);
void inode_dio_done(struct inode *inode);
-extern void inode_set_flags(struct inode *inode, unsigned int flags,
- unsigned int mask);
+void inode_set_flags(struct inode *inode, unsigned int flags,
+ unsigned int mask);
extern const struct file_operations generic_ro_fops;
-#define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
+#define special_file(m) (S_ISCHR(m) || S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m))
-extern int readlink_copy(char __user *, int, const char *);
-extern int page_readlink(struct dentry *, char __user *, int);
-extern void *page_follow_link_light(struct dentry *, struct nameidata *);
-extern void page_put_link(struct dentry *, struct nameidata *, void *);
-extern int __page_symlink(struct inode *inode, const char *symname, int len,
- int nofs);
-extern int page_symlink(struct inode *inode, const char *symname, int len);
+int readlink_copy(char __user *, int, const char *);
+int page_readlink(struct dentry *, char __user *, int);
+void *page_follow_link_light(struct dentry *, struct nameidata *);
+void page_put_link(struct dentry *, struct nameidata *, void *);
+int __page_symlink(struct inode *inode, const char *symname, int len, int nofs);
+int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
-extern void kfree_put_link(struct dentry *, struct nameidata *, void *);
-extern int generic_readlink(struct dentry *, char __user *, int);
-extern void generic_fillattr(struct inode *, struct kstat *);
+void kfree_put_link(struct dentry *, struct nameidata *, void *);
+int generic_readlink(struct dentry *, char __user *, int);
+void generic_fillattr(struct inode *, struct kstat *);
int vfs_getattr_nosec(struct path *path, struct kstat *stat);
-extern int vfs_getattr(struct path *, struct kstat *);
+int vfs_getattr(struct path *, struct kstat *);
void __inode_add_bytes(struct inode *inode, loff_t bytes);
void inode_add_bytes(struct inode *inode, loff_t bytes);
void __inode_sub_bytes(struct inode *inode, loff_t bytes);
@@ -2532,97 +2569,98 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes);
loff_t inode_get_bytes(struct inode *inode);
void inode_set_bytes(struct inode *inode, loff_t bytes);
-extern int vfs_readdir(struct file *, filldir_t, void *);
-extern int iterate_dir(struct file *, struct dir_context *);
-
-extern int vfs_stat(const char __user *, struct kstat *);
-extern int vfs_lstat(const char __user *, struct kstat *);
-extern int vfs_fstat(unsigned int, struct kstat *);
-extern int vfs_fstatat(int , const char __user *, struct kstat *, int);
-
-extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
- unsigned long arg);
-extern int __generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo,
- loff_t start, loff_t len,
- get_block_t *get_block);
-extern int generic_block_fiemap(struct inode *inode,
- struct fiemap_extent_info *fieinfo, u64 start,
- u64 len, get_block_t *get_block);
-
-extern void get_filesystem(struct file_system_type *fs);
-extern void put_filesystem(struct file_system_type *fs);
-extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
-extern struct super_block *get_super_thawed(struct block_device *);
-extern struct super_block *get_active_super(struct block_device *bdev);
-extern void drop_super(struct super_block *sb);
-extern void iterate_supers(void (*)(struct super_block *, void *), void *);
-extern void iterate_supers_type(struct file_system_type *,
- void (*)(struct super_block *, void *), void *);
-
-extern int dcache_dir_open(struct inode *, struct file *);
-extern int dcache_dir_close(struct inode *, struct file *);
-extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
-extern int dcache_readdir(struct file *, struct dir_context *);
-extern int simple_setattr(struct dentry *, struct iattr *);
-extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
-extern int simple_statfs(struct dentry *, struct kstatfs *);
-extern int simple_open(struct inode *inode, struct file *file);
-extern int simple_link(struct dentry *, struct inode *, struct dentry *);
-extern int simple_unlink(struct inode *, struct dentry *);
-extern int simple_rmdir(struct inode *, struct dentry *);
-extern int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
-extern int noop_fsync(struct file *, loff_t, loff_t, int);
-extern int simple_empty(struct dentry *);
-extern int simple_readpage(struct file *file, struct page *page);
-extern int simple_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata);
-extern int simple_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
-extern int always_delete_dentry(const struct dentry *);
-extern struct inode *alloc_anon_inode(struct super_block *);
+int vfs_readdir(struct file *, filldir_t, void *);
+int iterate_dir(struct file *, struct dir_context *);
+
+int vfs_stat(const char __user *, struct kstat *);
+int vfs_lstat(const char __user *, struct kstat *);
+int vfs_fstat(unsigned int, struct kstat *);
+int vfs_fstatat(int , const char __user *, struct kstat *, int);
+
+int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
+ unsigned long arg);
+int __generic_block_fiemap(struct inode *inode,
+ struct fiemap_extent_info *fieinfo,
+ loff_t start, loff_t len, get_block_t *get_block);
+int generic_block_fiemap(struct inode *inode,
+ struct fiemap_extent_info *fieinfo,
+ u64 start, u64 len, get_block_t *get_block);
+
+void get_filesystem(struct file_system_type *fs);
+void put_filesystem(struct file_system_type *fs);
+struct file_system_type *get_fs_type(const char *name);
+struct super_block *get_super(struct block_device *);
+struct super_block *get_super_thawed(struct block_device *);
+struct super_block *get_active_super(struct block_device *bdev);
+void drop_super(struct super_block *sb);
+void iterate_supers(void (*)(struct super_block *, void *), void *);
+void iterate_supers_type(struct file_system_type *,
+ void (*)(struct super_block *, void *), void *);
+
+int dcache_dir_open(struct inode *, struct file *);
+int dcache_dir_close(struct inode *, struct file *);
+loff_t dcache_dir_lseek(struct file *, loff_t, int);
+int dcache_readdir(struct file *, struct dir_context *);
+int simple_setattr(struct dentry *, struct iattr *);
+int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+int simple_statfs(struct dentry *, struct kstatfs *);
+int simple_open(struct inode *inode, struct file *file);
+int simple_link(struct dentry *, struct inode *, struct dentry *);
+int simple_unlink(struct inode *, struct dentry *);
+int simple_rmdir(struct inode *, struct dentry *);
+int simple_rename(struct inode *, struct dentry *, struct inode *,
+ struct dentry *);
+int noop_fsync(struct file *, loff_t, loff_t, int);
+int simple_empty(struct dentry *);
+int simple_readpage(struct file *file, struct page *page);
+int simple_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags,
+ struct page **pagep, void **fsdata);
+int simple_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata);
+int always_delete_dentry(const struct dentry *);
+struct inode *alloc_anon_inode(struct super_block *);
extern const struct dentry_operations simple_dentry_operations;
-extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
-extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
+struct dentry *simple_lookup(struct inode *, struct dentry *,
+ unsigned int flags);
+ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *);
extern const struct file_operations simple_dir_operations;
extern const struct inode_operations simple_dir_inode_operations;
struct tree_descr { char *name; const struct file_operations *ops; int mode; };
struct dentry *d_alloc_name(struct dentry *, const char *);
-extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
-extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count);
-extern void simple_release_fs(struct vfsmount **mount, int *count);
+int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *);
+int simple_pin_fs(struct file_system_type *, struct vfsmount **mount,
+ int *count);
+void simple_release_fs(struct vfsmount **mount, int *count);
-extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
- loff_t *ppos, const void *from, size_t available);
-extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
- const void __user *from, size_t count);
+ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
+ const void *from, size_t available);
+ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+ const void __user *from, size_t count);
-extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
-extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
+int __generic_file_fsync(struct file *, loff_t, loff_t, int);
+int generic_file_fsync(struct file *, loff_t, loff_t, int);
-extern int generic_check_addressable(unsigned, u64);
+int generic_check_addressable(unsigned, u64);
#ifdef CONFIG_MIGRATION
-extern int buffer_migrate_page(struct address_space *,
- struct page *, struct page *,
- enum migrate_mode);
+int buffer_migrate_page(struct address_space *, struct page *, struct page *,
+ enum migrate_mode);
#else
#define buffer_migrate_page NULL
#endif
-extern int inode_change_ok(const struct inode *, struct iattr *);
-extern int inode_newsize_ok(const struct inode *, loff_t offset);
-extern void setattr_copy(struct inode *inode, const struct iattr *attr);
+int inode_change_ok(const struct inode *, struct iattr *);
+int inode_newsize_ok(const struct inode *, loff_t offset);
+void setattr_copy(struct inode *inode, const struct iattr *attr);
-extern int file_update_time(struct file *file);
+int file_update_time(struct file *file);
-extern int generic_show_options(struct seq_file *m, struct dentry *root);
-extern void save_mount_options(struct super_block *sb, char *options);
-extern void replace_mount_options(struct super_block *sb, char *options);
+int generic_show_options(struct seq_file *m, struct dentry *root);
+void save_mount_options(struct super_block *sb, char *options);
+void replace_mount_options(struct super_block *sb, char *options);
static inline ino_t parent_ino(struct dentry *dentry)
{
@@ -2652,7 +2690,7 @@ struct simple_transaction_argresp {
#define SIMPLE_TRANSACTION_LIMIT (PAGE_SIZE - sizeof(struct simple_transaction_argresp))
char *simple_transaction_get(struct file *file, const char __user *buf,
- size_t size);
+ size_t size);
ssize_t simple_transaction_read(struct file *file, char __user *buf,
size_t size, loff_t *pos);
int simple_transaction_release(struct inode *inode, struct file *file);
@@ -2693,7 +2731,7 @@ static const struct file_operations __fops = { \
static inline __printf(1, 2)
void __simple_attr_check_format(const char *fmt, ...)
{
- /* don't do anything, just let the compiler check the arguments; */
+/* don't do anything, just let the compiler check the arguments; */
}
int simple_attr_open(struct inode *inode, struct file *file,
@@ -2709,16 +2747,16 @@ struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_dentry(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos);
+ void __user *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_inodes(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
-#define __FMODE_EXEC ((__force int) FMODE_EXEC)
-#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
+#define __FMODE_EXEC ((__force int)FMODE_EXEC)
+#define __FMODE_NONOTIFY ((__force int)FMODE_NONOTIFY)
-#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
-#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
+#define ACC_MODE(x) ("\004\002\006\006"[(x) & O_ACCMODE])
+#define OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \
(flag & __FMODE_NONOTIFY)))
static inline int is_sxid(umode_t mode)
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 5e3a906cc089..142ec544167c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -237,6 +237,12 @@ extern int iomem_is_exclusive(u64 addr);
extern int
walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *));
+extern int
+walk_system_ram_res(u64 start, u64 end, void *arg,
+ int (*func)(u64, u64, void *));
+extern int
+walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end, void *arg,
+ int (*func)(u64, u64, void *));
/* True if any part of r1 overlaps r2 */
static inline bool resource_overlaps(struct resource *r1, struct resource *r2)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e9892041cb41..6e3d497ac5c3 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -376,10 +376,6 @@ extern unsigned long simple_strtoul(const char *,char **,unsigned int);
extern long simple_strtol(const char *,char **,unsigned int);
extern unsigned long long simple_strtoull(const char *,char **,unsigned int);
extern long long simple_strtoll(const char *,char **,unsigned int);
-#define strict_strtoul kstrtoul
-#define strict_strtol kstrtol
-#define strict_strtoull kstrtoull
-#define strict_strtoll kstrtoll
extern int num_to_str(char *buf, int size, unsigned long long num);
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index a75641930049..4b2a0e11cc5b 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -10,6 +10,7 @@
#include <linux/ioport.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
+#include <linux/module.h>
#include <asm/kexec.h>
/* Verify architecture specific macros are defined */
@@ -69,7 +70,18 @@ typedef unsigned long kimage_entry_t;
#define IND_SOURCE 0x8
struct kexec_segment {
- void __user *buf;
+ /*
+ * This pointer can point to user memory if kexec_load() system
+ * call is used or will point to kernel memory if
+ * kexec_file_load() system call is used.
+ *
+ * Use ->buf when expecting to deal with user memory and use ->kbuf
+ * when expecting to deal with kernel memory.
+ */
+ union {
+ void __user *buf;
+ void *kbuf;
+ };
size_t bufsz;
unsigned long mem;
size_t memsz;
@@ -84,6 +96,27 @@ struct compat_kexec_segment {
};
#endif
+struct kexec_sha_region {
+ unsigned long start;
+ unsigned long len;
+};
+
+struct purgatory_info {
+ /* Pointer to elf header of read only purgatory */
+ Elf_Ehdr *ehdr;
+
+ /* Pointer to purgatory sechdrs which are modifiable */
+ Elf_Shdr *sechdrs;
+ /*
+ * Temporary buffer location where purgatory is loaded and relocated
+ * This memory can be freed post image load
+ */
+ void *purgatory_buf;
+
+ /* Address where purgatory is finally loaded and is executed from */
+ unsigned long purgatory_load_addr;
+};
+
struct kimage {
kimage_entry_t head;
kimage_entry_t *entry;
@@ -100,7 +133,7 @@ struct kimage {
struct list_head control_pages;
struct list_head dest_pages;
- struct list_head unuseable_pages;
+ struct list_head unusable_pages;
/* Address of next control page to allocate for crash kernels. */
unsigned long control_page;
@@ -110,13 +143,63 @@ struct kimage {
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH 1
unsigned int preserve_context : 1;
+ /* If set, we are using file mode kexec syscall */
+ unsigned int file_mode:1;
#ifdef ARCH_HAS_KIMAGE_ARCH
struct kimage_arch arch;
#endif
+
+ /* Additional fields for file based kexec syscall */
+ void *kernel_buf;
+ unsigned long kernel_buf_len;
+
+ void *initrd_buf;
+ unsigned long initrd_buf_len;
+
+ char *cmdline_buf;
+ unsigned long cmdline_buf_len;
+
+ /* File operations provided by image loader */
+ struct kexec_file_ops *fops;
+
+ /* Image loader handling the kernel can store a pointer here */
+ void *image_loader_data;
+
+ /* Information for loading purgatory */
+ struct purgatory_info purgatory_info;
};
+/*
+ * Keeps track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+ struct kimage *image;
+ char *buffer;
+ unsigned long bufsz;
+ unsigned long memsz;
+ unsigned long buf_align;
+ unsigned long buf_min;
+ unsigned long buf_max;
+ bool top_down; /* allocate from top of memory hole */
+};
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len);
+typedef int (kexec_cleanup_t)(void *loader_data);
+typedef int (kexec_verify_sig_t)(const char *kernel_buf,
+ unsigned long kernel_len);
+
+struct kexec_file_ops {
+ kexec_probe_t *probe;
+ kexec_load_t *load;
+ kexec_cleanup_t *cleanup;
+ kexec_verify_sig_t *verify_sig;
+};
/* kexec interface functions */
extern void machine_kexec(struct kimage *image);
@@ -127,8 +210,21 @@ extern asmlinkage long sys_kexec_load(unsigned long entry,
struct kexec_segment __user *segments,
unsigned long flags);
extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+ unsigned long bufsz, unsigned long memsz,
+ unsigned long buf_align, unsigned long buf_min,
+ unsigned long buf_max, bool top_down,
+ unsigned long *load_addr);
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
+extern int kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down,
+ unsigned long *load_addr);
+extern int kexec_purgatory_get_set_symbol(struct kimage *image,
+ const char *name, void *buf,
+ unsigned int size, bool get_value);
+extern void *kexec_purgatory_get_symbol_addr(struct kimage *image,
+ const char *name);
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -177,6 +273,10 @@ extern int kexec_load_disabled;
#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
#endif
+/* List of defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \
+ KEXEC_FILE_NO_INITRAMFS)
+
#define VMCOREINFO_BYTES (4096)
#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e03dd29145a0..8981cc882ed2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2014,13 +2014,20 @@ static inline bool kernel_page_present(struct page *page) { return true; }
#endif /* CONFIG_HIBERNATION */
#endif
+#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
-#ifdef __HAVE_ARCH_GATE_AREA
-int in_gate_area_no_mm(unsigned long addr);
-int in_gate_area(struct mm_struct *mm, unsigned long addr);
+extern int in_gate_area_no_mm(unsigned long addr);
+extern int in_gate_area(struct mm_struct *mm, unsigned long addr);
#else
-int in_gate_area_no_mm(unsigned long addr);
-#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
+static inline struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
+{
+ return NULL;
+}
+static inline int in_gate_area_no_mm(unsigned long addr) { return 0; }
+static inline int in_gate_area(struct mm_struct *mm, unsigned long addr)
+{
+ return 0;
+}
#endif /* __HAVE_ARCH_GATE_AREA */
#ifdef CONFIG_SYSCTL
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 857ba40426ba..b9dc87203577 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -377,6 +377,8 @@ extern int in_sched_functions(unsigned long addr);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
extern signed long schedule_timeout(signed long timeout);
extern signed long schedule_timeout_interruptible(signed long timeout);
+extern signed long
+schedule_timeout_deferrable_interruptible(signed long timeout);
extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 4d1771c2d29f..50777b5b1e4c 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -1,6 +1,7 @@
#ifndef __SHMEM_FS_H
#define __SHMEM_FS_H
+#include <linux/file.h>
#include <linux/swap.h>
#include <linux/mempolicy.h>
#include <linux/pagemap.h>
@@ -11,6 +12,7 @@
struct shmem_inode_info {
spinlock_t lock;
+ unsigned int seals; /* shmem seals */
unsigned long flags;
unsigned long alloced; /* data pages alloced to file */
union {
@@ -65,4 +67,19 @@ static inline struct page *shmem_read_mapping_page(
mapping_gfp_mask(mapping));
}
+#ifdef CONFIG_TMPFS
+
+extern int shmem_add_seals(struct file *file, unsigned int seals);
+extern int shmem_get_seals(struct file *file);
+extern long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline long shmem_fcntl(struct file *f, unsigned int c, unsigned long a)
+{
+ return -EINVAL;
+}
+
+#endif
+
#endif
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 701daff5d899..0f86d85a9ce4 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -317,6 +317,10 @@ asmlinkage long sys_restart_syscall(void);
asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
struct kexec_segment __user *segments,
unsigned long flags);
+asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
+ unsigned long cmdline_len,
+ const char __user *cmdline_ptr,
+ unsigned long flags);
asmlinkage long sys_exit(int error_code);
asmlinkage long sys_exit_group(int error_code);
@@ -802,6 +806,7 @@ asmlinkage long sys_timerfd_settime(int ufd, int flags,
asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr);
asmlinkage long sys_eventfd(unsigned int count);
asmlinkage long sys_eventfd2(unsigned int count, int flags);
+asmlinkage long sys_memfd_create(const char __user *uname_ptr, unsigned int flags);
asmlinkage long sys_fallocate(int fd, int mode, loff_t offset, loff_t len);
asmlinkage long sys_old_readdir(unsigned int, struct old_linux_dirent __user *, unsigned int);
asmlinkage long sys_pselect6(int, fd_set __user *, fd_set __user *,
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index 074b886c6be0..beed138bd359 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -28,6 +28,21 @@
#define F_GETPIPE_SZ (F_LINUX_SPECIFIC_BASE + 8)
/*
+ * Set/Get seals
+ */
+#define F_ADD_SEALS (F_LINUX_SPECIFIC_BASE + 9)
+#define F_GET_SEALS (F_LINUX_SPECIFIC_BASE + 10)
+
+/*
+ * Types of seals
+ */
+#define F_SEAL_SEAL 0x0001 /* prevent further seals from being set */
+#define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */
+#define F_SEAL_GROW 0x0004 /* prevent file from growing */
+#define F_SEAL_WRITE 0x0008 /* prevent writes */
+/* (1U << 31) is reserved for signed error codes */
+
+/*
* Types of directory notifications that may be requested.
*/
#define DN_ACCESS 0x00000001 /* File accessed */
diff --git a/include/uapi/linux/kexec.h b/include/uapi/linux/kexec.h
index d6629d49a243..6925f5b42f89 100644
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,17 @@
#define KEXEC_PRESERVE_CONTEXT 0x00000002
#define KEXEC_ARCH_MASK 0xffff0000
+/*
+ * Kexec file load interface flags.
+ * KEXEC_FILE_UNLOAD : Unload already loaded kexec/kdump image.
+ * KEXEC_FILE_ON_CRASH : Load/unload operation belongs to kdump image.
+ * KEXEC_FILE_NO_INITRAMFS : No initramfs is being loaded. Ignore the initrd
+ * fd field.
+ */
+#define KEXEC_FILE_UNLOAD 0x00000001
+#define KEXEC_FILE_ON_CRASH 0x00000002
+#define KEXEC_FILE_NO_INITRAMFS 0x00000004
+
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
*/
diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h
new file mode 100644
index 000000000000..534e364bda92
--- /dev/null
+++ b/include/uapi/linux/memfd.h
@@ -0,0 +1,8 @@
+#ifndef _UAPI_LINUX_MEMFD_H
+#define _UAPI_LINUX_MEMFD_H
+
+/* flags for memfd_create(2) (unsigned int) */
+#define MFD_CLOEXEC 0x0001U
+#define MFD_ALLOW_SEALING 0x0002U
+
+#endif /* _UAPI_LINUX_MEMFD_H */
diff --git a/init/Kconfig b/init/Kconfig
index d3ef635b9779..1dfdd81ff9bd 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -783,8 +783,13 @@ endchoice
endmenu # "RCU Subsystem"
+config BUILD_BIN2C
+ bool
+ default n
+
config IKCONFIG
tristate "Kernel .config support"
+ select BUILD_BIN2C
---help---
This option enables the complete Linux kernel ".config" file
contents to be saved in the kernel. It provides documentation
diff --git a/init/main.c b/init/main.c
index 5d8c83ae6c55..e3c4cdd94d5b 100644
--- a/init/main.c
+++ b/init/main.c
@@ -6,7 +6,7 @@
* GK 2/5/95 - Changed to support mounting root fs via NFS
* Added initrd & change_root: Werner Almesberger & Hans Lermen, Feb '96
* Moan early if gcc is old, avoiding bogus kernels - Paul Gortmaker, May '96
- * Simplified starting of init: Michael A. Griffith <grif@acm.org>
+ * Simplified starting of init: Michael A. Griffith <grif@acm.org>
*/
#define DEBUG /* Enable initcall_debug */
@@ -136,7 +136,7 @@ static char *ramdisk_execute_command;
* Used to generate warnings if static_key manipulation functions are used
* before jump_label_init is called.
*/
-bool static_key_initialized __read_mostly = false;
+bool static_key_initialized __read_mostly;
EXPORT_SYMBOL_GPL(static_key_initialized);
/*
@@ -159,8 +159,8 @@ static int __init set_reset_devices(char *str)
__setup("reset_devices", set_reset_devices);
-static const char * argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
-const char * envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
+static const char *argv_init[MAX_INIT_ARGS+2] = { "init", NULL, };
+const char *envp_init[MAX_INIT_ENVS+2] = { "HOME=/", "TERM=linux", NULL, };
static const char *panic_later, *panic_param;
extern const struct obs_kernel_param __setup_start[], __setup_end[];
@@ -199,7 +199,6 @@ static int __init obsolete_checksetup(char *line)
* still work even if initially too large, it will just take slightly longer
*/
unsigned long loops_per_jiffy = (1<<12);
-
EXPORT_SYMBOL(loops_per_jiffy);
static int __init debug_kernel(char *str)
@@ -376,8 +375,8 @@ static void __init setup_command_line(char *command_line)
initcall_command_line =
memblock_virt_alloc(strlen(boot_command_line) + 1, 0);
static_command_line = memblock_virt_alloc(strlen(command_line) + 1, 0);
- strcpy (saved_command_line, boot_command_line);
- strcpy (static_command_line, command_line);
+ strcpy(saved_command_line, boot_command_line);
+ strcpy(static_command_line, command_line);
}
/*
@@ -445,8 +444,8 @@ void __init parse_early_options(char *cmdline)
/* Arch code calls this early on, or if not, just before other parsing. */
void __init parse_early_param(void)
{
- static __initdata int done = 0;
- static __initdata char tmp_cmdline[COMMAND_LINE_SIZE];
+ static int done __initdata;
+ static char tmp_cmdline[COMMAND_LINE_SIZE] __initdata;
if (done)
return;
@@ -500,7 +499,8 @@ static void __init mm_init(void)
asmlinkage __visible void __init start_kernel(void)
{
- char * command_line, *after_dashes;
+ char *command_line;
+ char *after_dashes;
extern const struct kernel_param __start___param[], __stop___param[];
/*
@@ -572,7 +572,8 @@ asmlinkage __visible void __init start_kernel(void)
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
- if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
+ if (WARN(!irqs_disabled(),
+ "Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
idr_init_cache();
rcu_init();
diff --git a/kernel/Makefile b/kernel/Makefile
index 0026cf531769..dc5c77544fd6 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -105,7 +105,7 @@ targets += config_data.gz
$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
$(call if_changed,gzip)
- filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/bin2c; echo "MAGIC_END;")
+ filechk_ikconfiggz = (echo "static const char kernel_config_data[] __used = MAGIC_START"; cat $< | scripts/basic/bin2c; echo "MAGIC_END;")
targets += config_data.h
$(obj)/config_data.h: $(obj)/config_data.gz FORCE
$(call filechk,ikconfiggz)
diff --git a/kernel/fork.c b/kernel/fork.c
index fa9124322cd4..1380d8ace334 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -429,7 +429,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
atomic_dec(&inode->i_writecount);
mutex_lock(&mapping->i_mmap_mutex);
if (tmp->vm_flags & VM_SHARED)
- mapping->i_mmap_writable++;
+ atomic_inc(&mapping->i_mmap_writable);
flush_dcache_mmap_lock(mapping);
/* insert tmp into the share list, just after mpnt */
if (unlikely(tmp->vm_flags & VM_NONLINEAR))
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 4b8f0c925884..0b49a0a58102 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -6,6 +6,8 @@
* Version 2. See the file COPYING for more details.
*/
+#define pr_fmt(fmt) "kexec: " fmt
+
#include <linux/capability.h>
#include <linux/mm.h>
#include <linux/file.h>
@@ -40,6 +42,9 @@
#include <asm/io.h>
#include <asm/sections.h>
+#include <crypto/hash.h>
+#include <crypto/sha.h>
+
/* Per cpu memory for storing cpu states in case of system crash. */
note_buf_t __percpu *crash_notes;
@@ -52,6 +57,15 @@ size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
/* Flag to indicate we are going to kexec a new kernel */
bool kexec_in_progress = false;
+/*
+ * Declare these symbols weak so that if architecture provides a purgatory,
+ * these will be overridden.
+ */
+char __weak kexec_purgatory[0];
+size_t __weak kexec_purgatory_size = 0;
+
+static int kexec_calculate_store_digests(struct kimage *image);
+
/* Location of the reserved area for the crash kernel */
struct resource crashk_res = {
.name = "Crash kernel",
@@ -125,45 +139,27 @@ static struct page *kimage_alloc_page(struct kimage *image,
gfp_t gfp_mask,
unsigned long dest);
-static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+static int copy_user_segment_list(struct kimage *image,
+ unsigned long nr_segments,
+ struct kexec_segment __user *segments)
{
+ int ret;
size_t segment_bytes;
- struct kimage *image;
- unsigned long i;
- int result;
-
- /* Allocate a controlling structure */
- result = -ENOMEM;
- image = kzalloc(sizeof(*image), GFP_KERNEL);
- if (!image)
- goto out;
-
- image->head = 0;
- image->entry = &image->head;
- image->last_entry = &image->head;
- image->control_page = ~0; /* By default this does not apply */
- image->start = entry;
- image->type = KEXEC_TYPE_DEFAULT;
-
- /* Initialize the list of control pages */
- INIT_LIST_HEAD(&image->control_pages);
-
- /* Initialize the list of destination pages */
- INIT_LIST_HEAD(&image->dest_pages);
-
- /* Initialize the list of unusable pages */
- INIT_LIST_HEAD(&image->unuseable_pages);
/* Read in the segments */
image->nr_segments = nr_segments;
segment_bytes = nr_segments * sizeof(*segments);
- result = copy_from_user(image->segment, segments, segment_bytes);
- if (result) {
- result = -EFAULT;
- goto out;
- }
+ ret = copy_from_user(image->segment, segments, segment_bytes);
+ if (ret)
+ ret = -EFAULT;
+
+ return ret;
+}
+
+static int sanity_check_segment_list(struct kimage *image)
+{
+ int result, i;
+ unsigned long nr_segments = image->nr_segments;
/*
* Verify we have good destination addresses. The caller is
@@ -185,9 +181,9 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
mstart = image->segment[i].mem;
mend = mstart + image->segment[i].memsz;
if ((mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK))
- goto out;
+ return result;
if (mend >= KEXEC_DESTINATION_MEMORY_LIMIT)
- goto out;
+ return result;
}
/* Verify our destination addresses do not overlap.
@@ -208,7 +204,7 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
pend = pstart + image->segment[j].memsz;
/* Do the segments overlap ? */
if ((mend > pstart) && (mstart < pend))
- goto out;
+ return result;
}
}
@@ -220,130 +216,401 @@ static int do_kimage_alloc(struct kimage **rimage, unsigned long entry,
result = -EINVAL;
for (i = 0; i < nr_segments; i++) {
if (image->segment[i].bufsz > image->segment[i].memsz)
- goto out;
+ return result;
}
- result = 0;
-out:
- if (result == 0)
- *rimage = image;
- else
- kfree(image);
+ /*
+ * Verify we have good destination addresses. Normally
+ * the caller is responsible for making certain we don't
+ * attempt to load the new image into invalid or reserved
+ * areas of RAM. But crash kernels are preloaded into a
+ * reserved area of ram. We must ensure the addresses
+ * are in the reserved area otherwise preloading the
+ * kernel could corrupt things.
+ */
- return result;
+ if (image->type == KEXEC_TYPE_CRASH) {
+ result = -EADDRNOTAVAIL;
+ for (i = 0; i < nr_segments; i++) {
+ unsigned long mstart, mend;
+
+ mstart = image->segment[i].mem;
+ mend = mstart + image->segment[i].memsz - 1;
+ /* Ensure we are within the crash kernel limits */
+ if ((mstart < crashk_res.start) ||
+ (mend > crashk_res.end))
+ return result;
+ }
+ }
+ return 0;
+}
+
+static struct kimage *do_kimage_alloc_init(void)
+{
+ struct kimage *image;
+
+ /* Allocate a controlling structure */
+ image = kzalloc(sizeof(*image), GFP_KERNEL);
+ if (!image)
+ return NULL;
+
+ image->head = 0;
+ image->entry = &image->head;
+ image->last_entry = &image->head;
+ image->control_page = ~0; /* By default this does not apply */
+ image->type = KEXEC_TYPE_DEFAULT;
+
+ /* Initialize the list of control pages */
+ INIT_LIST_HEAD(&image->control_pages);
+
+ /* Initialize the list of destination pages */
+ INIT_LIST_HEAD(&image->dest_pages);
+
+ /* Initialize the list of unusable pages */
+ INIT_LIST_HEAD(&image->unusable_pages);
+
+ return image;
}
static void kimage_free_page_list(struct list_head *list);
-static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+static int kimage_alloc_init(struct kimage **rimage, unsigned long entry,
+ unsigned long nr_segments,
+ struct kexec_segment __user *segments,
+ unsigned long flags)
{
- int result;
+ int ret;
struct kimage *image;
+ bool kexec_on_panic = flags & KEXEC_ON_CRASH;
+
+ if (kexec_on_panic) {
+ /* Verify we have a valid entry point */
+ if ((entry < crashk_res.start) || (entry > crashk_res.end))
+ return -EADDRNOTAVAIL;
+ }
/* Allocate and initialize a controlling structure */
- image = NULL;
- result = do_kimage_alloc(&image, entry, nr_segments, segments);
- if (result)
- goto out;
+ image = do_kimage_alloc_init();
+ if (!image)
+ return -ENOMEM;
+
+ image->start = entry;
+
+ ret = copy_user_segment_list(image, nr_segments, segments);
+ if (ret)
+ goto out_free_image;
+
+ ret = sanity_check_segment_list(image);
+ if (ret)
+ goto out_free_image;
+
+ /* Enable the special crash kernel control page allocation policy. */
+ if (kexec_on_panic) {
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_CRASH;
+ }
/*
* Find a location for the control code buffer, and add it
* the vector of segments so that it's pages will also be
* counted as destination pages.
*/
- result = -ENOMEM;
+ ret = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
pr_err("Could not allocate control_code_buffer\n");
- goto out_free;
+ goto out_free_image;
}
- image->swap_page = kimage_alloc_control_pages(image, 0);
- if (!image->swap_page) {
- pr_err("Could not allocate swap buffer\n");
- goto out_free;
+ if (!kexec_on_panic) {
+ image->swap_page = kimage_alloc_control_pages(image, 0);
+ if (!image->swap_page) {
+ pr_err("Could not allocate swap buffer\n");
+ goto out_free_control_pages;
+ }
}
*rimage = image;
return 0;
-
-out_free:
+out_free_control_pages:
kimage_free_page_list(&image->control_pages);
+out_free_image:
kfree(image);
-out:
- return result;
+ return ret;
}
-static int kimage_crash_alloc(struct kimage **rimage, unsigned long entry,
- unsigned long nr_segments,
- struct kexec_segment __user *segments)
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
{
- int result;
- struct kimage *image;
- unsigned long i;
+ struct fd f = fdget(fd);
+ int ret;
+ struct kstat stat;
+ loff_t pos;
+ ssize_t bytes = 0;
- image = NULL;
- /* Verify we have a valid entry point */
- if ((entry < crashk_res.start) || (entry > crashk_res.end)) {
- result = -EADDRNOTAVAIL;
+ if (!f.file)
+ return -EBADF;
+
+ ret = vfs_getattr(&f.file->f_path, &stat);
+ if (ret)
+ goto out;
+
+ if (stat.size > INT_MAX) {
+ ret = -EFBIG;
goto out;
}
- /* Allocate and initialize a controlling structure */
- result = do_kimage_alloc(&image, entry, nr_segments, segments);
- if (result)
+ /* Don't hand 0 to vmalloc, it whines. */
+ if (stat.size == 0) {
+ ret = -EINVAL;
goto out;
+ }
- /* Enable the special crash kernel control page
- * allocation policy.
- */
- image->control_page = crashk_res.start;
- image->type = KEXEC_TYPE_CRASH;
+ *buf = vmalloc(stat.size);
+ if (!*buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
- /*
- * Verify we have good destination addresses. Normally
- * the caller is responsible for making certain we don't
- * attempt to load the new image into invalid or reserved
- * areas of RAM. But crash kernels are preloaded into a
- * reserved area of ram. We must ensure the addresses
- * are in the reserved area otherwise preloading the
- * kernel could corrupt things.
- */
- result = -EADDRNOTAVAIL;
- for (i = 0; i < nr_segments; i++) {
- unsigned long mstart, mend;
+ pos = 0;
+ while (pos < stat.size) {
+ bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+ stat.size - pos);
+ if (bytes < 0) {
+ vfree(*buf);
+ ret = bytes;
+ goto out;
+ }
- mstart = image->segment[i].mem;
- mend = mstart + image->segment[i].memsz - 1;
- /* Ensure we are within the crash kernel limits */
- if ((mstart < crashk_res.start) || (mend > crashk_res.end))
- goto out_free;
+ if (bytes == 0)
+ break;
+ pos += bytes;
}
+ if (pos != stat.size) {
+ ret = -EBADF;
+ vfree(*buf);
+ goto out;
+ }
+
+ *buf_len = pos;
+out:
+ fdput(f);
+ return ret;
+}
+
+/* Architectures can provide this probe function */
+int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ return -ENOEXEC;
+}
+
+void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+{
+ return ERR_PTR(-ENOEXEC);
+}
+
+void __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+}
+
+int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ return -EKEYREJECTED;
+}
+
+/* Apply relocations of type RELA */
+int __weak
+arch_kexec_apply_relocations_add(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec)
+{
+ pr_err("RELA relocation unsupported.\n");
+ return -ENOEXEC;
+}
+
+/* Apply relocations of type REL */
+int __weak
+arch_kexec_apply_relocations(const Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ unsigned int relsec)
+{
+ pr_err("REL relocation unsupported.\n");
+ return -ENOEXEC;
+}
+
+/*
+ * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere.
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+
+ vfree(image->kernel_buf);
+ image->kernel_buf = NULL;
+
+ vfree(image->initrd_buf);
+ image->initrd_buf = NULL;
+
+ kfree(image->cmdline_buf);
+ image->cmdline_buf = NULL;
+
+ vfree(pi->purgatory_buf);
+ pi->purgatory_buf = NULL;
+
+ vfree(pi->sechdrs);
+ pi->sechdrs = NULL;
+
+ /* See if architecture has anything to cleanup post load */
+ arch_kimage_file_post_load_cleanup(image);
+
/*
- * Find a location for the control code buffer, and add
- * the vector of segments so that it's pages will also be
- * counted as destination pages.
+ * Above call should have called into bootloader to free up
+ * any data stored in kimage->image_loader_data. It should
+ * be ok now to free it up.
*/
- result = -ENOMEM;
+ kfree(image->image_loader_data);
+ image->image_loader_data = NULL;
+}
+
+/*
+ * In file mode list of segments is prepared by kernel. Copy relevant
+ * data from user space, do error checking, prepare segment list
+ */
+static int
+kimage_file_prepare_segments(struct kimage *image, int kernel_fd, int initrd_fd,
+ const char __user *cmdline_ptr,
+ unsigned long cmdline_len, unsigned flags)
+{
+ int ret = 0;
+ void *ldata;
+
+ ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+ &image->kernel_buf_len);
+ if (ret)
+ return ret;
+
+ /* Call arch image probe handlers */
+ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+ image->kernel_buf_len);
+
+ if (ret)
+ goto out;
+
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+ ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+ image->kernel_buf_len);
+ if (ret) {
+ pr_debug("kernel signature verification failed.\n");
+ goto out;
+ }
+ pr_debug("kernel signature verification successful.\n");
+#endif
+ /* It is possible that there no initramfs is being loaded */
+ if (!(flags & KEXEC_FILE_NO_INITRAMFS)) {
+ ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+ &image->initrd_buf_len);
+ if (ret)
+ goto out;
+ }
+
+ if (cmdline_len) {
+ image->cmdline_buf = kzalloc(cmdline_len, GFP_KERNEL);
+ if (!image->cmdline_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = copy_from_user(image->cmdline_buf, cmdline_ptr,
+ cmdline_len);
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ image->cmdline_buf_len = cmdline_len;
+
+ /* command line should be a string with last byte null */
+ if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ /* Call arch image load handlers */
+ ldata = arch_kexec_kernel_image_load(image);
+
+ if (IS_ERR(ldata)) {
+ ret = PTR_ERR(ldata);
+ goto out;
+ }
+
+ image->image_loader_data = ldata;
+out:
+ /* In case of error, free up all allocated memory in this function */
+ if (ret)
+ kimage_file_post_load_cleanup(image);
+ return ret;
+}
+
+static int
+kimage_file_alloc_init(struct kimage **rimage, int kernel_fd,
+ int initrd_fd, const char __user *cmdline_ptr,
+ unsigned long cmdline_len, unsigned long flags)
+{
+ int ret;
+ struct kimage *image;
+ bool kexec_on_panic = flags & KEXEC_FILE_ON_CRASH;
+
+ image = do_kimage_alloc_init();
+ if (!image)
+ return -ENOMEM;
+
+ image->file_mode = 1;
+
+ if (kexec_on_panic) {
+ /* Enable special crash kernel control page alloc policy. */
+ image->control_page = crashk_res.start;
+ image->type = KEXEC_TYPE_CRASH;
+ }
+
+ ret = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+ cmdline_ptr, cmdline_len, flags);
+ if (ret)
+ goto out_free_image;
+
+ ret = sanity_check_segment_list(image);
+ if (ret)
+ goto out_free_post_load_bufs;
+
+ ret = -ENOMEM;
image->control_code_page = kimage_alloc_control_pages(image,
get_order(KEXEC_CONTROL_PAGE_SIZE));
if (!image->control_code_page) {
pr_err("Could not allocate control_code_buffer\n");
- goto out_free;
+ goto out_free_post_load_bufs;
+ }
+
+ if (!kexec_on_panic) {
+ image->swap_page = kimage_alloc_control_pages(image, 0);
+ if (!image->swap_page) {
+ pr_err(KERN_ERR "Could not allocate swap buffer\n");
+ goto out_free_control_pages;
+ }
}
*rimage = image;
return 0;
-
-out_free:
+out_free_control_pages:
+ kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+ kimage_file_post_load_cleanup(image);
+out_free_image:
kfree(image);
-out:
- return result;
+ return ret;
}
static int kimage_is_destination_range(struct kimage *image,
@@ -609,7 +876,7 @@ static void kimage_free_extra_pages(struct kimage *image)
kimage_free_page_list(&image->dest_pages);
/* Walk through and free any unusable pages I have cached */
- kimage_free_page_list(&image->unuseable_pages);
+ kimage_free_page_list(&image->unusable_pages);
}
static void kimage_terminate(struct kimage *image)
@@ -663,6 +930,14 @@ static void kimage_free(struct kimage *image)
/* Free the kexec control pages... */
kimage_free_page_list(&image->control_pages);
+
+ /*
+ * Free up any temporary buffers allocated. This might hit if
+ * error occurred much later after buffer allocation.
+ */
+ if (image->file_mode)
+ kimage_file_post_load_cleanup(image);
+
kfree(image);
}
@@ -732,7 +1007,7 @@ static struct page *kimage_alloc_page(struct kimage *image,
/* If the page cannot be used file it away */
if (page_to_pfn(page) >
(KEXEC_SOURCE_MEMORY_LIMIT >> PAGE_SHIFT)) {
- list_add(&page->lru, &image->unuseable_pages);
+ list_add(&page->lru, &image->unusable_pages);
continue;
}
addr = page_to_pfn(page) << PAGE_SHIFT;
@@ -791,10 +1066,14 @@ static int kimage_load_normal_segment(struct kimage *image,
unsigned long maddr;
size_t ubytes, mbytes;
int result;
- unsigned char __user *buf;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
result = 0;
- buf = segment->buf;
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
ubytes = segment->bufsz;
mbytes = segment->memsz;
maddr = segment->mem;
@@ -826,7 +1105,11 @@ static int kimage_load_normal_segment(struct kimage *image,
PAGE_SIZE - (maddr & ~PAGE_MASK));
uchunk = min(ubytes, mchunk);
- result = copy_from_user(ptr, buf, uchunk);
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
kunmap(page);
if (result) {
result = -EFAULT;
@@ -834,7 +1117,10 @@ static int kimage_load_normal_segment(struct kimage *image,
}
ubytes -= uchunk;
maddr += mchunk;
- buf += mchunk;
+ if (image->file_mode)
+ kbuf += mchunk;
+ else
+ buf += mchunk;
mbytes -= mchunk;
}
out:
@@ -851,10 +1137,14 @@ static int kimage_load_crash_segment(struct kimage *image,
unsigned long maddr;
size_t ubytes, mbytes;
int result;
- unsigned char __user *buf;
+ unsigned char __user *buf = NULL;
+ unsigned char *kbuf = NULL;
result = 0;
- buf = segment->buf;
+ if (image->file_mode)
+ kbuf = segment->kbuf;
+ else
+ buf = segment->buf;
ubytes = segment->bufsz;
mbytes = segment->memsz;
maddr = segment->mem;
@@ -877,7 +1167,12 @@ static int kimage_load_crash_segment(struct kimage *image,
/* Zero the trailing part of the page */
memset(ptr + uchunk, 0, mchunk - uchunk);
}
- result = copy_from_user(ptr, buf, uchunk);
+
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, kbuf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
kexec_flush_icache_page(page);
kunmap(page);
if (result) {
@@ -886,7 +1181,10 @@ static int kimage_load_crash_segment(struct kimage *image,
}
ubytes -= uchunk;
maddr += mchunk;
- buf += mchunk;
+ if (image->file_mode)
+ kbuf += mchunk;
+ else
+ buf += mchunk;
mbytes -= mchunk;
}
out:
@@ -986,16 +1284,16 @@ SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
/* Loading another kernel to reboot into */
if ((flags & KEXEC_ON_CRASH) == 0)
- result = kimage_normal_alloc(&image, entry,
- nr_segments, segments);
+ result = kimage_alloc_init(&image, entry, nr_segments,
+ segments, flags);
/* Loading another kernel to switch to if this one crashes */
else if (flags & KEXEC_ON_CRASH) {
/* Free any current crash dump kernel before
* we corrupt it.
*/
kimage_free(xchg(&kexec_crash_image, NULL));
- result = kimage_crash_alloc(&image, entry,
- nr_segments, segments);
+ result = kimage_alloc_init(&image, entry, nr_segments,
+ segments, flags);
crash_map_reserved_pages();
}
if (result)
@@ -1077,6 +1375,82 @@ COMPAT_SYSCALL_DEFINE4(kexec_load, compat_ulong_t, entry,
}
#endif
+SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd,
+ unsigned long, cmdline_len, const char __user *, cmdline_ptr,
+ unsigned long, flags)
+{
+ int ret = 0, i;
+ struct kimage **dest_image, *image;
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
+ return -EPERM;
+
+ /* Make sure we have a legal set of flags */
+ if (flags != (flags & KEXEC_FILE_FLAGS))
+ return -EINVAL;
+
+ image = NULL;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ dest_image = &kexec_image;
+ if (flags & KEXEC_FILE_ON_CRASH)
+ dest_image = &kexec_crash_image;
+
+ if (flags & KEXEC_FILE_UNLOAD)
+ goto exchange;
+
+ /*
+ * In case of crash, new kernel gets loaded in reserved region. It is
+ * same memory where old crash kernel might be loaded. Free any
+ * current crash dump kernel before we corrupt it.
+ */
+ if (flags & KEXEC_FILE_ON_CRASH)
+ kimage_free(xchg(&kexec_crash_image, NULL));
+
+ ret = kimage_file_alloc_init(&image, kernel_fd, initrd_fd, cmdline_ptr,
+ cmdline_len, flags);
+ if (ret)
+ goto out;
+
+ ret = machine_kexec_prepare(image);
+ if (ret)
+ goto out;
+
+ ret = kexec_calculate_store_digests(image);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ pr_debug("Loading segment %d: buf=0x%p bufsz=0x%zx mem=0x%lx memsz=0x%zx\n",
+ i, ksegment->buf, ksegment->bufsz, ksegment->mem,
+ ksegment->memsz);
+
+ ret = kimage_load_segment(image, &image->segment[i]);
+ if (ret)
+ goto out;
+ }
+
+ kimage_terminate(image);
+
+ /*
+ * Free up any temporary buffers allocated which are not needed
+ * after image has been loaded
+ */
+ kimage_file_post_load_cleanup(image);
+exchange:
+ image = xchg(dest_image, image);
+out:
+ mutex_unlock(&kexec_mutex);
+ kimage_free(image);
+ return ret;
+}
+
void crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
@@ -1632,6 +2006,683 @@ static int __init crash_save_vmcoreinfo_init(void)
subsys_initcall(crash_save_vmcoreinfo_init);
+static int __kexec_add_segment(struct kimage *image, char *buf,
+ unsigned long bufsz, unsigned long mem,
+ unsigned long memsz)
+{
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[image->nr_segments];
+ ksegment->kbuf = buf;
+ ksegment->bufsz = bufsz;
+ ksegment->mem = mem;
+ ksegment->memsz = memsz;
+ image->nr_segments++;
+
+ return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_end = min(end, kbuf->buf_max);
+ temp_start = temp_end - kbuf->memsz;
+
+ do {
+ /* align down start */
+ temp_start = temp_start & (~(kbuf->buf_align - 1));
+
+ if (temp_start < start || temp_start < kbuf->buf_min)
+ return 0;
+
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ /*
+ * Make sure this does not conflict with any of existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start - PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while (1);
+
+ /* If we are here, we found a suitable memory range */
+ __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+ kbuf->memsz);
+
+ /* Success, stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_start = max(start, kbuf->buf_min);
+
+ do {
+ temp_start = ALIGN(temp_start, kbuf->buf_align);
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ if (temp_end > end || temp_end > kbuf->buf_max)
+ return 0;
+ /*
+ * Make sure this does not conflict with any of existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start + PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while (1);
+
+ /* If we are here, we found a suitable memory range */
+ __kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+ kbuf->memsz);
+
+ /* Success, stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_callback(u64 start, u64 end, void *arg)
+{
+ struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+ unsigned long sz = end - start + 1;
+
+ /* Returning 0 will take to next memory range */
+ if (sz < kbuf->memsz)
+ return 0;
+
+ if (end < kbuf->buf_min || start > kbuf->buf_max)
+ return 0;
+
+ /*
+ * Allocate memory top down with-in ram range. Otherwise bottom up
+ * allocation.
+ */
+ if (kbuf->top_down)
+ return locate_mem_hole_top_down(start, end, kbuf);
+ return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper function for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer, unsigned long bufsz,
+ unsigned long memsz, unsigned long buf_align,
+ unsigned long buf_min, unsigned long buf_max,
+ bool top_down, unsigned long *load_addr)
+{
+
+ struct kexec_segment *ksegment;
+ struct kexec_buf buf, *kbuf;
+ int ret;
+
+ /* Currently adding segment this way is allowed only in file mode */
+ if (!image->file_mode)
+ return -EINVAL;
+
+ if (image->nr_segments >= KEXEC_SEGMENT_MAX)
+ return -EINVAL;
+
+ /*
+ * Make sure we are not trying to add buffer after allocating
+ * control pages. All segments need to be placed first before
+ * any control pages are allocated. As control page allocation
+ * logic goes through list of segments to make sure there are
+ * no destination overlaps.
+ */
+ if (!list_empty(&image->control_pages)) {
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ memset(&buf, 0, sizeof(struct kexec_buf));
+ kbuf = &buf;
+ kbuf->image = image;
+ kbuf->buffer = buffer;
+ kbuf->bufsz = bufsz;
+
+ kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+ kbuf->buf_align = max(buf_align, PAGE_SIZE);
+ kbuf->buf_min = buf_min;
+ kbuf->buf_max = buf_max;
+ kbuf->top_down = top_down;
+
+ /* Walk the RAM ranges and allocate a suitable range for the buffer */
+ if (image->type == KEXEC_TYPE_CRASH)
+ ret = walk_iomem_res("Crash kernel",
+ IORESOURCE_MEM | IORESOURCE_BUSY,
+ crashk_res.start, crashk_res.end, kbuf,
+ locate_mem_hole_callback);
+ else
+ ret = walk_system_ram_res(0, -1, kbuf,
+ locate_mem_hole_callback);
+ if (ret != 1) {
+ /* A suitable memory range could not be found for buffer */
+ return -EADDRNOTAVAIL;
+ }
+
+ /* Found a suitable memory range */
+ ksegment = &image->segment[image->nr_segments - 1];
+ *load_addr = ksegment->mem;
+ return 0;
+}
+
+/* Calculate and store the digest of segments */
+static int kexec_calculate_store_digests(struct kimage *image)
+{
+ struct crypto_shash *tfm;
+ struct shash_desc *desc;
+ int ret = 0, i, j, zero_buf_sz, sha_region_sz;
+ size_t desc_size, nullsz;
+ char *digest;
+ void *zero_buf;
+ struct kexec_sha_region *sha_regions;
+ struct purgatory_info *pi = &image->purgatory_info;
+
+ zero_buf = __va(page_to_pfn(ZERO_PAGE(0)) << PAGE_SHIFT);
+ zero_buf_sz = PAGE_SIZE;
+
+ tfm = crypto_alloc_shash("sha256", 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
+ }
+
+ desc_size = crypto_shash_descsize(tfm) + sizeof(*desc);
+ desc = kzalloc(desc_size, GFP_KERNEL);
+ if (!desc) {
+ ret = -ENOMEM;
+ goto out_free_tfm;
+ }
+
+ sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
+ sha_regions = vzalloc(sha_region_sz);
+ if (!sha_regions)
+ goto out_free_desc;
+
+ desc->tfm = tfm;
+ desc->flags = 0;
+
+ ret = crypto_shash_init(desc);
+ if (ret < 0)
+ goto out_free_sha_regions;
+
+ digest = kzalloc(SHA256_DIGEST_SIZE, GFP_KERNEL);
+ if (!digest) {
+ ret = -ENOMEM;
+ goto out_free_sha_regions;
+ }
+
+ for (j = i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ /*
+ * Skip purgatory as it will be modified once we put digest
+ * info in purgatory.
+ */
+ if (ksegment->kbuf == pi->purgatory_buf)
+ continue;
+
+ ret = crypto_shash_update(desc, ksegment->kbuf,
+ ksegment->bufsz);
+ if (ret)
+ break;
+
+ /*
+ * Assume rest of the buffer is filled with zero and
+ * update digest accordingly.
+ */
+ nullsz = ksegment->memsz - ksegment->bufsz;
+ while (nullsz) {
+ unsigned long bytes = nullsz;
+
+ if (bytes > zero_buf_sz)
+ bytes = zero_buf_sz;
+ ret = crypto_shash_update(desc, zero_buf, bytes);
+ if (ret)
+ break;
+ nullsz -= bytes;
+ }
+
+ if (ret)
+ break;
+
+ sha_regions[j].start = ksegment->mem;
+ sha_regions[j].len = ksegment->memsz;
+ j++;
+ }
+
+ if (!ret) {
+ ret = crypto_shash_final(desc, digest);
+ if (ret)
+ goto out_free_digest;
+ ret = kexec_purgatory_get_set_symbol(image, "sha_regions",
+ sha_regions, sha_region_sz, 0);
+ if (ret)
+ goto out_free_digest;
+
+ ret = kexec_purgatory_get_set_symbol(image, "sha256_digest",
+ digest, SHA256_DIGEST_SIZE, 0);
+ if (ret)
+ goto out_free_digest;
+ }
+
+out_free_digest:
+ kfree(digest);
+out_free_sha_regions:
+ vfree(sha_regions);
+out_free_desc:
+ kfree(desc);
+out_free_tfm:
+ kfree(tfm);
+out:
+ return ret;
+}
+
+/* Actually load purgatory. Lot of code taken from kexec-tools */
+static int __kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ unsigned long align, buf_align, bss_align, buf_sz, bss_sz, bss_pad;
+ unsigned long memsz, entry, load_addr, curr_load_addr, bss_addr, offset;
+ unsigned char *buf_addr, *src;
+ int i, ret = 0, entry_sidx = -1;
+ const Elf_Shdr *sechdrs_c;
+ Elf_Shdr *sechdrs = NULL;
+ void *purgatory_buf = NULL;
+
+ /*
+ * sechdrs_c points to section headers in purgatory and are read
+ * only. No modifications allowed.
+ */
+ sechdrs_c = (void *)pi->ehdr + pi->ehdr->e_shoff;
+
+ /*
+ * We can not modify sechdrs_c[] and its fields. It is read only.
+ * Copy it over to a local copy where one can store some temporary
+ * data and free it at the end. We need to modify ->sh_addr and
+ * ->sh_offset fields to keep track of permanent and temporary
+ * locations of sections.
+ */
+ sechdrs = vzalloc(pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+ if (!sechdrs)
+ return -ENOMEM;
+
+ memcpy(sechdrs, sechdrs_c, pi->ehdr->e_shnum * sizeof(Elf_Shdr));
+
+ /*
+ * We seem to have multiple copies of sections. First copy is which
+ * is embedded in kernel in read only section. Some of these sections
+ * will be copied to a temporary buffer and relocated. And these
+ * sections will finally be copied to their final destination at
+ * segment load time.
+ *
+ * Use ->sh_offset to reflect section address in memory. It will
+ * point to original read only copy if section is not allocatable.
+ * Otherwise it will point to temporary copy which will be relocated.
+ *
+ * Use ->sh_addr to contain final address of the section where it
+ * will go during execution time.
+ */
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type == SHT_NOBITS)
+ continue;
+
+ sechdrs[i].sh_offset = (unsigned long)pi->ehdr +
+ sechdrs[i].sh_offset;
+ }
+
+ /*
+ * Identify entry point section and make entry relative to section
+ * start.
+ */
+ entry = pi->ehdr->e_entry;
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ if (!(sechdrs[i].sh_flags & SHF_EXECINSTR))
+ continue;
+
+ /* Make entry section relative */
+ if (sechdrs[i].sh_addr <= pi->ehdr->e_entry &&
+ ((sechdrs[i].sh_addr + sechdrs[i].sh_size) >
+ pi->ehdr->e_entry)) {
+ entry_sidx = i;
+ entry -= sechdrs[i].sh_addr;
+ break;
+ }
+ }
+
+ /* Determine how much memory is needed to load relocatable object. */
+ buf_align = 1;
+ bss_align = 1;
+ buf_sz = 0;
+ bss_sz = 0;
+
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ align = sechdrs[i].sh_addralign;
+ if (sechdrs[i].sh_type != SHT_NOBITS) {
+ if (buf_align < align)
+ buf_align = align;
+ buf_sz = ALIGN(buf_sz, align);
+ buf_sz += sechdrs[i].sh_size;
+ } else {
+ /* bss section */
+ if (bss_align < align)
+ bss_align = align;
+ bss_sz = ALIGN(bss_sz, align);
+ bss_sz += sechdrs[i].sh_size;
+ }
+ }
+
+ /* Determine the bss padding required to align bss properly */
+ bss_pad = 0;
+ if (buf_sz & (bss_align - 1))
+ bss_pad = bss_align - (buf_sz & (bss_align - 1));
+
+ memsz = buf_sz + bss_pad + bss_sz;
+
+ /* Allocate buffer for purgatory */
+ purgatory_buf = vzalloc(buf_sz);
+ if (!purgatory_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (buf_align < bss_align)
+ buf_align = bss_align;
+
+ /* Add buffer to segment list */
+ ret = kexec_add_buffer(image, purgatory_buf, buf_sz, memsz,
+ buf_align, min, max, top_down,
+ &pi->purgatory_load_addr);
+ if (ret)
+ goto out;
+
+ /* Load SHF_ALLOC sections */
+ buf_addr = purgatory_buf;
+ load_addr = curr_load_addr = pi->purgatory_load_addr;
+ bss_addr = load_addr + buf_sz + bss_pad;
+
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+ continue;
+
+ align = sechdrs[i].sh_addralign;
+ if (sechdrs[i].sh_type != SHT_NOBITS) {
+ curr_load_addr = ALIGN(curr_load_addr, align);
+ offset = curr_load_addr - load_addr;
+ /* We already modifed ->sh_offset to keep src addr */
+ src = (char *) sechdrs[i].sh_offset;
+ memcpy(buf_addr + offset, src, sechdrs[i].sh_size);
+
+ /* Store load address and source address of section */
+ sechdrs[i].sh_addr = curr_load_addr;
+
+ /*
+ * This section got copied to temporary buffer. Update
+ * ->sh_offset accordingly.
+ */
+ sechdrs[i].sh_offset = (unsigned long)(buf_addr + offset);
+
+ /* Advance to the next address */
+ curr_load_addr += sechdrs[i].sh_size;
+ } else {
+ bss_addr = ALIGN(bss_addr, align);
+ sechdrs[i].sh_addr = bss_addr;
+ bss_addr += sechdrs[i].sh_size;
+ }
+ }
+
+ /* Update entry point based on load address of text section */
+ if (entry_sidx >= 0)
+ entry += sechdrs[entry_sidx].sh_addr;
+
+ /* Make kernel jump to purgatory after shutdown */
+ image->start = entry;
+
+ /* Used later to get/set symbol values */
+ pi->sechdrs = sechdrs;
+
+ /*
+ * Used later to identify which section is purgatory and skip it
+ * from checksumming.
+ */
+ pi->purgatory_buf = purgatory_buf;
+ return ret;
+out:
+ vfree(sechdrs);
+ vfree(purgatory_buf);
+ return ret;
+}
+
+static int kexec_apply_relocations(struct kimage *image)
+{
+ int i, ret;
+ struct purgatory_info *pi = &image->purgatory_info;
+ Elf_Shdr *sechdrs = pi->sechdrs;
+
+ /* Apply relocations */
+ for (i = 0; i < pi->ehdr->e_shnum; i++) {
+ Elf_Shdr *section, *symtab;
+
+ if (sechdrs[i].sh_type != SHT_RELA &&
+ sechdrs[i].sh_type != SHT_REL)
+ continue;
+
+ /*
+ * For section of type SHT_RELA/SHT_REL,
+ * ->sh_link contains section header index of associated
+ * symbol table. And ->sh_info contains section header
+ * index of section to which relocations apply.
+ */
+ if (sechdrs[i].sh_info >= pi->ehdr->e_shnum ||
+ sechdrs[i].sh_link >= pi->ehdr->e_shnum)
+ return -ENOEXEC;
+
+ section = &sechdrs[sechdrs[i].sh_info];
+ symtab = &sechdrs[sechdrs[i].sh_link];
+
+ if (!(section->sh_flags & SHF_ALLOC))
+ continue;
+
+ /*
+ * symtab->sh_link contain section header index of associated
+ * string table.
+ */
+ if (symtab->sh_link >= pi->ehdr->e_shnum)
+ /* Invalid section number? */
+ continue;
+
+ /*
+ * Respective archicture needs to provide support for applying
+ * relocations of type SHT_RELA/SHT_REL.
+ */
+ if (sechdrs[i].sh_type == SHT_RELA)
+ ret = arch_kexec_apply_relocations_add(pi->ehdr,
+ sechdrs, i);
+ else if (sechdrs[i].sh_type == SHT_REL)
+ ret = arch_kexec_apply_relocations(pi->ehdr,
+ sechdrs, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Load relocatable purgatory object and relocate it appropriately */
+int kexec_load_purgatory(struct kimage *image, unsigned long min,
+ unsigned long max, int top_down,
+ unsigned long *load_addr)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ int ret;
+
+ if (kexec_purgatory_size <= 0)
+ return -EINVAL;
+
+ if (kexec_purgatory_size < sizeof(Elf_Ehdr))
+ return -ENOEXEC;
+
+ pi->ehdr = (Elf_Ehdr *)kexec_purgatory;
+
+ if (memcmp(pi->ehdr->e_ident, ELFMAG, SELFMAG) != 0
+ || pi->ehdr->e_type != ET_REL
+ || !elf_check_arch(pi->ehdr)
+ || pi->ehdr->e_shentsize != sizeof(Elf_Shdr))
+ return -ENOEXEC;
+
+ if (pi->ehdr->e_shoff >= kexec_purgatory_size
+ || (pi->ehdr->e_shnum * sizeof(Elf_Shdr) >
+ kexec_purgatory_size - pi->ehdr->e_shoff))
+ return -ENOEXEC;
+
+ ret = __kexec_load_purgatory(image, min, max, top_down);
+ if (ret)
+ return ret;
+
+ ret = kexec_apply_relocations(image);
+ if (ret)
+ goto out;
+
+ *load_addr = pi->purgatory_load_addr;
+ return 0;
+out:
+ vfree(pi->sechdrs);
+ vfree(pi->purgatory_buf);
+ return ret;
+}
+
+static Elf_Sym *kexec_purgatory_find_symbol(struct purgatory_info *pi,
+ const char *name)
+{
+ Elf_Sym *syms;
+ Elf_Shdr *sechdrs;
+ Elf_Ehdr *ehdr;
+ int i, k;
+ const char *strtab;
+
+ if (!pi->sechdrs || !pi->ehdr)
+ return NULL;
+
+ sechdrs = pi->sechdrs;
+ ehdr = pi->ehdr;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sechdrs[i].sh_type != SHT_SYMTAB)
+ continue;
+
+ if (sechdrs[i].sh_link >= ehdr->e_shnum)
+ /* Invalid strtab section number */
+ continue;
+ strtab = (char *)sechdrs[sechdrs[i].sh_link].sh_offset;
+ syms = (Elf_Sym *)sechdrs[i].sh_offset;
+
+ /* Go through symbols for a match */
+ for (k = 0; k < sechdrs[i].sh_size/sizeof(Elf_Sym); k++) {
+ if (ELF_ST_BIND(syms[k].st_info) != STB_GLOBAL)
+ continue;
+
+ if (strcmp(strtab + syms[k].st_name, name) != 0)
+ continue;
+
+ if (syms[k].st_shndx == SHN_UNDEF ||
+ syms[k].st_shndx >= ehdr->e_shnum) {
+ pr_debug("Symbol: %s has bad section index %d.\n",
+ name, syms[k].st_shndx);
+ return NULL;
+ }
+
+ /* Found the symbol we are looking for */
+ return &syms[k];
+ }
+ }
+
+ return NULL;
+}
+
+void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name)
+{
+ struct purgatory_info *pi = &image->purgatory_info;
+ Elf_Sym *sym;
+ Elf_Shdr *sechdr;
+
+ sym = kexec_purgatory_find_symbol(pi, name);
+ if (!sym)
+ return ERR_PTR(-EINVAL);
+
+ sechdr = &pi->sechdrs[sym->st_shndx];
+
+ /*
+ * Returns the address where symbol will finally be loaded after
+ * kexec_load_segment()
+ */
+ return (void *)(sechdr->sh_addr + sym->st_value);
+}
+
+/*
+ * Get or set value of a symbol. If "get_value" is true, symbol value is
+ * returned in buf otherwise symbol value is set based on value in buf.
+ */
+int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+ void *buf, unsigned int size, bool get_value)
+{
+ Elf_Sym *sym;
+ Elf_Shdr *sechdrs;
+ struct purgatory_info *pi = &image->purgatory_info;
+ char *sym_buf;
+
+ sym = kexec_purgatory_find_symbol(pi, name);
+ if (!sym)
+ return -EINVAL;
+
+ if (sym->st_size != size) {
+ pr_err("symbol %s size mismatch: expected %lu actual %u\n",
+ name, (unsigned long)sym->st_size, size);
+ return -EINVAL;
+ }
+
+ sechdrs = pi->sechdrs;
+
+ if (sechdrs[sym->st_shndx].sh_type == SHT_NOBITS) {
+ pr_err("symbol %s is in a bss section. Cannot %s\n", name,
+ get_value ? "get" : "set");
+ return -EINVAL;
+ }
+
+ sym_buf = (unsigned char *)sechdrs[sym->st_shndx].sh_offset +
+ sym->st_value;
+
+ if (get_value)
+ memcpy((void *)buf, sym_buf, size);
+ else
+ memcpy((void *)sym_buf, buf, size);
+
+ return 0;
+}
+
/*
* Move into place and start executing a preloaded standalone
* executable. If nothing was preloaded return an error.
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 734e9a7d280b..81807b0b2c99 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -76,7 +76,7 @@ static bool kprobes_all_disarmed;
/* This protects kprobe_table and optimizing_list */
static DEFINE_MUTEX(kprobe_mutex);
-static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL;
+static DEFINE_PER_CPU(struct kprobe *, kprobe_instance);
static struct {
raw_spinlock_t lock ____cacheline_aligned_in_smp;
} kretprobe_table_locks[KPROBE_TABLE_SIZE];
@@ -297,9 +297,9 @@ static inline void reset_kprobe_instance(void)
/*
* This routine is called either:
- * - under the kprobe_mutex - during kprobe_[un]register()
- * OR
- * - with preemption disabled - from arch/xxx/kernel/kprobes.c
+ * - under the kprobe_mutex - during kprobe_[un]register()
+ * OR
+ * - with preemption disabled - from arch/xxx/kernel/kprobes.c
*/
struct kprobe *get_kprobe(void *addr)
{
@@ -567,7 +567,8 @@ static void wait_for_kprobe_optimizer(void)
{
mutex_lock(&kprobe_mutex);
- while (!list_empty(&optimizing_list) || !list_empty(&unoptimizing_list)) {
+ while (!list_empty(&optimizing_list) ||
+ !list_empty(&unoptimizing_list)) {
mutex_unlock(&kprobe_mutex);
/* this will also make optimizing_work execute immmediately */
@@ -676,8 +677,8 @@ static void reuse_unused_kprobe(struct kprobe *ap)
*/
op = container_of(ap, struct optimized_kprobe, kp);
if (unlikely(list_empty(&op->list)))
- printk(KERN_WARNING "Warning: found a stray unused "
- "aggrprobe@%p\n", ap->addr);
+ pr_warn("Warning: found a stray unused aggrprobe@%p\n",
+ ap->addr);
/* Enable the probe again */
ap->flags &= ~KPROBE_FLAG_DISABLED;
/* Optimize it again (remove from op->list) */
@@ -794,7 +795,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
- printk(KERN_INFO "Kprobes globally optimized\n");
+ pr_info("Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
}
@@ -824,7 +825,7 @@ static void unoptimize_all_kprobes(void)
/* Wait for unoptimizing completion */
wait_for_kprobe_optimizer();
- printk(KERN_INFO "Kprobes globally unoptimized\n");
+ pr_info("Kprobes globally unoptimized\n");
}
static DEFINE_MUTEX(kprobe_sysctl_mutex);
@@ -896,7 +897,7 @@ static void __disarm_kprobe(struct kprobe *p, bool reopt)
/* There should be no unused kprobes can be reused without optimization */
static void reuse_unused_kprobe(struct kprobe *ap)
{
- printk(KERN_ERR "Error: There should be no unused kprobe here.\n");
+ pr_err("Error: There should be no unused kprobe here.\n");
BUG_ON(kprobe_unused(ap));
}
@@ -955,7 +956,8 @@ static void disarm_kprobe_ftrace(struct kprobe *p)
}
ret = ftrace_set_filter_ip(&kprobe_ftrace_ops,
(unsigned long)p->addr, 1, 0);
- WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n", p->addr, ret);
+ WARN(ret < 0, "Failed to disarm kprobe-ftrace at %p (%d)\n",
+ p->addr, ret);
}
#else /* !CONFIG_KPROBES_ON_FTRACE */
#define prepare_kprobe(p) arch_prepare_kprobe(p)
@@ -1389,7 +1391,7 @@ static struct kprobe *__get_valid_kprobe(struct kprobe *p)
if (p != ap) {
list_for_each_entry_rcu(list_p, &ap->list, list)
if (list_p == p)
- /* kprobe p is a valid probe */
+ /* kprobe p is a valid probe */
goto valid;
return NULL;
}
@@ -2018,8 +2020,8 @@ EXPORT_SYMBOL_GPL(enable_kprobe);
void dump_kprobe(struct kprobe *kp)
{
- printk(KERN_WARNING "Dumping kprobe:\n");
- printk(KERN_WARNING "Name: %s\nAddress: %p\nOffset: %x\n",
+ pr_warn("Dumping kprobe:\n");
+ pr_warn("Name: %s\nAddress: %p\nOffset: %x\n",
kp->symbol_name, kp->addr, kp->offset);
}
NOKPROBE_SYMBOL(dump_kprobe);
@@ -2132,7 +2134,7 @@ static int __init init_kprobes(void)
kprobe_lookup_name(kretprobe_blacklist[i].name,
kretprobe_blacklist[i].addr);
if (!kretprobe_blacklist[i].addr)
- printk("kretprobe: lookup failed: %s\n",
+ pr_warn("kretprobe: lookup failed: %s\n",
kretprobe_blacklist[i].name);
}
}
@@ -2314,7 +2316,7 @@ static void arm_all_kprobes(void)
}
kprobes_all_disarmed = false;
- printk(KERN_INFO "Kprobes globally enabled\n");
+ pr_info("Kprobes globally enabled\n");
already_enabled:
mutex_unlock(&kprobe_mutex);
@@ -2336,7 +2338,7 @@ static void disarm_all_kprobes(void)
}
kprobes_all_disarmed = true;
- printk(KERN_INFO "Kprobes globally disabled\n");
+ pr_info("Kprobes globally disabled\n");
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
diff --git a/kernel/resource.c b/kernel/resource.c
index 3c2237ac32db..da14b8d09296 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -59,10 +59,12 @@ static DEFINE_RWLOCK(resource_lock);
static struct resource *bootmem_resource_free;
static DEFINE_SPINLOCK(bootmem_resource_lock);
-static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+static struct resource *next_resource(struct resource *p, bool sibling_only)
{
- struct resource *p = v;
- (*pos)++;
+ /* Caller wants to traverse through siblings only */
+ if (sibling_only)
+ return p->sibling;
+
if (p->child)
return p->child;
while (!p->sibling && p->parent)
@@ -70,6 +72,13 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos)
return p->sibling;
}
+static void *r_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ struct resource *p = v;
+ (*pos)++;
+ return (void *)next_resource(p, false);
+}
+
#ifdef CONFIG_PROC_FS
enum { MAX_IORES_LEVEL = 5 };
@@ -322,16 +331,19 @@ int release_resource(struct resource *old)
EXPORT_SYMBOL(release_resource);
-#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
/*
- * Finds the lowest memory reosurce exists within [res->start.res->end)
+ * Finds the lowest iomem reosurce exists with-in [res->start.res->end)
* the caller must specify res->start, res->end, res->flags and "name".
* If found, returns 0, res is overwritten, if not found, returns -1.
+ * This walks through whole tree and not just first level children
+ * until and unless first_level_children_only is true.
*/
-static int find_next_system_ram(struct resource *res, char *name)
+static int find_next_iomem_res(struct resource *res, char *name,
+ bool first_level_children_only)
{
resource_size_t start, end;
struct resource *p;
+ bool sibling_only = false;
BUG_ON(!res);
@@ -340,8 +352,14 @@ static int find_next_system_ram(struct resource *res, char *name)
BUG_ON(start >= end);
read_lock(&resource_lock);
- for (p = iomem_resource.child; p ; p = p->sibling) {
- /* system ram is just marked as IORESOURCE_MEM */
+
+ if (first_level_children_only) {
+ p = iomem_resource.child;
+ sibling_only = true;
+ } else
+ p = &iomem_resource;
+
+ while ((p = next_resource(p, sibling_only))) {
if (p->flags != res->flags)
continue;
if (name && strcmp(p->name, name))
@@ -353,6 +371,7 @@ static int find_next_system_ram(struct resource *res, char *name)
if ((p->end >= start) && (p->start < end))
break;
}
+
read_unlock(&resource_lock);
if (!p)
return -1;
@@ -365,6 +384,70 @@ static int find_next_system_ram(struct resource *res, char *name)
}
/*
+ * Walks through iomem resources and calls func() with matching resource
+ * ranges. This walks through whole tree and not just first level children.
+ * All the memory ranges which overlap start,end and also match flags and
+ * name are valid candidates.
+ *
+ * @name: name of resource
+ * @flags: resource flags
+ * @start: start addr
+ * @end: end addr
+ */
+int walk_iomem_res(char *name, unsigned long flags, u64 start, u64 end,
+ void *arg, int (*func)(u64, u64, void *))
+{
+ struct resource res;
+ u64 orig_end;
+ int ret = -1;
+
+ res.start = start;
+ res.end = end;
+ res.flags = flags;
+ orig_end = res.end;
+ while ((res.start < res.end) &&
+ (!find_next_iomem_res(&res, name, false))) {
+ ret = (*func)(res.start, res.end, arg);
+ if (ret)
+ break;
+ res.start = res.end + 1;
+ res.end = orig_end;
+ }
+ return ret;
+}
+
+/*
+ * This function calls callback against all memory range of "System RAM"
+ * which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
+ * Now, this function is only for "System RAM". This function deals with
+ * full ranges and not pfn. If resources are not pfn aligned, dealing
+ * with pfn can truncate ranges.
+ */
+int walk_system_ram_res(u64 start, u64 end, void *arg,
+ int (*func)(u64, u64, void *))
+{
+ struct resource res;
+ u64 orig_end;
+ int ret = -1;
+
+ res.start = start;
+ res.end = end;
+ res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ orig_end = res.end;
+ while ((res.start < res.end) &&
+ (!find_next_iomem_res(&res, "System RAM", true))) {
+ ret = (*func)(res.start, res.end, arg);
+ if (ret)
+ break;
+ res.start = res.end + 1;
+ res.end = orig_end;
+ }
+ return ret;
+}
+
+#if !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
+
+/*
* This function calls callback against all memory range of "System RAM"
* which are marked as IORESOURCE_MEM and IORESOUCE_BUSY.
* Now, this function is only for "System RAM".
@@ -382,7 +465,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
orig_end = res.end;
while ((res.start < res.end) &&
- (find_next_system_ram(&res, "System RAM") >= 0)) {
+ (find_next_iomem_res(&res, "System RAM", true) >= 0)) {
pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT;
end_pfn = (res.end + 1) >> PAGE_SHIFT;
if (end_pfn > pfn)
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 2904a2105914..391d4ddb6f4b 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
+cond_syscall(sys_kexec_file_load);
cond_syscall(sys_init_module);
cond_syscall(sys_finit_module);
cond_syscall(sys_delete_module);
@@ -197,6 +198,7 @@ cond_syscall(compat_sys_timerfd_settime);
cond_syscall(compat_sys_timerfd_gettime);
cond_syscall(sys_eventfd);
cond_syscall(sys_eventfd2);
+cond_syscall(sys_memfd_create);
/* performance counters: */
cond_syscall(sys_perf_event_open);
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 42b463ad90f2..a81083797e97 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -73,7 +73,7 @@ static DEFINE_SPINLOCK(hash_lock);
* SIGEV values. Here we put out an error if this assumption fails.
*/
#if SIGEV_THREAD_ID != (SIGEV_THREAD_ID & \
- ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
+ ~(SIGEV_SIGNAL | SIGEV_NONE | SIGEV_THREAD))
#error "SIGEV_THREAD_ID must not share bit with other SIGEV values!"
#endif
@@ -254,7 +254,8 @@ static int posix_get_monotonic_coarse(clockid_t which_clock,
return 0;
}
-static int posix_get_coarse_res(const clockid_t which_clock, struct timespec *tp)
+static int posix_get_coarse_res(const clockid_t which_clock,
+ struct timespec *tp)
{
*tp = ktime_to_timespec(KTIME_LOW_RES);
return 0;
@@ -335,14 +336,16 @@ static __init int init_posix_timers(void)
posix_timers_register_clock(CLOCK_REALTIME, &clock_realtime);
posix_timers_register_clock(CLOCK_MONOTONIC, &clock_monotonic);
posix_timers_register_clock(CLOCK_MONOTONIC_RAW, &clock_monotonic_raw);
- posix_timers_register_clock(CLOCK_REALTIME_COARSE, &clock_realtime_coarse);
- posix_timers_register_clock(CLOCK_MONOTONIC_COARSE, &clock_monotonic_coarse);
+ posix_timers_register_clock(CLOCK_REALTIME_COARSE,
+ &clock_realtime_coarse);
+ posix_timers_register_clock(CLOCK_MONOTONIC_COARSE,
+ &clock_monotonic_coarse);
posix_timers_register_clock(CLOCK_BOOTTIME, &clock_boottime);
posix_timers_register_clock(CLOCK_TAI, &clock_tai);
posix_timers_cache = kmem_cache_create("posix_timers_cache",
- sizeof (struct k_itimer), 0, SLAB_PANIC,
- NULL);
+ sizeof(struct k_itimer), 0,
+ SLAB_PANIC, NULL);
return 0;
}
@@ -496,11 +499,11 @@ static enum hrtimer_restart posix_timer_fn(struct hrtimer *timer)
return ret;
}
-static struct pid *good_sigevent(sigevent_t * event)
+static struct pid *good_sigevent(sigevent_t *event)
{
struct task_struct *rtn = current->group_leader;
- if ((event->sigev_notify & SIGEV_THREAD_ID ) &&
+ if ((event->sigev_notify & SIGEV_THREAD_ID) &&
(!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) ||
!same_thread_group(rtn, current) ||
(event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_SIGNAL))
@@ -517,18 +520,18 @@ void posix_timers_register_clock(const clockid_t clock_id,
struct k_clock *new_clock)
{
if ((unsigned) clock_id >= MAX_CLOCKS) {
- printk(KERN_WARNING "POSIX clock register failed for clock_id %d\n",
+ pr_warn("POSIX clock register failed for clock_id %d\n",
clock_id);
return;
}
if (!new_clock->clock_get) {
- printk(KERN_WARNING "POSIX clock id %d lacks clock_get()\n",
+ pr_warn("POSIX clock id %d lacks clock_get()\n",
clock_id);
return;
}
if (!new_clock->clock_getres) {
- printk(KERN_WARNING "POSIX clock id %d lacks clock_getres()\n",
+ pr_warn("POSIX clock id %d lacks clock_getres()\n",
clock_id);
return;
}
@@ -537,7 +540,7 @@ void posix_timers_register_clock(const clockid_t clock_id,
}
EXPORT_SYMBOL_GPL(posix_timers_register_clock);
-static struct k_itimer * alloc_posix_timer(void)
+static struct k_itimer *alloc_posix_timer(void)
{
struct k_itimer *tmr;
tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
@@ -624,7 +627,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->it_overrun = -1;
if (timer_event_spec) {
- if (copy_from_user(&event, timer_event_spec, sizeof (event))) {
+ if (copy_from_user(&event, timer_event_spec, sizeof(event))) {
error = -EFAULT;
goto out;
}
@@ -649,7 +652,7 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
new_timer->sigq->info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id,
- &new_timer_id, sizeof (new_timer_id))) {
+ &new_timer_id, sizeof(new_timer_id))) {
error = -EFAULT;
goto out;
}
@@ -750,7 +753,8 @@ common_timer_get(struct k_itimer *timr, struct itimerspec *cur_setting)
*/
if (iv.tv64 && (timr->it_requeue_pending & REQUEUE_PENDING ||
(timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE))
- timr->it_overrun += (unsigned int) hrtimer_forward(timer, now, iv);
+ timr->it_overrun += (unsigned int) hrtimer_forward(timer, now,
+ iv);
remaining = ktime_sub(hrtimer_get_expires(timer), now);
/* Return 0 only, when the timer is expired and not pending */
@@ -787,7 +791,7 @@ SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
unlock_timer(timr, flags);
- if (!ret && copy_to_user(setting, &cur_setting, sizeof (cur_setting)))
+ if (!ret && copy_to_user(setting, &cur_setting, sizeof(cur_setting)))
return -EFAULT;
return ret;
@@ -839,7 +843,7 @@ common_timer_set(struct k_itimer *timr, int flags,
if (hrtimer_try_to_cancel(timer) < 0)
return TIMER_RETRY;
- timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
+ timr->it_requeue_pending = (timr->it_requeue_pending + 2) &
~REQUEUE_PENDING;
timr->it_overrun_last = 0;
@@ -859,9 +863,8 @@ common_timer_set(struct k_itimer *timr, int flags,
/* SIGEV_NONE timers are not queued ! See common_timer_get */
if (((timr->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE)) {
/* Setup correct expiry time for relative timers */
- if (mode == HRTIMER_MODE_REL) {
+ if (mode == HRTIMER_MODE_REL)
hrtimer_add_expires(timer, timer->base->get_time());
- }
return 0;
}
@@ -884,7 +887,7 @@ SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
if (!new_setting)
return -EINVAL;
- if (copy_from_user(&new_spec, new_setting, sizeof (new_spec)))
+ if (copy_from_user(&new_spec, new_setting, sizeof(new_spec)))
return -EFAULT;
if (!timespec_valid(&new_spec.it_interval) ||
@@ -903,12 +906,12 @@ retry:
unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
- rtn = NULL; // We already got the old time...
+ rtn = NULL; /* We already got the old time... */
goto retry;
}
if (old_setting && !error &&
- copy_to_user(old_setting, &old_spec, sizeof (old_spec)))
+ copy_to_user(old_setting, &old_spec, sizeof(old_spec)))
error = -EFAULT;
return error;
@@ -1010,14 +1013,14 @@ SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
if (!kc || !kc->clock_set)
return -EINVAL;
- if (copy_from_user(&new_tp, tp, sizeof (*tp)))
+ if (copy_from_user(&new_tp, tp, sizeof(*tp)))
return -EFAULT;
return kc->clock_set(which_clock, &new_tp);
}
SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
- struct timespec __user *,tp)
+ struct timespec __user *, tp)
{
struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec kernel_tp;
@@ -1028,7 +1031,7 @@ SYSCALL_DEFINE2(clock_gettime, const clockid_t, which_clock,
error = kc->clock_get(which_clock, &kernel_tp);
- if (!error && copy_to_user(tp, &kernel_tp, sizeof (kernel_tp)))
+ if (!error && copy_to_user(tp, &kernel_tp, sizeof(kernel_tp)))
error = -EFAULT;
return error;
@@ -1069,7 +1072,7 @@ SYSCALL_DEFINE2(clock_getres, const clockid_t, which_clock,
error = kc->clock_getres(which_clock, &rtn_tp);
- if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof (rtn_tp)))
+ if (!error && tp && copy_to_user(tp, &rtn_tp, sizeof(rtn_tp)))
error = -EFAULT;
return error;
@@ -1098,7 +1101,7 @@ SYSCALL_DEFINE4(clock_nanosleep, const clockid_t, which_clock, int, flags,
if (!kc->nsleep)
return -ENANOSLEEP_NOTSUP;
- if (copy_from_user(&t, rqtp, sizeof (struct timespec)))
+ if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
return -EFAULT;
if (!timespec_valid(&t))
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index aca5dfe2fa3d..e81a30181e41 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1432,7 +1432,7 @@ static void process_timeout(unsigned long __data)
}
/**
- * schedule_timeout - sleep until timeout
+ * __schedule_timeout - sleep until timeout
* @timeout: timeout value in jiffies
*
* Make the current task sleep until @timeout jiffies have
@@ -1457,7 +1457,8 @@ static void process_timeout(unsigned long __data)
*
* In all cases the return value is guaranteed to be non-negative.
*/
-signed long __sched schedule_timeout(signed long timeout)
+static signed long
+__sched __schedule_timeout(signed long timeout, unsigned long flag)
{
struct timer_list timer;
unsigned long expire;
@@ -1493,7 +1494,13 @@ signed long __sched schedule_timeout(signed long timeout)
expire = timeout + jiffies;
- setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
+ if (flag & TIMER_DEFERRABLE)
+ setup_deferrable_timer_on_stack(&timer, process_timeout,
+ (unsigned long)current);
+ else
+ setup_timer_on_stack(&timer, process_timeout,
+ (unsigned long)current);
+
__mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
del_singleshot_timer_sync(&timer);
@@ -1506,12 +1513,26 @@ signed long __sched schedule_timeout(signed long timeout)
out:
return timeout < 0 ? 0 : timeout;
}
+
+signed long __sched schedule_timeout(signed long timeout)
+{
+ return __schedule_timeout(timeout, 0);
+}
EXPORT_SYMBOL(schedule_timeout);
/*
* We can use __set_current_state() here because schedule_timeout() calls
* schedule() unconditionally.
*/
+
+signed long
+__sched schedule_timeout_deferrable_interruptible(signed long timeout)
+{
+ __set_current_state(TASK_INTERRUPTIBLE);
+ return __schedule_timeout(timeout, TIMER_DEFERRABLE);
+}
+EXPORT_SYMBOL(schedule_timeout_deferrable_interruptible);
+
signed long __sched schedule_timeout_interruptible(signed long timeout)
{
__set_current_state(TASK_INTERRUPTIBLE);
diff --git a/mm/Makefile b/mm/Makefile
index 632ae77e6070..a96e3a1dea1f 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -3,7 +3,7 @@
#
mmu-y := nommu.o
-mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o madvise.o memory.o mincore.o \
+mmu-$(CONFIG_MMU) := gup.o highmem.o madvise.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
vmalloc.o pagewalk.o pgtable-generic.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1706cbbdf5f0..382f8ece1cb2 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -234,11 +234,46 @@ static ssize_t stable_pages_required_show(struct device *dev,
}
static DEVICE_ATTR_RO(stable_pages_required);
+static ssize_t strictlimit_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t count)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+ unsigned int val;
+ ssize_t ret;
+
+ ret = kstrtouint(buf, 10, &val);
+ if (ret < 0)
+ return ret;
+
+ switch (val) {
+ case 0:
+ bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
+ break;
+ case 1:
+ bdi->capabilities |= BDI_CAP_STRICTLIMIT;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return count;
+}
+static ssize_t strictlimit_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+ return snprintf(page, PAGE_SIZE-1, "%d\n",
+ !!(bdi->capabilities & BDI_CAP_STRICTLIMIT));
+}
+static DEVICE_ATTR_RW(strictlimit);
+
static struct attribute *bdi_dev_attrs[] = {
&dev_attr_read_ahead_kb.attr,
&dev_attr_min_ratio.attr,
&dev_attr_max_ratio.attr,
&dev_attr_stable_pages_required.attr,
+ &dev_attr_strictlimit.attr,
NULL,
};
ATTRIBUTE_GROUPS(bdi_dev);
diff --git a/mm/fremap.c b/mm/fremap.c
deleted file mode 100644
index 72b8fa361433..000000000000
--- a/mm/fremap.c
+++ /dev/null
@@ -1,283 +0,0 @@
-/*
- * linux/mm/fremap.c
- *
- * Explicit pagetable population and nonlinear (random) mappings support.
- *
- * started by Ingo Molnar, Copyright (C) 2002, 2003
- */
-#include <linux/export.h>
-#include <linux/backing-dev.h>
-#include <linux/mm.h>
-#include <linux/swap.h>
-#include <linux/file.h>
-#include <linux/mman.h>
-#include <linux/pagemap.h>
-#include <linux/swapops.h>
-#include <linux/rmap.h>
-#include <linux/syscalls.h>
-#include <linux/mmu_notifier.h>
-
-#include <asm/mmu_context.h>
-#include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
-
-#include "internal.h"
-
-static int mm_counter(struct page *page)
-{
- return PageAnon(page) ? MM_ANONPAGES : MM_FILEPAGES;
-}
-
-static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, pte_t *ptep)
-{
- pte_t pte = *ptep;
- struct page *page;
- swp_entry_t entry;
-
- if (pte_present(pte)) {
- flush_cache_page(vma, addr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, addr, ptep);
- page = vm_normal_page(vma, addr, pte);
- if (page) {
- if (pte_dirty(pte))
- set_page_dirty(page);
- update_hiwater_rss(mm);
- dec_mm_counter(mm, mm_counter(page));
- page_remove_rmap(page);
- page_cache_release(page);
- }
- } else { /* zap_pte() is not called when pte_none() */
- if (!pte_file(pte)) {
- update_hiwater_rss(mm);
- entry = pte_to_swp_entry(pte);
- if (non_swap_entry(entry)) {
- if (is_migration_entry(entry)) {
- page = migration_entry_to_page(entry);
- dec_mm_counter(mm, mm_counter(page));
- }
- } else {
- free_swap_and_cache(entry);
- dec_mm_counter(mm, MM_SWAPENTS);
- }
- }
- pte_clear_not_present_full(mm, addr, ptep, 0);
- }
-}
-
-/*
- * Install a file pte to a given virtual memory address, release any
- * previously existing mapping.
- */
-static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pgoff, pgprot_t prot)
-{
- int err = -ENOMEM;
- pte_t *pte, ptfile;
- spinlock_t *ptl;
-
- pte = get_locked_pte(mm, addr, &ptl);
- if (!pte)
- goto out;
-
- ptfile = pgoff_to_pte(pgoff);
-
- if (!pte_none(*pte))
- zap_pte(mm, vma, addr, pte);
-
- set_pte_at(mm, addr, pte, pte_file_mksoft_dirty(ptfile));
- /*
- * We don't need to run update_mmu_cache() here because the "file pte"
- * being installed by install_file_pte() is not a real pte - it's a
- * non-present entry (like a swap entry), noting what file offset should
- * be mapped there when there's a fault (in a non-linear vma where
- * that's not obvious).
- */
- pte_unmap_unlock(pte, ptl);
- err = 0;
-out:
- return err;
-}
-
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- struct mm_struct *mm = vma->vm_mm;
- int err;
-
- do {
- err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
- if (err)
- return err;
-
- size -= PAGE_SIZE;
- addr += PAGE_SIZE;
- pgoff++;
- } while (size);
-
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
-/**
- * sys_remap_file_pages - remap arbitrary pages of an existing VM_SHARED vma
- * @start: start of the remapped virtual memory range
- * @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range (see NOTE)
- * @pgoff: to-be-mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
- *
- * sys_remap_file_pages remaps arbitrary pages of an existing VM_SHARED vma
- * (shared backing store file).
- *
- * This syscall works purely via pagetables, so it's the most efficient
- * way to map the same (large) file into a given virtual window. Unlike
- * mmap()/mremap() it does not create any new vmas. The new mappings are
- * also safe across swapout.
- *
- * NOTE: the @prot parameter right now is ignored (but must be zero),
- * and the vma's default protection is used. Arbitrary protections
- * might be implemented in the future.
- */
-SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
- unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
-{
- struct mm_struct *mm = current->mm;
- struct address_space *mapping;
- struct vm_area_struct *vma;
- int err = -EINVAL;
- int has_write_lock = 0;
- vm_flags_t vm_flags = 0;
-
- pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
- "See Documentation/vm/remap_file_pages.txt.\n",
- current->comm, current->pid);
-
- if (prot)
- return err;
- /*
- * Sanitize the syscall parameters:
- */
- start = start & PAGE_MASK;
- size = size & PAGE_MASK;
-
- /* Does the address range wrap, or is the span zero-sized? */
- if (start + size <= start)
- return err;
-
- /* Does pgoff wrap? */
- if (pgoff + (size >> PAGE_SHIFT) < pgoff)
- return err;
-
- /* Can we represent this offset inside this architecture's pte's? */
-#if PTE_FILE_MAX_BITS < BITS_PER_LONG
- if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
- return err;
-#endif
-
- /* We need down_write() to change vma->vm_flags. */
- down_read(&mm->mmap_sem);
- retry:
- vma = find_vma(mm, start);
-
- /*
- * Make sure the vma is shared, that it supports prefaulting,
- * and that the remapped range is valid and fully within
- * the single existing vma.
- */
- if (!vma || !(vma->vm_flags & VM_SHARED))
- goto out;
-
- if (!vma->vm_ops || !vma->vm_ops->remap_pages)
- goto out;
-
- if (start < vma->vm_start || start + size > vma->vm_end)
- goto out;
-
- /* Must set VM_NONLINEAR before any pages are populated. */
- if (!(vma->vm_flags & VM_NONLINEAR)) {
- /*
- * vm_private_data is used as a swapout cursor
- * in a VM_NONLINEAR vma.
- */
- if (vma->vm_private_data)
- goto out;
-
- /* Don't need a nonlinear mapping, exit success */
- if (pgoff == linear_page_index(vma, start)) {
- err = 0;
- goto out;
- }
-
- if (!has_write_lock) {
-get_write_lock:
- up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- has_write_lock = 1;
- goto retry;
- }
- mapping = vma->vm_file->f_mapping;
- /*
- * page_mkclean doesn't work on nonlinear vmas, so if
- * dirty pages need to be accounted, emulate with linear
- * vmas.
- */
- if (mapping_cap_account_dirty(mapping)) {
- unsigned long addr;
- struct file *file = get_file(vma->vm_file);
- /* mmap_region may free vma; grab the info now */
- vm_flags = vma->vm_flags;
-
- addr = mmap_region(file, start, size, vm_flags, pgoff);
- fput(file);
- if (IS_ERR_VALUE(addr)) {
- err = addr;
- } else {
- BUG_ON(addr != start);
- err = 0;
- }
- goto out_freed;
- }
- mutex_lock(&mapping->i_mmap_mutex);
- flush_dcache_mmap_lock(mapping);
- vma->vm_flags |= VM_NONLINEAR;
- vma_interval_tree_remove(vma, &mapping->i_mmap);
- vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
- flush_dcache_mmap_unlock(mapping);
- mutex_unlock(&mapping->i_mmap_mutex);
- }
-
- if (vma->vm_flags & VM_LOCKED) {
- /*
- * drop PG_Mlocked flag for over-mapped range
- */
- if (!has_write_lock)
- goto get_write_lock;
- vm_flags = vma->vm_flags;
- munlock_vma_pages_range(vma, start, start + size);
- vma->vm_flags = vm_flags;
- }
-
- mmu_notifier_invalidate_range_start(mm, start, start + size);
- err = vma->vm_ops->remap_pages(vma, start, size, pgoff);
- mmu_notifier_invalidate_range_end(mm, start, start + size);
-
- /*
- * We can't clear VM_NONLINEAR because we'd have to do
- * it after ->populate completes, and that would prevent
- * downgrading the lock. (Locks can't be upgraded).
- */
-
-out:
- if (vma)
- vm_flags = vma->vm_flags;
-out_freed:
- if (likely(!has_write_lock))
- up_read(&mm->mmap_sem);
- else
- up_write(&mm->mmap_sem);
- if (!err && ((vm_flags & VM_LOCKED) || !(flags & MAP_NONBLOCK)))
- mm_populate(start, size);
-
- return err;
-}
diff --git a/mm/ksm.c b/mm/ksm.c
index fb7590222706..f7de4c07c693 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -223,6 +223,9 @@ static unsigned int ksm_thread_pages_to_scan = 100;
/* Milliseconds ksmd should sleep between batches */
static unsigned int ksm_thread_sleep_millisecs = 20;
+/* Boolean to indicate whether to use deferrable timer or not */
+static bool use_deferrable_timer;
+
#ifdef CONFIG_NUMA
/* Zeroed when merging across nodes is not allowed */
static unsigned int ksm_merge_across_nodes = 1;
@@ -1705,6 +1708,11 @@ static void ksm_do_scan(unsigned int scan_npages)
}
}
+static void process_timeout(unsigned long __data)
+{
+ wake_up_process((struct task_struct *)__data);
+}
+
static int ksmd_should_run(void)
{
return (ksm_run & KSM_RUN_MERGE) && !list_empty(&ksm_mm_head.mm_list);
@@ -1725,8 +1733,13 @@ static int ksm_scan_thread(void *nothing)
try_to_freeze();
if (ksmd_should_run()) {
- schedule_timeout_interruptible(
- msecs_to_jiffies(ksm_thread_sleep_millisecs));
+ signed long to;
+
+ to = msecs_to_jiffies(ksm_thread_sleep_millisecs);
+ if (use_deferrable_timer)
+ schedule_timeout_deferrable_interruptible(to);
+ else
+ schedule_timeout_interruptible(to);
} else {
wait_event_freezable(ksm_thread_wait,
ksmd_should_run() || kthread_should_stop());
@@ -2175,6 +2188,29 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
}
KSM_ATTR(run);
+static ssize_t deferrable_timer_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return snprintf(buf, 8, "%d\n", use_deferrable_timer);
+}
+
+static ssize_t deferrable_timer_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned long enable;
+ int err;
+
+ err = kstrtoul(buf, 10, &enable);
+ if (err < 0)
+ return err;
+ if (enable >= 1)
+ return -EINVAL;
+ use_deferrable_timer = enable;
+ return count;
+}
+KSM_ATTR(deferrable_timer);
+
#ifdef CONFIG_NUMA
static ssize_t merge_across_nodes_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
@@ -2287,6 +2323,7 @@ static struct attribute *ksm_attrs[] = {
&pages_unshared_attr.attr,
&pages_volatile_attr.attr,
&full_scans_attr.attr,
+ &deferrable_timer_attr.attr,
#ifdef CONFIG_NUMA
&merge_across_nodes_attr.attr,
#endif
diff --git a/mm/memory.c b/mm/memory.c
index 84de6dacd0cc..6ea15ed23ec4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3444,44 +3444,6 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
}
#endif /* __PAGETABLE_PMD_FOLDED */
-#if !defined(__HAVE_ARCH_GATE_AREA)
-
-#if defined(AT_SYSINFO_EHDR)
-static struct vm_area_struct gate_vma;
-
-static int __init gate_vma_init(void)
-{
- gate_vma.vm_mm = NULL;
- gate_vma.vm_start = FIXADDR_USER_START;
- gate_vma.vm_end = FIXADDR_USER_END;
- gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
- gate_vma.vm_page_prot = __P101;
-
- return 0;
-}
-__initcall(gate_vma_init);
-#endif
-
-struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
-{
-#ifdef AT_SYSINFO_EHDR
- return &gate_vma;
-#else
- return NULL;
-#endif
-}
-
-int in_gate_area_no_mm(unsigned long addr)
-{
-#ifdef AT_SYSINFO_EHDR
- if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END))
- return 1;
-#endif
- return 0;
-}
-
-#endif /* __HAVE_ARCH_GATE_AREA */
-
static int __follow_pte(struct mm_struct *mm, unsigned long address,
pte_t **ptepp, spinlock_t **ptlp)
{
diff --git a/mm/mmap.c b/mm/mmap.c
index 64c9d736155c..80217c935608 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -221,7 +221,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
if (vma->vm_flags & VM_DENYWRITE)
atomic_inc(&file_inode(file)->i_writecount);
if (vma->vm_flags & VM_SHARED)
- mapping->i_mmap_writable--;
+ mapping_unmap_writable(mapping);
flush_dcache_mmap_lock(mapping);
if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -622,7 +622,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
if (vma->vm_flags & VM_DENYWRITE)
atomic_dec(&file_inode(file)->i_writecount);
if (vma->vm_flags & VM_SHARED)
- mapping->i_mmap_writable++;
+ atomic_inc(&mapping->i_mmap_writable);
flush_dcache_mmap_lock(mapping);
if (unlikely(vma->vm_flags & VM_NONLINEAR))
@@ -1577,6 +1577,17 @@ munmap_back:
if (error)
goto free_vma;
}
+ if (vm_flags & VM_SHARED) {
+ error = mapping_map_writable(file->f_mapping);
+ if (error)
+ goto allow_write_and_free_vma;
+ }
+
+ /* ->mmap() can change vma->vm_file, but must guarantee that
+ * vma_link() below can deny write-access if VM_DENYWRITE is set
+ * and map writably if VM_SHARED is set. This usually means the
+ * new file must not have been exposed to user-space, yet.
+ */
vma->vm_file = get_file(file);
error = file->f_op->mmap(file, vma);
if (error)
@@ -1616,8 +1627,12 @@ munmap_back:
vma_link(mm, vma, prev, rb_link, rb_parent);
/* Once vma denies write, undo our temporary denial count */
- if (vm_flags & VM_DENYWRITE)
- allow_write_access(file);
+ if (file) {
+ if (vm_flags & VM_SHARED)
+ mapping_unmap_writable(file->f_mapping);
+ if (vm_flags & VM_DENYWRITE)
+ allow_write_access(file);
+ }
file = vma->vm_file;
out:
perf_event_mmap(vma);
@@ -1646,14 +1661,17 @@ out:
return addr;
unmap_and_free_vma:
- if (vm_flags & VM_DENYWRITE)
- allow_write_access(file);
vma->vm_file = NULL;
fput(file);
/* Undo any partial mapping done by a device driver. */
unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
charged = 0;
+ if (vm_flags & VM_SHARED)
+ mapping_unmap_writable(file->f_mapping);
+allow_write_and_free_vma:
+ if (vm_flags & VM_DENYWRITE)
+ allow_write_access(file);
free_vma:
kmem_cache_free(vm_area_cachep, vma);
unacct_error:
@@ -2586,6 +2604,75 @@ SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
return vm_munmap(addr, len);
}
+
+/*
+ * Emulation of deprecated remap_file_pages() syscall.
+ */
+SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
+ unsigned long, prot, unsigned long, pgoff, unsigned long, flags)
+{
+
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long populate = 0;
+ unsigned long ret = -EINVAL;
+ struct file *file;
+
+ pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
+ "See Documentation/vm/remap_file_pages.txt.\n",
+ current->comm, current->pid);
+
+ if (prot)
+ return ret;
+ start = start & PAGE_MASK;
+ size = size & PAGE_MASK;
+
+ if (start + size <= start)
+ return ret;
+
+ /* Does pgoff wrap? */
+ if (pgoff + (size >> PAGE_SHIFT) < pgoff)
+ return ret;
+
+ down_write(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+
+ if (!vma || !(vma->vm_flags & VM_SHARED))
+ goto out;
+
+ if (start < vma->vm_start || start + size > vma->vm_end)
+ goto out;
+
+ if (pgoff == linear_page_index(vma, start)) {
+ ret = 0;
+ goto out;
+ }
+
+ prot |= vma->vm_flags & VM_READ ? PROT_READ : 0;
+ prot |= vma->vm_flags & VM_WRITE ? PROT_WRITE : 0;
+ prot |= vma->vm_flags & VM_EXEC ? PROT_EXEC : 0;
+
+ flags &= MAP_NONBLOCK;
+ flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
+ if (vma->vm_flags & VM_LOCKED) {
+ flags |= MAP_LOCKED;
+ /* drop PG_Mlocked flag for over-mapped range */
+ munlock_vma_pages_range(vma, start, start + size);
+ }
+
+ file = get_file(vma->vm_file);
+ ret = do_mmap_pgoff(vma->vm_file, start, size,
+ prot, flags, pgoff, &populate);
+ fput(file);
+out:
+ up_write(&mm->mmap_sem);
+ if (populate)
+ mm_populate(ret, populate);
+ if (!IS_ERR_VALUE(ret))
+ ret = 0;
+ return ret;
+}
+
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
#ifdef CONFIG_DEBUG_VM
diff --git a/mm/nommu.c b/mm/nommu.c
index 4a852f6c5709..026ac6375aaa 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1981,11 +1981,6 @@ error:
return -ENOMEM;
}
-int in_gate_area_no_mm(unsigned long addr)
-{
- return 0;
-}
-
int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
BUG();
@@ -1999,14 +1994,6 @@ void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf)
}
EXPORT_SYMBOL(filemap_map_pages);
-int generic_file_remap_pages(struct vm_area_struct *vma, unsigned long addr,
- unsigned long size, pgoff_t pgoff)
-{
- BUG();
- return 0;
-}
-EXPORT_SYMBOL(generic_file_remap_pages);
-
static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
unsigned long addr, void *buf, int len, int write)
{
diff --git a/mm/shmem.c b/mm/shmem.c
index 5909f298c3e7..0e5fb225007c 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -66,6 +66,9 @@ static struct vfsmount *shm_mnt;
#include <linux/highmem.h>
#include <linux/seq_file.h>
#include <linux/magic.h>
+#include <linux/syscalls.h>
+#include <linux/fcntl.h>
+#include <uapi/linux/memfd.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
@@ -547,6 +550,7 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
+ struct shmem_inode_info *info = SHMEM_I(inode);
int error;
error = inode_change_ok(inode, attr);
@@ -557,6 +561,11 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
loff_t oldsize = inode->i_size;
loff_t newsize = attr->ia_size;
+ /* protected by i_mutex */
+ if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
+ (newsize > oldsize && (info->seals & F_SEAL_GROW)))
+ return -EPERM;
+
if (newsize != oldsize) {
error = shmem_reacct_size(SHMEM_I(inode)->flags,
oldsize, newsize);
@@ -1412,6 +1421,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
spin_lock_init(&info->lock);
+ info->seals = F_SEAL_SEAL;
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->swaplist);
simple_xattrs_init(&info->xattrs);
@@ -1470,7 +1480,17 @@ shmem_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
struct inode *inode = mapping->host;
+ struct shmem_inode_info *info = SHMEM_I(inode);
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+
+ /* i_mutex is held by caller */
+ if (unlikely(info->seals)) {
+ if (info->seals & F_SEAL_WRITE)
+ return -EPERM;
+ if ((info->seals & F_SEAL_GROW) && pos + len > inode->i_size)
+ return -EPERM;
+ }
+
return shmem_getpage(inode, index, pagep, SGP_WRITE, NULL);
}
@@ -1808,11 +1828,233 @@ static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
return offset;
}
+/*
+ * We need a tag: a new tag would expand every radix_tree_node by 8 bytes,
+ * so reuse a tag which we firmly believe is never set or cleared on shmem.
+ */
+#define SHMEM_TAG_PINNED PAGECACHE_TAG_TOWRITE
+#define LAST_SCAN 4 /* about 150ms max */
+
+static void shmem_tag_pins(struct address_space *mapping)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ pgoff_t start;
+ struct page *page;
+
+ lru_add_drain();
+ start = 0;
+ rcu_read_lock();
+
+restart:
+ radix_tree_for_each_slot(slot, &mapping->page_tree, &iter, start) {
+ page = radix_tree_deref_slot(slot);
+ if (!page || radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page))
+ goto restart;
+ } else if (page_count(page) - page_mapcount(page) > 1) {
+ spin_lock_irq(&mapping->tree_lock);
+ radix_tree_tag_set(&mapping->page_tree, iter.index,
+ SHMEM_TAG_PINNED);
+ spin_unlock_irq(&mapping->tree_lock);
+ }
+
+ if (need_resched()) {
+ cond_resched_rcu();
+ start = iter.index + 1;
+ goto restart;
+ }
+ }
+ rcu_read_unlock();
+}
+
+/*
+ * Setting SEAL_WRITE requires us to verify there's no pending writer. However,
+ * via get_user_pages(), drivers might have some pending I/O without any active
+ * user-space mappings (eg., direct-IO, AIO). Therefore, we look at all pages
+ * and see whether it has an elevated ref-count. If so, we tag them and wait for
+ * them to be dropped.
+ * The caller must guarantee that no new user will acquire writable references
+ * to those pages to avoid races.
+ */
+static int shmem_wait_for_pins(struct address_space *mapping)
+{
+ struct radix_tree_iter iter;
+ void **slot;
+ pgoff_t start;
+ struct page *page;
+ int error, scan;
+
+ shmem_tag_pins(mapping);
+
+ error = 0;
+ for (scan = 0; scan <= LAST_SCAN; scan++) {
+ if (!radix_tree_tagged(&mapping->page_tree, SHMEM_TAG_PINNED))
+ break;
+
+ if (!scan)
+ lru_add_drain_all();
+ else if (schedule_timeout_killable((HZ << scan) / 200))
+ scan = LAST_SCAN;
+
+ start = 0;
+ rcu_read_lock();
+restart:
+ radix_tree_for_each_tagged(slot, &mapping->page_tree, &iter,
+ start, SHMEM_TAG_PINNED) {
+
+ page = radix_tree_deref_slot(slot);
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page))
+ goto restart;
+
+ page = NULL;
+ }
+
+ if (page &&
+ page_count(page) - page_mapcount(page) != 1) {
+ if (scan < LAST_SCAN)
+ goto continue_resched;
+
+ /*
+ * On the last scan, we clean up all those tags
+ * we inserted; but make a note that we still
+ * found pages pinned.
+ */
+ error = -EBUSY;
+ }
+
+ spin_lock_irq(&mapping->tree_lock);
+ radix_tree_tag_clear(&mapping->page_tree,
+ iter.index, SHMEM_TAG_PINNED);
+ spin_unlock_irq(&mapping->tree_lock);
+continue_resched:
+ if (need_resched()) {
+ cond_resched_rcu();
+ start = iter.index + 1;
+ goto restart;
+ }
+ }
+ rcu_read_unlock();
+ }
+
+ return error;
+}
+
+#define F_ALL_SEALS (F_SEAL_SEAL | \
+ F_SEAL_SHRINK | \
+ F_SEAL_GROW | \
+ F_SEAL_WRITE)
+
+int shmem_add_seals(struct file *file, unsigned int seals)
+{
+ struct inode *inode = file_inode(file);
+ struct shmem_inode_info *info = SHMEM_I(inode);
+ int error;
+
+ /*
+ * SEALING
+ * Sealing allows multiple parties to share a shmem-file but restrict
+ * access to a specific subset of file operations. Seals can only be
+ * added, but never removed. This way, mutually untrusted parties can
+ * share common memory regions with a well-defined policy. A malicious
+ * peer can thus never perform unwanted operations on a shared object.
+ *
+ * Seals are only supported on special shmem-files and always affect
+ * the whole underlying inode. Once a seal is set, it may prevent some
+ * kinds of access to the file. Currently, the following seals are
+ * defined:
+ * SEAL_SEAL: Prevent further seals from being set on this file
+ * SEAL_SHRINK: Prevent the file from shrinking
+ * SEAL_GROW: Prevent the file from growing
+ * SEAL_WRITE: Prevent write access to the file
+ *
+ * As we don't require any trust relationship between two parties, we
+ * must prevent seals from being removed. Therefore, sealing a file
+ * only adds a given set of seals to the file, it never touches
+ * existing seals. Furthermore, the "setting seals"-operation can be
+ * sealed itself, which basically prevents any further seal from being
+ * added.
+ *
+ * Semantics of sealing are only defined on volatile files. Only
+ * anonymous shmem files support sealing. More importantly, seals are
+ * never written to disk. Therefore, there's no plan to support it on
+ * other file types.
+ */
+
+ if (file->f_op != &shmem_file_operations)
+ return -EINVAL;
+ if (!(file->f_mode & FMODE_WRITE))
+ return -EPERM;
+ if (seals & ~(unsigned int)F_ALL_SEALS)
+ return -EINVAL;
+
+ mutex_lock(&inode->i_mutex);
+
+ if (info->seals & F_SEAL_SEAL) {
+ error = -EPERM;
+ goto unlock;
+ }
+
+ if ((seals & F_SEAL_WRITE) && !(info->seals & F_SEAL_WRITE)) {
+ error = mapping_deny_writable(file->f_mapping);
+ if (error)
+ goto unlock;
+
+ error = shmem_wait_for_pins(file->f_mapping);
+ if (error) {
+ mapping_allow_writable(file->f_mapping);
+ goto unlock;
+ }
+ }
+
+ info->seals |= seals;
+ error = 0;
+
+unlock:
+ mutex_unlock(&inode->i_mutex);
+ return error;
+}
+EXPORT_SYMBOL_GPL(shmem_add_seals);
+
+int shmem_get_seals(struct file *file)
+{
+ if (file->f_op != &shmem_file_operations)
+ return -EINVAL;
+
+ return SHMEM_I(file_inode(file))->seals;
+}
+EXPORT_SYMBOL_GPL(shmem_get_seals);
+
+long shmem_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ long error;
+
+ switch (cmd) {
+ case F_ADD_SEALS:
+ /* disallow upper 32bit */
+ if (arg > UINT_MAX)
+ return -EINVAL;
+
+ error = shmem_add_seals(file, arg);
+ break;
+ case F_GET_SEALS:
+ error = shmem_get_seals(file);
+ break;
+ default:
+ error = -EINVAL;
+ break;
+ }
+
+ return error;
+}
+
static long shmem_fallocate(struct file *file, int mode, loff_t offset,
loff_t len)
{
struct inode *inode = file_inode(file);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_falloc shmem_falloc;
pgoff_t start, index, end;
int error;
@@ -1828,6 +2070,12 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
+ /* protected by i_mutex */
+ if (info->seals & F_SEAL_WRITE) {
+ error = -EPERM;
+ goto out;
+ }
+
shmem_falloc.waitq = &shmem_falloc_waitq;
shmem_falloc.start = unmap_start >> PAGE_SHIFT;
shmem_falloc.next = (unmap_end + 1) >> PAGE_SHIFT;
@@ -1854,6 +2102,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
if (error)
goto out;
+ if ((info->seals & F_SEAL_GROW) && offset + len > inode->i_size) {
+ error = -EPERM;
+ goto out;
+ }
+
start = offset >> PAGE_CACHE_SHIFT;
end = (offset + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
/* Try to avoid a swapstorm if len is impossible to satisfy */
@@ -2617,6 +2870,77 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root)
shmem_show_mpol(seq, sbinfo->mpol);
return 0;
}
+
+#define MFD_NAME_PREFIX "memfd:"
+#define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1)
+#define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN)
+
+#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING)
+
+SYSCALL_DEFINE2(memfd_create,
+ const char __user *, uname,
+ unsigned int, flags)
+{
+ struct shmem_inode_info *info;
+ struct file *file;
+ int fd, error;
+ char *name;
+ long len;
+
+ if (flags & ~(unsigned int)MFD_ALL_FLAGS)
+ return -EINVAL;
+
+ /* length includes terminating zero */
+ len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1);
+ if (len <= 0)
+ return -EFAULT;
+ if (len > MFD_NAME_MAX_LEN + 1)
+ return -EINVAL;
+
+ name = kmalloc(len + MFD_NAME_PREFIX_LEN, GFP_TEMPORARY);
+ if (!name)
+ return -ENOMEM;
+
+ strcpy(name, MFD_NAME_PREFIX);
+ if (copy_from_user(&name[MFD_NAME_PREFIX_LEN], uname, len)) {
+ error = -EFAULT;
+ goto err_name;
+ }
+
+ /* terminating-zero may have changed after strnlen_user() returned */
+ if (name[len + MFD_NAME_PREFIX_LEN - 1]) {
+ error = -EFAULT;
+ goto err_name;
+ }
+
+ fd = get_unused_fd_flags((flags & MFD_CLOEXEC) ? O_CLOEXEC : 0);
+ if (fd < 0) {
+ error = fd;
+ goto err_name;
+ }
+
+ file = shmem_file_setup(name, 0, VM_NORESERVE);
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto err_fd;
+ }
+ info = SHMEM_I(file_inode(file));
+ file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE;
+ file->f_flags |= O_RDWR | O_LARGEFILE;
+ if (flags & MFD_ALLOW_SEALING)
+ info->seals &= ~F_SEAL_SEAL;
+
+ fd_install(fd, file);
+ kfree(name);
+ return fd;
+
+err_fd:
+ put_unused_fd(fd);
+err_name:
+ kfree(name);
+ return error;
+}
+
#endif /* CONFIG_TMPFS */
static void shmem_put_super(struct super_block *sb)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index e160151da6b8..3e0ec83d000c 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -39,6 +39,7 @@ static struct backing_dev_info swap_backing_dev_info = {
struct address_space swapper_spaces[MAX_SWAPFILES] = {
[0 ... MAX_SWAPFILES - 1] = {
.page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN),
+ .i_mmap_writable = ATOMIC_INIT(0),
.a_ops = &swap_aops,
.backing_dev_info = &swap_backing_dev_info,
}
diff --git a/scripts/.gitignore b/scripts/.gitignore
index fb070fa1038f..5ecfe93f2028 100644
--- a/scripts/.gitignore
+++ b/scripts/.gitignore
@@ -4,7 +4,6 @@
conmakehash
kallsyms
pnmtologo
-bin2c
unifdef
ihex2fw
recordmcount
diff --git a/scripts/Makefile b/scripts/Makefile
index 890df5c6adfb..72902b5f2721 100644
--- a/scripts/Makefile
+++ b/scripts/Makefile
@@ -13,7 +13,6 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
hostprogs-$(CONFIG_KALLSYMS) += kallsyms
hostprogs-$(CONFIG_LOGO) += pnmtologo
hostprogs-$(CONFIG_VT) += conmakehash
-hostprogs-$(CONFIG_IKCONFIG) += bin2c
hostprogs-$(BUILD_C_RECORDMCOUNT) += recordmcount
hostprogs-$(CONFIG_BUILDTIME_EXTABLE_SORT) += sortextable
hostprogs-$(CONFIG_ASN1) += asn1_compiler
diff --git a/scripts/basic/.gitignore b/scripts/basic/.gitignore
index a776371a3502..9528ec9e5adc 100644
--- a/scripts/basic/.gitignore
+++ b/scripts/basic/.gitignore
@@ -1 +1,2 @@
fixdep
+bin2c
diff --git a/scripts/basic/Makefile b/scripts/basic/Makefile
index 4fcef87bb875..ec10d9345bc2 100644
--- a/scripts/basic/Makefile
+++ b/scripts/basic/Makefile
@@ -9,6 +9,7 @@
# fixdep: Used to generate dependency information during build process
hostprogs-y := fixdep
+hostprogs-$(CONFIG_BUILD_BIN2C) += bin2c
always := $(hostprogs-y)
# fixdep is needed to compile other host programs
diff --git a/scripts/bin2c.c b/scripts/basic/bin2c.c
index 96dd2bcbb407..af187e695345 100644
--- a/scripts/bin2c.c
+++ b/scripts/basic/bin2c.c
@@ -11,7 +11,7 @@
int main(int argc, char *argv[])
{
- int ch, total=0;
+ int ch, total = 0;
if (argc > 1)
printf("const char %s[] %s=\n",
@@ -19,10 +19,9 @@ int main(int argc, char *argv[])
do {
printf("\t\"");
- while ((ch = getchar()) != EOF)
- {
+ while ((ch = getchar()) != EOF) {
total++;
- printf("\\x%02x",ch);
+ printf("\\x%02x", ch);
if (total % 16 == 0)
break;
}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index d36d79e35c76..36ff2e4c7b6f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -2,6 +2,7 @@ TARGETS = breakpoints
TARGETS += cpu-hotplug
TARGETS += efivarfs
TARGETS += kcmp
+TARGETS += memfd
TARGETS += memory-hotplug
TARGETS += mqueue
TARGETS += mount
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
new file mode 100644
index 000000000000..afe87c40ac80
--- /dev/null
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -0,0 +1,4 @@
+fuse_mnt
+fuse_test
+memfd_test
+memfd-test-file
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
new file mode 100644
index 000000000000..6816c491c5ff
--- /dev/null
+++ b/tools/testing/selftests/memfd/Makefile
@@ -0,0 +1,41 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+ ARCH := X86
+endif
+ifeq ($(ARCH),x86_64)
+ ARCH := X86
+endif
+
+CFLAGS += -D_FILE_OFFSET_BITS=64
+CFLAGS += -I../../../../arch/x86/include/generated/uapi/
+CFLAGS += -I../../../../arch/x86/include/uapi/
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+
+all:
+ifeq ($(ARCH),X86)
+ gcc $(CFLAGS) memfd_test.c -o memfd_test
+else
+ echo "Not an x86 target, can't build memfd selftest"
+endif
+
+run_tests: all
+ifeq ($(ARCH),X86)
+ gcc $(CFLAGS) memfd_test.c -o memfd_test
+endif
+ @./memfd_test || echo "memfd_test: [FAIL]"
+
+build_fuse:
+ifeq ($(ARCH),X86)
+ gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt
+ gcc $(CFLAGS) fuse_test.c -o fuse_test
+else
+ echo "Not an x86 target, can't build memfd selftest"
+endif
+
+run_fuse: build_fuse
+ @./run_fuse_test.sh || echo "fuse_test: [FAIL]"
+
+clean:
+ $(RM) memfd_test fuse_test
diff --git a/tools/testing/selftests/memfd/fuse_mnt.c b/tools/testing/selftests/memfd/fuse_mnt.c
new file mode 100644
index 000000000000..feacf1280fcd
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_mnt.c
@@ -0,0 +1,110 @@
+/*
+ * memfd test file-system
+ * This file uses FUSE to create a dummy file-system with only one file /memfd.
+ * This file is read-only and takes 1s per read.
+ *
+ * This file-system is used by the memfd test-cases to force the kernel to pin
+ * pages during reads(). Due to the 1s delay of this file-system, this is a
+ * nice way to test race-conditions against get_user_pages() in the kernel.
+ *
+ * We use direct_io==1 to force the kernel to use direct-IO for this
+ * file-system.
+ */
+
+#define FUSE_USE_VERSION 26
+
+#include <fuse.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static const char memfd_content[] = "memfd-example-content";
+static const char memfd_path[] = "/memfd";
+
+static int memfd_getattr(const char *path, struct stat *st)
+{
+ memset(st, 0, sizeof(*st));
+
+ if (!strcmp(path, "/")) {
+ st->st_mode = S_IFDIR | 0755;
+ st->st_nlink = 2;
+ } else if (!strcmp(path, memfd_path)) {
+ st->st_mode = S_IFREG | 0444;
+ st->st_nlink = 1;
+ st->st_size = strlen(memfd_content);
+ } else {
+ return -ENOENT;
+ }
+
+ return 0;
+}
+
+static int memfd_readdir(const char *path,
+ void *buf,
+ fuse_fill_dir_t filler,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ if (strcmp(path, "/"))
+ return -ENOENT;
+
+ filler(buf, ".", NULL, 0);
+ filler(buf, "..", NULL, 0);
+ filler(buf, memfd_path + 1, NULL, 0);
+
+ return 0;
+}
+
+static int memfd_open(const char *path, struct fuse_file_info *fi)
+{
+ if (strcmp(path, memfd_path))
+ return -ENOENT;
+
+ if ((fi->flags & 3) != O_RDONLY)
+ return -EACCES;
+
+ /* force direct-IO */
+ fi->direct_io = 1;
+
+ return 0;
+}
+
+static int memfd_read(const char *path,
+ char *buf,
+ size_t size,
+ off_t offset,
+ struct fuse_file_info *fi)
+{
+ size_t len;
+
+ if (strcmp(path, memfd_path) != 0)
+ return -ENOENT;
+
+ sleep(1);
+
+ len = strlen(memfd_content);
+ if (offset < len) {
+ if (offset + size > len)
+ size = len - offset;
+
+ memcpy(buf, memfd_content + offset, size);
+ } else {
+ size = 0;
+ }
+
+ return size;
+}
+
+static struct fuse_operations memfd_ops = {
+ .getattr = memfd_getattr,
+ .readdir = memfd_readdir,
+ .open = memfd_open,
+ .read = memfd_read,
+};
+
+int main(int argc, char *argv[])
+{
+ return fuse_main(argc, argv, &memfd_ops, NULL);
+}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
new file mode 100644
index 000000000000..67908b18f035
--- /dev/null
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -0,0 +1,311 @@
+/*
+ * memfd GUP test-case
+ * This tests memfd interactions with get_user_pages(). We require the
+ * fuse_mnt.c program to provide a fake direct-IO FUSE mount-point for us. This
+ * file-system delays _all_ reads by 1s and forces direct-IO. This means, any
+ * read() on files in that file-system will pin the receive-buffer pages for at
+ * least 1s via get_user_pages().
+ *
+ * We use this trick to race ADD_SEALS against a write on a memfd object. The
+ * ADD_SEALS must fail if the memfd pages are still pinned. Note that we use
+ * the read() syscall with our memory-mapped memfd object as receive buffer to
+ * force the kernel to write into our memfd object.
+ */
+
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65535
+
+static int sys_memfd_create(const char *name,
+ unsigned int flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+ int r, fd;
+
+ fd = sys_memfd_create(name, flags);
+ if (fd < 0) {
+ printf("memfd_create(\"%s\", %u) failed: %m\n",
+ name, flags);
+ abort();
+ }
+
+ r = ftruncate(fd, sz);
+ if (r < 0) {
+ printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+ abort();
+ }
+
+ return fd;
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+ long r;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0) {
+ printf("GET_SEALS(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ if (s != seals) {
+ printf("%llu != %llu = GET_SEALS(%d)\n",
+ (unsigned long long)seals, (unsigned long long)s, fd);
+ abort();
+ }
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0) {
+ printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+}
+
+static int mfd_busy_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ s = 0;
+ else
+ s = r;
+
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0 && errno != EBUSY) {
+ printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected with EBUSY: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+
+ return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static int global_mfd = -1;
+static void *global_p = NULL;
+
+static int sealing_thread_fn(void *arg)
+{
+ int sig, r;
+
+ /*
+ * This thread first waits 200ms so any pending operation in the parent
+ * is correctly started. After that, it tries to seal @global_mfd as
+ * SEAL_WRITE. This _must_ fail as the parent thread has a read() into
+ * that memory mapped object still ongoing.
+ * We then wait one more second and try sealing again. This time it
+ * must succeed as there shouldn't be anyone else pinning the pages.
+ */
+
+ /* wait 200ms for FUSE-request to be active */
+ usleep(200000);
+
+ /* unmount mapping before sealing to avoid i_mmap_writable failures */
+ munmap(global_p, MFD_DEF_SIZE);
+
+ /* Try sealing the global file; expect EBUSY or success. Current
+ * kernels will never succeed, but in the future, kernels might
+ * implement page-replacements or other fancy ways to avoid racing
+ * writes. */
+ r = mfd_busy_add_seals(global_mfd, F_SEAL_WRITE);
+ if (r >= 0) {
+ printf("HURRAY! This kernel fixed GUP races!\n");
+ } else {
+ /* wait 1s more so the FUSE-request is done */
+ sleep(1);
+
+ /* try sealing the global file again */
+ mfd_assert_add_seals(global_mfd, F_SEAL_WRITE);
+ }
+
+ return 0;
+}
+
+static pid_t spawn_sealing_thread(void)
+{
+ uint8_t *stack;
+ pid_t pid;
+
+ stack = malloc(STACK_SIZE);
+ if (!stack) {
+ printf("malloc(STACK_SIZE) failed: %m\n");
+ abort();
+ }
+
+ pid = clone(sealing_thread_fn,
+ stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM,
+ NULL);
+ if (pid < 0) {
+ printf("clone() failed: %m\n");
+ abort();
+ }
+
+ return pid;
+}
+
+static void join_sealing_thread(pid_t pid)
+{
+ waitpid(pid, NULL, 0);
+}
+
+int main(int argc, char **argv)
+{
+ static const char zero[MFD_DEF_SIZE];
+ int fd, mfd, r;
+ void *p;
+ int was_sealed;
+ pid_t pid;
+
+ if (argc < 2) {
+ printf("error: please pass path to file in fuse_mnt mount-point\n");
+ abort();
+ }
+
+ /* open FUSE memfd file for GUP testing */
+ printf("opening: %s\n", argv[1]);
+ fd = open(argv[1], O_RDONLY | O_CLOEXEC);
+ if (fd < 0) {
+ printf("cannot open(\"%s\"): %m\n", argv[1]);
+ abort();
+ }
+
+ /* create new memfd-object */
+ mfd = mfd_assert_new("kern_memfd_fuse",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* mmap memfd-object for writing */
+ p = mfd_assert_mmap_shared(mfd);
+
+ /* pass mfd+mapping to a separate sealing-thread which tries to seal
+ * the memfd objects with SEAL_WRITE while we write into it */
+ global_mfd = mfd;
+ global_p = p;
+ pid = spawn_sealing_thread();
+
+ /* Use read() on the FUSE file to read into our memory-mapped memfd
+ * object. This races the other thread which tries to seal the
+ * memfd-object.
+ * If @fd is on the memfd-fake-FUSE-FS, the read() is delayed by 1s.
+ * This guarantees that the receive-buffer is pinned for 1s until the
+ * data is written into it. The racing ADD_SEALS should thus fail as
+ * the pages are still pinned. */
+ r = read(fd, p, MFD_DEF_SIZE);
+ if (r < 0) {
+ printf("read() failed: %m\n");
+ abort();
+ } else if (!r) {
+ printf("unexpected EOF on read()\n");
+ abort();
+ }
+
+ was_sealed = mfd_assert_get_seals(mfd) & F_SEAL_WRITE;
+
+ /* Wait for sealing-thread to finish and verify that it
+ * successfully sealed the file after the second try. */
+ join_sealing_thread(pid);
+ mfd_assert_has_seals(mfd, F_SEAL_WRITE);
+
+ /* *IF* the memfd-object was sealed at the time our read() returned,
+ * then the kernel did a page-replacement or canceled the read() (or
+ * whatever magic it did..). In that case, the memfd object is still
+ * all zero.
+ * In case the memfd-object was *not* sealed, the read() was successfull
+ * and the memfd object must *not* be all zero.
+ * Note that in real scenarios, there might be a mixture of both, but
+ * in this test-cases, we have explicit 200ms delays which should be
+ * enough to avoid any in-flight writes. */
+
+ p = mfd_assert_mmap_private(mfd);
+ if (was_sealed && memcmp(p, zero, MFD_DEF_SIZE)) {
+ printf("memfd sealed during read() but data not discarded\n");
+ abort();
+ } else if (!was_sealed && !memcmp(p, zero, MFD_DEF_SIZE)) {
+ printf("memfd sealed after read() but data discarded\n");
+ abort();
+ }
+
+ close(mfd);
+ close(fd);
+
+ printf("fuse: DONE\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
new file mode 100644
index 000000000000..3634c909b1b0
--- /dev/null
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -0,0 +1,913 @@
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <linux/falloc.h>
+#include <linux/fcntl.h>
+#include <linux/memfd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#define MFD_DEF_SIZE 8192
+#define STACK_SIZE 65535
+
+static int sys_memfd_create(const char *name,
+ unsigned int flags)
+{
+ return syscall(__NR_memfd_create, name, flags);
+}
+
+static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
+{
+ int r, fd;
+
+ fd = sys_memfd_create(name, flags);
+ if (fd < 0) {
+ printf("memfd_create(\"%s\", %u) failed: %m\n",
+ name, flags);
+ abort();
+ }
+
+ r = ftruncate(fd, sz);
+ if (r < 0) {
+ printf("ftruncate(%llu) failed: %m\n", (unsigned long long)sz);
+ abort();
+ }
+
+ return fd;
+}
+
+static void mfd_fail_new(const char *name, unsigned int flags)
+{
+ int r;
+
+ r = sys_memfd_create(name, flags);
+ if (r >= 0) {
+ printf("memfd_create(\"%s\", %u) succeeded, but failure expected\n",
+ name, flags);
+ close(r);
+ abort();
+ }
+}
+
+static __u64 mfd_assert_get_seals(int fd)
+{
+ long r;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0) {
+ printf("GET_SEALS(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_assert_has_seals(int fd, __u64 seals)
+{
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ if (s != seals) {
+ printf("%llu != %llu = GET_SEALS(%d)\n",
+ (unsigned long long)seals, (unsigned long long)s, fd);
+ abort();
+ }
+}
+
+static void mfd_assert_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ s = mfd_assert_get_seals(fd);
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r < 0) {
+ printf("ADD_SEALS(%d, %llu -> %llu) failed: %m\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+}
+
+static void mfd_fail_add_seals(int fd, __u64 seals)
+{
+ long r;
+ __u64 s;
+
+ r = fcntl(fd, F_GET_SEALS);
+ if (r < 0)
+ s = 0;
+ else
+ s = r;
+
+ r = fcntl(fd, F_ADD_SEALS, seals);
+ if (r >= 0) {
+ printf("ADD_SEALS(%d, %llu -> %llu) didn't fail as expected\n",
+ fd, (unsigned long long)s, (unsigned long long)seals);
+ abort();
+ }
+}
+
+static void mfd_assert_size(int fd, size_t size)
+{
+ struct stat st;
+ int r;
+
+ r = fstat(fd, &st);
+ if (r < 0) {
+ printf("fstat(%d) failed: %m\n", fd);
+ abort();
+ } else if (st.st_size != size) {
+ printf("wrong file size %lld, but expected %lld\n",
+ (long long)st.st_size, (long long)size);
+ abort();
+ }
+}
+
+static int mfd_assert_dup(int fd)
+{
+ int r;
+
+ r = dup(fd);
+ if (r < 0) {
+ printf("dup(%d) failed: %m\n", fd);
+ abort();
+ }
+
+ return r;
+}
+
+static void *mfd_assert_mmap_shared(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static void *mfd_assert_mmap_private(int fd)
+{
+ void *p;
+
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ return p;
+}
+
+static int mfd_assert_open(int fd, int flags, mode_t mode)
+{
+ char buf[512];
+ int r;
+
+ sprintf(buf, "/proc/self/fd/%d", fd);
+ r = open(buf, flags, mode);
+ if (r < 0) {
+ printf("open(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ return r;
+}
+
+static void mfd_fail_open(int fd, int flags, mode_t mode)
+{
+ char buf[512];
+ int r;
+
+ sprintf(buf, "/proc/self/fd/%d", fd);
+ r = open(buf, flags, mode);
+ if (r >= 0) {
+ printf("open(%s) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_read(int fd)
+{
+ char buf[16];
+ void *p;
+ ssize_t l;
+
+ l = read(fd, buf, sizeof(buf));
+ if (l != sizeof(buf)) {
+ printf("read() failed: %m\n");
+ abort();
+ }
+
+ /* verify PROT_READ *is* allowed */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ munmap(p, MFD_DEF_SIZE);
+
+ /* verify MAP_PRIVATE is *always* allowed (even writable) */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ munmap(p, MFD_DEF_SIZE);
+}
+
+static void mfd_assert_write(int fd)
+{
+ ssize_t l;
+ void *p;
+ int r;
+
+ /* verify write() succeeds */
+ l = write(fd, "\0\0\0\0", 4);
+ if (l != 4) {
+ printf("write() failed: %m\n");
+ abort();
+ }
+
+ /* verify PROT_READ | PROT_WRITE is allowed */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ *(char *)p = 0;
+ munmap(p, MFD_DEF_SIZE);
+
+ /* verify PROT_WRITE is allowed */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+ *(char *)p = 0;
+ munmap(p, MFD_DEF_SIZE);
+
+ /* verify PROT_READ with MAP_SHARED is allowed and a following
+ * mprotect(PROT_WRITE) allows writing */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p == MAP_FAILED) {
+ printf("mmap() failed: %m\n");
+ abort();
+ }
+
+ r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+ if (r < 0) {
+ printf("mprotect() failed: %m\n");
+ abort();
+ }
+
+ *(char *)p = 0;
+ munmap(p, MFD_DEF_SIZE);
+
+ /* verify PUNCH_HOLE works */
+ r = fallocate(fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0,
+ MFD_DEF_SIZE);
+ if (r < 0) {
+ printf("fallocate(PUNCH_HOLE) failed: %m\n");
+ abort();
+ }
+}
+
+static void mfd_fail_write(int fd)
+{
+ ssize_t l;
+ void *p;
+ int r;
+
+ /* verify write() fails */
+ l = write(fd, "data", 4);
+ if (l != -EPERM) {
+ printf("expected EPERM on write(), but got %d: %m\n", (int)l);
+ abort();
+ }
+
+ /* verify PROT_READ | PROT_WRITE is not allowed */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ printf("mmap() didn't fail as expected\n");
+ abort();
+ }
+
+ /* verify PROT_WRITE is not allowed */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_WRITE,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ printf("mmap() didn't fail as expected\n");
+ abort();
+ }
+
+ /* Verify PROT_READ with MAP_SHARED with a following mprotect is not
+ * allowed. Note that for r/w the kernel already prevents the mmap. */
+ p = mmap(NULL,
+ MFD_DEF_SIZE,
+ PROT_READ,
+ MAP_SHARED,
+ fd,
+ 0);
+ if (p != MAP_FAILED) {
+ r = mprotect(p, MFD_DEF_SIZE, PROT_READ | PROT_WRITE);
+ if (r >= 0) {
+ printf("mmap()+mprotect() didn't fail as expected\n");
+ abort();
+ }
+ }
+
+ /* verify PUNCH_HOLE fails */
+ r = fallocate(fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0,
+ MFD_DEF_SIZE);
+ if (r >= 0) {
+ printf("fallocate(PUNCH_HOLE) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_shrink(int fd)
+{
+ int r, fd2;
+
+ r = ftruncate(fd, MFD_DEF_SIZE / 2);
+ if (r < 0) {
+ printf("ftruncate(SHRINK) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, MFD_DEF_SIZE / 2);
+
+ fd2 = mfd_assert_open(fd,
+ O_RDWR | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR);
+ close(fd2);
+
+ mfd_assert_size(fd, 0);
+}
+
+static void mfd_fail_shrink(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, MFD_DEF_SIZE / 2);
+ if (r >= 0) {
+ printf("ftruncate(SHRINK) didn't fail as expected\n");
+ abort();
+ }
+
+ mfd_fail_open(fd,
+ O_RDWR | O_CREAT | O_TRUNC,
+ S_IRUSR | S_IWUSR);
+}
+
+static void mfd_assert_grow(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, MFD_DEF_SIZE * 2);
+ if (r < 0) {
+ printf("ftruncate(GROW) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, MFD_DEF_SIZE * 2);
+
+ r = fallocate(fd,
+ 0,
+ 0,
+ MFD_DEF_SIZE * 4);
+ if (r < 0) {
+ printf("fallocate(ALLOC) failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, MFD_DEF_SIZE * 4);
+}
+
+static void mfd_fail_grow(int fd)
+{
+ int r;
+
+ r = ftruncate(fd, MFD_DEF_SIZE * 2);
+ if (r >= 0) {
+ printf("ftruncate(GROW) didn't fail as expected\n");
+ abort();
+ }
+
+ r = fallocate(fd,
+ 0,
+ 0,
+ MFD_DEF_SIZE * 4);
+ if (r >= 0) {
+ printf("fallocate(ALLOC) didn't fail as expected\n");
+ abort();
+ }
+}
+
+static void mfd_assert_grow_write(int fd)
+{
+ static char buf[MFD_DEF_SIZE * 8];
+ ssize_t l;
+
+ l = pwrite(fd, buf, sizeof(buf), 0);
+ if (l != sizeof(buf)) {
+ printf("pwrite() failed: %m\n");
+ abort();
+ }
+
+ mfd_assert_size(fd, MFD_DEF_SIZE * 8);
+}
+
+static void mfd_fail_grow_write(int fd)
+{
+ static char buf[MFD_DEF_SIZE * 8];
+ ssize_t l;
+
+ l = pwrite(fd, buf, sizeof(buf), 0);
+ if (l == sizeof(buf)) {
+ printf("pwrite() didn't fail as expected\n");
+ abort();
+ }
+}
+
+static int idle_thread_fn(void *arg)
+{
+ sigset_t set;
+ int sig;
+
+ /* dummy waiter; SIGTERM terminates us anyway */
+ sigemptyset(&set);
+ sigaddset(&set, SIGTERM);
+ sigwait(&set, &sig);
+
+ return 0;
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+ uint8_t *stack;
+ pid_t pid;
+
+ stack = malloc(STACK_SIZE);
+ if (!stack) {
+ printf("malloc(STACK_SIZE) failed: %m\n");
+ abort();
+ }
+
+ pid = clone(idle_thread_fn,
+ stack + STACK_SIZE,
+ SIGCHLD | flags,
+ NULL);
+ if (pid < 0) {
+ printf("clone() failed: %m\n");
+ abort();
+ }
+
+ return pid;
+}
+
+static void join_idle_thread(pid_t pid)
+{
+ kill(pid, SIGTERM);
+ waitpid(pid, NULL, 0);
+}
+
+/*
+ * Test memfd_create() syscall
+ * Verify syscall-argument validation, including name checks, flag validation
+ * and more.
+ */
+static void test_create(void)
+{
+ char buf[2048];
+ int fd;
+
+ /* test NULL name */
+ mfd_fail_new(NULL, 0);
+
+ /* test over-long name (not zero-terminated) */
+ memset(buf, 0xff, sizeof(buf));
+ mfd_fail_new(buf, 0);
+
+ /* test over-long zero-terminated name */
+ memset(buf, 0xff, sizeof(buf));
+ buf[sizeof(buf) - 1] = 0;
+ mfd_fail_new(buf, 0);
+
+ /* verify "" is a valid name */
+ fd = mfd_assert_new("", 0, 0);
+ close(fd);
+
+ /* verify invalid O_* open flags */
+ mfd_fail_new("", 0x0100);
+ mfd_fail_new("", ~MFD_CLOEXEC);
+ mfd_fail_new("", ~MFD_ALLOW_SEALING);
+ mfd_fail_new("", ~0);
+ mfd_fail_new("", 0x80000000U);
+
+ /* verify MFD_CLOEXEC is allowed */
+ fd = mfd_assert_new("", 0, MFD_CLOEXEC);
+ close(fd);
+
+ /* verify MFD_ALLOW_SEALING is allowed */
+ fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING);
+ close(fd);
+
+ /* verify MFD_ALLOW_SEALING | MFD_CLOEXEC is allowed */
+ fd = mfd_assert_new("", 0, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ close(fd);
+}
+
+/*
+ * Test basic sealing
+ * A very basic sealing test to see whether setting/retrieving seals works.
+ */
+static void test_basic(void)
+{
+ int fd;
+
+ fd = mfd_assert_new("kern_memfd_basic",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+
+ /* add basic seals */
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+
+ /* add them again */
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_WRITE);
+
+ /* add more seals and seal against sealing */
+ mfd_assert_add_seals(fd, F_SEAL_GROW | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_WRITE |
+ F_SEAL_SEAL);
+
+ /* verify that sealing no longer works */
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ mfd_fail_add_seals(fd, 0);
+
+ close(fd);
+
+ /* verify sealing does not work without MFD_ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_basic",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ mfd_fail_add_seals(fd, F_SEAL_SHRINK |
+ F_SEAL_GROW |
+ F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ close(fd);
+}
+
+/*
+ * Test SEAL_WRITE
+ * Test whether SEAL_WRITE actually prevents modifications.
+ */
+static void test_seal_write(void)
+{
+ int fd;
+
+ fd = mfd_assert_new("kern_memfd_seal_write",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+
+ mfd_assert_read(fd);
+ mfd_fail_write(fd);
+ mfd_assert_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK
+ * Test whether SEAL_SHRINK actually prevents shrinking
+ */
+static void test_seal_shrink(void)
+{
+ int fd;
+
+ fd = mfd_assert_new("kern_memfd_seal_shrink",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_fail_shrink(fd);
+ mfd_assert_grow(fd);
+ mfd_assert_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_GROW
+ * Test whether SEAL_GROW actually prevents growing
+ */
+static void test_seal_grow(void)
+{
+ int fd;
+
+ fd = mfd_assert_new("kern_memfd_seal_grow",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_GROW);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_assert_shrink(fd);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test SEAL_SHRINK | SEAL_GROW
+ * Test whether SEAL_SHRINK | SEAL_GROW actually prevents resizing
+ */
+static void test_seal_resize(void)
+{
+ int fd;
+
+ fd = mfd_assert_new("kern_memfd_seal_resize",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK | F_SEAL_GROW);
+
+ mfd_assert_read(fd);
+ mfd_assert_write(fd);
+ mfd_fail_shrink(fd);
+ mfd_fail_grow(fd);
+ mfd_fail_grow_write(fd);
+
+ close(fd);
+}
+
+/*
+ * Test sharing via dup()
+ * Test that seals are shared between dupped FDs and they're all equal.
+ */
+static void test_share_dup(void)
+{
+ int fd, fd2;
+
+ fd = mfd_assert_new("kern_memfd_share_dup",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ fd2 = mfd_assert_dup(fd);
+ mfd_assert_has_seals(fd2, 0);
+
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ mfd_assert_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ mfd_fail_add_seals(fd2, F_SEAL_GROW);
+ mfd_fail_add_seals(fd, F_SEAL_SEAL);
+ mfd_fail_add_seals(fd2, F_SEAL_SEAL);
+
+ close(fd2);
+
+ mfd_fail_add_seals(fd, F_SEAL_GROW);
+ close(fd);
+}
+
+/*
+ * Test sealing with active mmap()s
+ * Modifying seals is only allowed if no other mmap() refs exist.
+ */
+static void test_share_mmap(void)
+{
+ int fd;
+ void *p;
+
+ fd = mfd_assert_new("kern_memfd_share_mmap",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ /* shared/writable ref prevents sealing WRITE, but allows others */
+ p = mfd_assert_mmap_shared(fd);
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_SHRINK);
+ munmap(p, MFD_DEF_SIZE);
+
+ /* readable ref allows sealing */
+ p = mfd_assert_mmap_private(fd);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ munmap(p, MFD_DEF_SIZE);
+
+ close(fd);
+}
+
+/*
+ * Test sealing with open(/proc/self/fd/%d)
+ * Via /proc we can get access to a separate file-context for the same memfd.
+ * This is *not* like dup(), but like a real separate open(). Make sure the
+ * semantics are as expected and we correctly check for RDONLY / WRONLY / RDWR.
+ */
+static void test_share_open(void)
+{
+ int fd, fd2;
+
+ fd = mfd_assert_new("kern_memfd_share_open",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ fd2 = mfd_assert_open(fd, O_RDWR, 0);
+ mfd_assert_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ close(fd);
+ fd = mfd_assert_open(fd2, O_RDONLY, 0);
+
+ mfd_fail_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK);
+
+ close(fd2);
+ fd2 = mfd_assert_open(fd, O_RDWR, 0);
+
+ mfd_assert_add_seals(fd2, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+ mfd_assert_has_seals(fd2, F_SEAL_WRITE | F_SEAL_SHRINK | F_SEAL_SEAL);
+
+ close(fd2);
+ close(fd);
+}
+
+/*
+ * Test sharing via fork()
+ * Test whether seal-modifications work as expected with forked childs.
+ */
+static void test_share_fork(void)
+{
+ int fd;
+ pid_t pid;
+
+ fd = mfd_assert_new("kern_memfd_share_fork",
+ MFD_DEF_SIZE,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_has_seals(fd, 0);
+
+ pid = spawn_idle_thread(0);
+ mfd_assert_add_seals(fd, F_SEAL_SEAL);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ join_idle_thread(pid);
+
+ mfd_fail_add_seals(fd, F_SEAL_WRITE);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+
+ printf("memfd: CREATE\n");
+ test_create();
+ printf("memfd: BASIC\n");
+ test_basic();
+
+ printf("memfd: SEAL-WRITE\n");
+ test_seal_write();
+ printf("memfd: SEAL-SHRINK\n");
+ test_seal_shrink();
+ printf("memfd: SEAL-GROW\n");
+ test_seal_grow();
+ printf("memfd: SEAL-RESIZE\n");
+ test_seal_resize();
+
+ printf("memfd: SHARE-DUP\n");
+ test_share_dup();
+ printf("memfd: SHARE-MMAP\n");
+ test_share_mmap();
+ printf("memfd: SHARE-OPEN\n");
+ test_share_open();
+ printf("memfd: SHARE-FORK\n");
+ test_share_fork();
+
+ /* Run test-suite in a multi-threaded environment with a shared
+ * file-table. */
+ pid = spawn_idle_thread(CLONE_FILES | CLONE_FS | CLONE_VM);
+ printf("memfd: SHARE-DUP (shared file-table)\n");
+ test_share_dup();
+ printf("memfd: SHARE-MMAP (shared file-table)\n");
+ test_share_mmap();
+ printf("memfd: SHARE-OPEN (shared file-table)\n");
+ test_share_open();
+ printf("memfd: SHARE-FORK (shared file-table)\n");
+ test_share_fork();
+ join_idle_thread(pid);
+
+ printf("memfd: DONE\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/memfd/run_fuse_test.sh b/tools/testing/selftests/memfd/run_fuse_test.sh
new file mode 100644
index 000000000000..69b930e1e041
--- /dev/null
+++ b/tools/testing/selftests/memfd/run_fuse_test.sh
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+if test -d "./mnt" ; then
+ fusermount -u ./mnt
+ rmdir ./mnt
+fi
+
+set -e
+
+mkdir mnt
+./fuse_mnt ./mnt
+./fuse_test ./mnt/memfd
+fusermount -u ./mnt
+rmdir ./mnt