diff options
author | Izik Eidus <ieidus@redhat.com> | 2009-10-26 17:04:30 +0200 |
---|---|---|
committer | Yaniv Kamay <ykamay@redhat.com> | 2009-10-26 22:53:08 +0200 |
commit | 94bca281a7d09eb16acf184291ae1b5dd4497bb3 (patch) | |
tree | 491145cbfd7e7189613f934655db13f0082d68ce | |
parent | 2681e63660fa133ceab27cf3509427125ac4dd7d (diff) |
vdesktop: remove kernel dir
Signed-off-by: Izik Eidus <ieidus@redhat.com>
42 files changed, 0 insertions, 7008 deletions
diff --git a/kernel/Kbuild b/kernel/Kbuild deleted file mode 100644 index ec34c43e..00000000 --- a/kernel/Kbuild +++ /dev/null @@ -1,2 +0,0 @@ -obj-$(CONFIG_X86) += x86/ -obj-$(CONFIG_IA64) += ia64/ diff --git a/kernel/Makefile b/kernel/Makefile deleted file mode 100644 index c0f636c4..00000000 --- a/kernel/Makefile +++ /dev/null @@ -1,143 +0,0 @@ -include ../config.mak -include config.kbuild - -ARCH_DIR = $(if $(filter $(ARCH),x86_64 i386),x86,$(ARCH)) -ARCH_CONFIG := $(shell echo $(ARCH_DIR) | tr '[:lower:]' '[:upper:]') -# NONARCH_CONFIG used for unifdef, and only cover X86 and IA64 now -NONARCH_CONFIG = $(filter-out $(ARCH_CONFIG),X86 IA64) - -KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) - -DESTDIR= - -MAKEFILE_PRE = $(ARCH_DIR)/Makefile.pre - -INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR)) -ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR)) - -rpmrelease = devel - -LINUX = ../linux-2.6 - -version = $(shell cd $(LINUX); git describe) - -_hack = mv $1 $1.orig && \ - gawk -v version=$(version) -f $(ARCH_DIR)/hack-module.awk $1.orig \ - | sed '/\#include/! 
s/\blapic\b/l_apic/g' > $1 && rm $1.orig - -unifdef = mv $1 $1.orig && cat unifdef.h $1.orig > $1 && rm $1.orig - -hack = $(call _hack,$T/$(strip $1)) - -hack-files-x86 = kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c i8254.c kvm_trace.c -hack-files-ia64 = kvm_main.c kvm_fw.c kvm_lib.c - -hack-files = $(hack-files-$(ARCH_DIR)) - -ifeq ($(EXT_CONFIG_KVM_TRACE),y) -module_defines += -DEXT_CONFIG_KVM_TRACE=y -endif - -all:: header-link prerequisite -# include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat - $(MAKE) -C $(KERNELDIR) M=`pwd` \ - LINUXINCLUDE="-I`pwd`/include -Iinclude \ - $(if $(KERNELSOURCEDIR),-Iinclude2 -I$(KERNELSOURCEDIR)/include) \ - -Iarch/${ARCH_DIR}/include -I`pwd`/include-compat \ - -include include/linux/autoconf.h \ - -include `pwd`/$(ARCH_DIR)/external-module-compat.h $(module_defines)" - "$$@" -ifndef SKIP_KSM - cp Module.symvers ksm -# $(MAKE) -C $(KERNELDIR) M=`pwd`/ksm \ - LINUXINCLUDE="-I`pwd`/include -Iinclude \ - $(if $(KERNELSOURCEDIR),-Iinclude2 -I$(KERNELSOURCEDIR)/include) \ - -Iarch/${ARCH_DIR}/include -I`pwd`/include-compat \ - -include include/linux/autoconf.h \ - -include `pwd`/$(ARCH_DIR)/external-module-compat.h $(module_defines)" - "$$@" -endif - -sync: header-sync source-sync header-link - -header-link: - rm -f include/asm include-compat/asm - ln -sf asm-$(ARCH_DIR) include/asm - ln -sf asm-$(ARCH_DIR) include-compat/asm - -T = $(subst -sync,,$@)-tmp - -headers-old = $(LINUX)/./include/asm-$(ARCH_DIR)/kvm*.h -headers-new = $(LINUX)/arch/$(ARCH_DIR)/include/asm/./kvm*.h \ - $(LINUX)/arch/$(ARCH_DIR)/include/asm/./vmx*.h \ - $(LINUX)/arch/$(ARCH_DIR)/include/asm/./svm*.h \ - $(LINUX)/arch/$(ARCH_DIR)/include/asm/./virtext*.h - -header-sync: - rm -rf $T - rsync -R -L \ - "$(LINUX)"/./include/linux/kvm*.h \ - $(if $(wildcard $(headers-old)), $(headers-old)) \ - $T/ - $(if $(wildcard $(headers-new)), \ - rsync -R -L \ - $(wildcard $(headers-new)) \ - $T/include/asm-$(ARCH_DIR)/) - - for i in $$(find $T -name 
'*.h'); do \ - $(call unifdef,$$i); done - $(call hack, include/linux/kvm.h) - set -e && for i in $$(find $T -type f -printf '%P '); \ - do mkdir -p $$(dirname $$i); cmp -s $$i $T/$$i || cp $T/$$i $$i; done - rm -rf $T - -source-sync: - rm -rf $T - rsync --exclude='*.mod.c' -R \ - "$(LINUX)"/arch/$(ARCH_DIR)/kvm/./*.[cSh] \ - "$(LINUX)"/virt/kvm/./*.[cSh] \ - $T/ - - for i in $$(find $T -name '*.c'); do \ - $(call unifdef,$$i); done - - for i in $(hack-files); \ - do $(call hack, $$i); done - - for i in $$(find $T -type f -printf '%P '); \ - do cmp -s $(ARCH_DIR)/$$i $T/$$i || cp $T/$$i $(ARCH_DIR)/$$i; done - rm -rf $T - -include $(MAKEFILE_PRE) - -install: - mkdir -p $(DESTDIR)/$(INSTALLDIR) - cp $(ARCH_DIR)/*.ko $(DESTDIR)/$(INSTALLDIR) -ifndef SKIP_KSM - cp $(ARCH_DIR)/ksm/*.ko $(DESTDIR)/$(INSTALLDIR) -endif - for i in $(ORIGMODDIR)/drivers/kvm/*.ko \ - $(ORIGMODDIR)/arch/$(ARCH_DIR)/kvm/*.ko; do \ - if [ -f "$$i" ]; then mv "$$i" "$$i.orig"; fi; \ - done - /sbin/depmod -a $(DEPMOD_VERSION) - -tmpspec = .tmp.kvm-kmod.spec - -rpm-topdir := $$(pwd)/../rpmtop - -RPMDIR = $(rpm-topdir)/RPMS - -rpm: all - mkdir -p $(rpm-topdir)/BUILD $(RPMDIR)/$$(uname -i) - sed 's/^Release:.*/Release: $(rpmrelease)/; s/^%define kverrel.*/%define kverrel $(KVERREL)/' \ - kvm-kmod.spec > $(tmpspec) - rpmbuild --define="kverrel $(KVERREL)" \ - --define="objdir $$(pwd)/$(ARCH_DIR)" \ - --define="_rpmdir $(RPMDIR)" \ - --define="_topdir $(rpm-topdir)" \ - -bb $(tmpspec) - -clean: - $(MAKE) -C $(KERNELDIR) M=`pwd` $@ - $(MAKE) -C $(KERNELDIR) M=`pwd`/ksm $@ diff --git a/kernel/anon_inodes.c b/kernel/anon_inodes.c deleted file mode 100644 index 7a7fc459..00000000 --- a/kernel/anon_inodes.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * fs/anon_inodes.c - * - * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> - * - * Thanks to Arnd Bergmann for code review and suggestions. - * More changes for Thomas Gleixner suggestions. 
- * - */ - -#include <linux/file.h> -#include <linux/poll.h> -#include <linux/slab.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/mount.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/magic.h> -#include <linux/anon_inodes.h> - -#include <asm/uaccess.h> - -/* anon_inodes on RHEL >= 5.2 is equivalent to 2.6.27 version */ -#ifdef RHEL_RELEASE_CODE -# if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2)) && defined(CONFIG_ANON_INODES) -# define RHEL_ANON_INODES -# endif -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) && !defined(RHEL_ANON_INODES) - -static struct vfsmount *anon_inode_mnt __read_mostly; -static struct inode *anon_inode_inode; -static struct file_operations anon_inode_fops; - -#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,17) - -static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "kvm_anon_inode:", NULL, 0x99700426, mnt); -} - -#else - -static struct super_block *anon_inodefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) -{ - return get_sb_pseudo(fs_type, "kvm_anon_inode:", NULL, 0x99700426); -} - -#endif - -static int anon_inodefs_delete_dentry(struct dentry *dentry) -{ - /* - * We faked vfs to believe the dentry was hashed when we created it. - * Now we restore the flag so that dput() will work correctly. 
- */ - dentry->d_flags |= DCACHE_UNHASHED; - return 1; -} - -static struct file_system_type anon_inode_fs_type = { - .name = "kvm_anon_inodefs", - .get_sb = anon_inodefs_get_sb, - .kill_sb = kill_anon_super, -}; -static struct dentry_operations anon_inodefs_dentry_operations = { - .d_delete = anon_inodefs_delete_dentry, -}; - -/** - * anon_inode_getfd - creates a new file instance by hooking it up to and - * anonymous inode, and a dentry that describe the "class" - * of the file - * - * @name: [in] name of the "class" of the new file - * @fops [in] file operations for the new file - * @priv [in] private data for the new file (will be file's private_data) - * - * Creates a new file by hooking it on a single inode. This is useful for files - * that do not need to have a full-fledged inode in order to operate correctly. - * All the files created with anon_inode_getfd() will share a single inode, by - * hence saving memory and avoiding code duplication for the file/inode/dentry - * setup. Returns new descriptor or -error. - */ -int anon_inode_getfd(const char *name, const struct file_operations *fops, - void *priv, int flags) -{ - struct qstr this; - struct dentry *dentry; - struct inode *inode; - struct file *file; - int error, fd; - - if (IS_ERR(anon_inode_inode)) - return -ENODEV; - file = get_empty_filp(); - if (!file) - return -ENFILE; - - inode = igrab(anon_inode_inode); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - goto err_put_filp; - } - - error = get_unused_fd(); - if (error < 0) - goto err_iput; - fd = error; - - /* - * Link the inode to a directory entry by creating a unique name - * using the inode sequence number. 
- */ - error = -ENOMEM; - this.name = name; - this.len = strlen(name); - this.hash = 0; - dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); - if (!dentry) - goto err_put_unused_fd; - dentry->d_op = &anon_inodefs_dentry_operations; - /* Do not publish this dentry inside the global dentry hash table */ - dentry->d_flags &= ~DCACHE_UNHASHED; - d_instantiate(dentry, inode); - - file->f_vfsmnt = mntget(anon_inode_mnt); - file->f_dentry = dentry; - file->f_mapping = inode->i_mapping; - - file->f_pos = 0; - file->f_flags = O_RDWR; - file->f_op = (struct file_operations *)fops; - file->f_mode = FMODE_READ | FMODE_WRITE; - file->f_version = 0; - file->private_data = priv; - - fd_install(fd, file); - - return fd; - -err_put_unused_fd: - put_unused_fd(fd); -err_iput: - iput(inode); -err_put_filp: - fput(file); - return error; -} -EXPORT_SYMBOL(anon_inode_getfd); - -/* - * A single inode exist for all anon_inode files. Contrary to pipes, - * anon_inode inodes has no per-instance data associated, so we can avoid - * the allocation of multiple of them. - */ -static struct inode *anon_inode_mkinode(void) -{ - struct inode *inode = new_inode(anon_inode_mnt->mnt_sb); - - if (!inode) - return ERR_PTR(-ENOMEM); - - inode->i_fop = &anon_inode_fops; - - /* - * Mark the inode dirty from the very beginning, - * that way it will never be moved to the dirty - * list because mark_inode_dirty() will think - * that it already _is_ on the dirty list. 
- */ - inode->i_state = I_DIRTY; - inode->i_mode = S_IRUSR | S_IWUSR; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; - inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; - return inode; -} - -static int anon_inode_init(void) -{ - int error; - - error = register_filesystem(&anon_inode_fs_type); - if (error) - goto err_exit; - anon_inode_mnt = kern_mount(&anon_inode_fs_type); - if (IS_ERR(anon_inode_mnt)) { - error = PTR_ERR(anon_inode_mnt); - goto err_unregister_filesystem; - } - anon_inode_inode = anon_inode_mkinode(); - if (IS_ERR(anon_inode_inode)) { - error = PTR_ERR(anon_inode_inode); - goto err_mntput; - } - - return 0; - -err_mntput: - mntput(anon_inode_mnt); -err_unregister_filesystem: - unregister_filesystem(&anon_inode_fs_type); -err_exit: - return -ENOMEM; -} - -int kvm_init_anon_inodes(void) -{ - return anon_inode_init(); -} - -void kvm_exit_anon_inodes(void) -{ - iput(anon_inode_inode); - mntput(anon_inode_mnt); - unregister_filesystem(&anon_inode_fs_type); -} -EXPORT_SYMBOL(kvm_anon_inode_getfd); - -#else - -int kvm_init_anon_inodes(void) -{ - return 0; -} - -void kvm_exit_anon_inodes(void) -{ -} - -#undef anon_inode_getfd - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) && !defined(RHEL_ANON_INODES) - -int kvm_anon_inode_getfd(const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - int r; - int fd; - struct inode *inode; - struct file *file; - - r = anon_inode_getfd(&fd, &inode, &file, name, fops, priv); - if (r < 0) - return r; - return fd; -} - -#elif LINUX_VERSION_CODE == KERNEL_VERSION(2,6,26) && !defined(RHEL_ANON_INODES) - -int kvm_anon_inode_getfd(const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - return anon_inode_getfd(name, fops, priv); -} -EXPORT_SYMBOL(kvm_anon_inode_getfd); - -#else - -int kvm_anon_inode_getfd(const char *name, - const struct file_operations *fops, - void *priv, int flags) -{ - return anon_inode_getfd(name, fops, priv, 
flags); -} -EXPORT_SYMBOL(kvm_anon_inode_getfd); - -#endif - -#endif diff --git a/kernel/external-module-compat-comm.h b/kernel/external-module-compat-comm.h deleted file mode 100644 index e324f781..00000000 --- a/kernel/external-module-compat-comm.h +++ /dev/null @@ -1,770 +0,0 @@ - -/* - * Compatibility header for building as an external module. - */ - -/* - * Avoid picking up the kernel's kvm.h in case we have a newer one. - */ - -#include <linux/compiler.h> -#include <linux/version.h> -#include <linux/string.h> -#include <linux/kvm.h> -#include <linux/kvm_para.h> -#include <linux/cpu.h> -#include <linux/time.h> -#include <asm/processor.h> -#include <linux/hrtimer.h> -#include <asm/bitops.h> - -/* Override CONFIG_KVM_TRACE */ -#ifdef EXT_CONFIG_KVM_TRACE -# define CONFIG_KVM_TRACE 1 -#else -# undef CONFIG_KVM_TRACE -#endif - -/* - * 2.6.16 does not have GFP_NOWAIT - */ - -#include <linux/gfp.h> - -#ifndef GFP_NOWAIT -#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) -#endif - - -/* - * kvm profiling support needs 2.6.20 - */ -#include <linux/profile.h> - -#ifndef KVM_PROFILING -#define KVM_PROFILING 1234 -#define prof_on 4321 -#endif - -/* - * smp_call_function_single() is not exported below 2.6.20, and has different - * semantics below 2.6.23. The 'nonatomic' argument was removed in 2.6.27. - */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) - -int kvm_smp_call_function_single(int cpu, void (*func)(void *info), - void *info, int wait); - -#define smp_call_function_single kvm_smp_call_function_single - -#endif - -/* on_each_cpu() lost an argument in 2.6.27. 
*/ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) - -#define kvm_on_each_cpu(func, info, wait) on_each_cpu(func, info, 0, wait) - -#else - -#define kvm_on_each_cpu(func, info, wait) on_each_cpu(func, info, wait) - -#endif - -/* - * The cpu hotplug stubs are broken if !CONFIG_CPU_HOTPLUG - */ - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15) -#define DEFINE_MUTEX(a) DECLARE_MUTEX(a) -#define mutex_lock_interruptible(a) down_interruptible(a) -#define mutex_unlock(a) up(a) -#define mutex_lock(a) down(a) -#define mutex_init(a) init_MUTEX(a) -#define mutex_trylock(a) down_trylock(a) -#define mutex semaphore -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) -#ifndef kzalloc -#define kzalloc(size,flags) \ -({ \ - void *__ret = kmalloc(size, flags); \ - if (__ret) \ - memset(__ret, 0, size); \ - __ret; \ -}) -#endif -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -#ifndef kmem_cache_zalloc -#define kmem_cache_zalloc(cache,flags) \ -({ \ - void *__ret = kmem_cache_alloc(cache, flags); \ - if (__ret) \ - memset(__ret, 0, kmem_cache_size(cache)); \ - __ret; \ -}) -#endif -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) - -#ifndef CONFIG_HOTPLUG_CPU -#define register_cpu_notifier(nb) (0) -#endif - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) -#define nr_cpu_ids NR_CPUS -#endif - -#include <linux/miscdevice.h> -#ifndef KVM_MINOR -#define KVM_MINOR 232 -#endif - -#include <linux/notifier.h> -#ifndef CPU_TASKS_FROZEN - -#define CPU_TASKS_FROZEN 0x0010 -#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) -#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) -#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) -#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) -#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) -#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) - -#endif - -#ifndef CPU_DYING -#define CPU_DYING 0x000A -#define CPU_DYING_FROZEN (CPU_DYING | 
CPU_TASKS_FROZEN) -#endif - -#include <asm/system.h> - -struct inode; -#include <linux/anon_inodes.h> -#define anon_inode_getfd kvm_anon_inode_getfd -int kvm_init_anon_inodes(void); -void kvm_exit_anon_inodes(void); -int anon_inode_getfd(const char *name, - const struct file_operations *fops, - void *priv , int flags); - -/* - * 2.6.23 removed the cache destructor - */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -# define kmem_cache_create(name, size, align, flags, ctor) \ - kmem_cache_create(name, size, align, flags, ctor, NULL) -#endif - -/* HRTIMER_MODE_ABS started life with a different name */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) -#define HRTIMER_MODE_ABS HRTIMER_ABS -#endif - -/* div64_u64 is fairly new */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) - -#define div64_u64 kvm_div64_u64 - -#ifdef CONFIG_64BIT - -static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor) -{ - return dividend / divisor; -} - -#else - -uint64_t div64_u64(uint64_t dividend, uint64_t divisor); - -#endif - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - -#ifdef RHEL_RELEASE_CODE -#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2) -#define RHEL_BOOL 1 -#endif -#endif - -#ifndef RHEL_BOOL - -typedef _Bool bool; - -#endif - -#endif - -/* - * PF_VCPU is a Linux 2.6.24 addition - */ - -#include <linux/sched.h> - -#ifndef PF_VCPU -#define PF_VCPU 0 -#endif - -/* - * smp_call_function_mask() is not defined/exported below 2.6.24 on all - * targets and below 2.6.26 on x86-64 - */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) || \ - (defined CONFIG_X86_64 && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) - -int kvm_smp_call_function_mask(cpumask_t mask, void (*func) (void *info), - void *info, int wait); - -#define smp_call_function_mask kvm_smp_call_function_mask - -#if (!defined(RHEL_RELEASE_CODE) || \ - RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,4) || \ - !defined(CONFIG_X86_64)) - -void kvm_smp_send_reschedule(int cpu); - -#else - -#define 
kvm_smp_send_reschedule smp_send_reschedule - -#endif -#endif - -/* empty_zero_page isn't exported in all kernels */ -#include <asm/pgtable.h> - -#define empty_zero_page kvm_empty_zero_page - -static char empty_zero_page[PAGE_SIZE]; - -static inline void blahblah(void) -{ - (void)empty_zero_page[0]; -} - -/* __mmdrop() is not exported before 2.6.25 */ -#include <linux/sched.h> - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - -#define mmdrop(x) do { (void)(x); } while (0) -#define mmget(x) do { (void)(x); } while (0) - -#else - -#define mmget(x) do { atomic_inc(x); } while (0) - -#endif - -/* pagefault_enable(), page_fault_disable() - 2.6.20 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) -#if RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,2) - -static inline void pagefault_disable(void) -{ - inc_preempt_count(); - /* - * make sure to have issued the store before a pagefault - * can hit. - */ - barrier(); -} - -static inline void pagefault_enable(void) -{ - /* - * make sure to issue those last loads/stores before enabling - * the pagefault handler again. - */ - barrier(); - dec_preempt_count(); - /* - * make sure we do.. - */ - barrier(); - preempt_check_resched(); -} - -#else -#include <linux/uaccess.h> -#endif -#endif - -/* vm ops ->fault() was introduced in 2.6.23. 
*/ -#include <linux/mm.h> - -#ifdef KVM_MAIN -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - -struct vm_fault { - unsigned int flags; - pgoff_t pgoff; - void __user *virtual_address; - struct page *page; -}; - -static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf); -static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf); - -static inline struct page *kvm_nopage_to_fault( - int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf), - struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - struct vm_fault vmf; - int ret; - - vmf.pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; - vmf.virtual_address = (void __user *)address; - ret = fault(vma, &vmf); - if (ret) - return NOPAGE_SIGBUS; - *type = VM_FAULT_MINOR; - return vmf.page; -} - -static inline struct page *__kvm_vcpu_fault(struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - return kvm_nopage_to_fault(kvm_vcpu_fault, vma, address, type); -} - -static inline struct page *__kvm_vm_fault(struct vm_area_struct *vma, - unsigned long address, - int *type) -{ - return kvm_nopage_to_fault(kvm_vm_fault, vma, address, type); -} - -#define VMA_OPS_FAULT(x) nopage -#define VMA_OPS_FAULT_FUNC(x) __##x - -#else - -#define VMA_OPS_FAULT(x) x -#define VMA_OPS_FAULT_FUNC(x) x - -#endif -#endif - -/* simple vfs attribute getter signature has changed to add a return code */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - -#define MAKE_SIMPLE_ATTRIBUTE_GETTER(x) \ - static u64 x(void *v) \ - { \ - u64 ret = 0; \ - \ - __##x(v, &ret); \ - return ret; \ - } - -#else - -#define MAKE_SIMPLE_ATTRIBUTE_GETTER(x) \ - static int x(void *v, u64 *val) \ - { \ - return __##x(v, val); \ - } - -#endif - -/* set_kset_name() is gone in 2.6.25 */ - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) - -#define set_kset_name(x) .name = x - -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) -#ifndef FASTCALL -#define FASTCALL(x) x -#define 
fastcall -#endif -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -extern unsigned int tsc_khz; -#endif - -#define kvm_tsc_khz tsc_khz - -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) - -#include <linux/ktime.h> -#include <linux/hrtimer.h> - -#define ktime_get kvm_ktime_get - -static inline ktime_t ktime_get(void) -{ - struct timespec now; - - ktime_get_ts(&now); - - return timespec_to_ktime(now); -} - -#endif - -/* __aligned arrived in 2.6.21 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) -#if !defined(RHEL_RELEASE_CODE) || (RHEL_RELEASE_CODE <= RHEL_RELEASE_VERSION(5,2) && !defined(__aligned)) -#define __aligned(x) __attribute__((__aligned__(x))) -#endif -#endif - -#include <linux/mm.h> - -/* The shrinker API changed in 2.6.23 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - -struct kvm_shrinker { - int (*shrink)(int nr_to_scan, gfp_t gfp_mask); - int seeks; - struct shrinker *kshrinker; -}; - -static inline void register_shrinker(struct kvm_shrinker *shrinker) -{ - shrinker->kshrinker = set_shrinker(shrinker->seeks, shrinker->shrink); -} - -static inline void unregister_shrinker(struct kvm_shrinker *shrinker) -{ - if (shrinker->kshrinker) - remove_shrinker(shrinker->kshrinker); -} - -#define shrinker kvm_shrinker - -#endif - -/* clocksource */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) -static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) -{ - /* khz = cyc/(Million ns) - * mult/2^shift = ns/cyc - * mult = ns/cyc * 2^shift - * mult = 1Million/khz * 2^shift - * mult = 1000000 * 2^shift / khz - * mult = (1000000<<shift) / khz - */ - u64 tmp = ((u64)1000000) << shift_constant; - - tmp += khz/2; /* round for do_div */ - do_div(tmp, khz); - - return (u32)tmp; -} -#else -#include <linux/clocksource.h> -#endif - -/* manually export hrtimer_init/start/cancel */ -#include <linux/kallsyms.h> -extern void (*hrtimer_init_p)(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); -extern int 
(*hrtimer_start_p)(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode); -extern int (*hrtimer_cancel_p)(struct hrtimer *timer); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) && defined(CONFIG_KALLSYMS) -static inline void hrtimer_kallsyms_resolve(void) -{ - hrtimer_init_p = (void *) kallsyms_lookup_name("hrtimer_init"); - BUG_ON(!hrtimer_init_p); - hrtimer_start_p = (void *) kallsyms_lookup_name("hrtimer_start"); - BUG_ON(!hrtimer_start_p); - hrtimer_cancel_p = (void *) kallsyms_lookup_name("hrtimer_cancel"); - BUG_ON(!hrtimer_cancel_p); -} -#else -static inline void hrtimer_kallsyms_resolve(void) -{ - hrtimer_init_p = hrtimer_init; - hrtimer_start_p = hrtimer_start; - hrtimer_cancel_p = hrtimer_cancel; -} -#endif - -/* handle old hrtimer API with data pointer */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) -static inline void hrtimer_data_pointer(struct hrtimer *timer) -{ - timer->data = (void *)timer; -} -#else -static inline void hrtimer_data_pointer(struct hrtimer *timer) {} -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - -#define ns_to_timespec kvm_ns_to_timespec - -struct timespec kvm_ns_to_timespec(const s64 nsec); - -#endif - -/* work_struct lost the 'data' field in 2.6.20 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - -#define kvm_INIT_WORK(work, handler) \ - INIT_WORK(work, (void (*)(void *))handler, work) - -#else - -#define kvm_INIT_WORK(work, handler) INIT_WORK(work, handler) - -#endif - -/* cancel_work_sync() was flush_work() in 2.6.21 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - -static inline int cancel_work_sync(struct work_struct *work) -{ - /* - * FIXME: actually cancel. How? Add own implementation of workqueues? - */ - return 0; -} - -/* ... 
and it returned void before 2.6.23 */ -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) - -#define cancel_work_sync(work) ({ cancel_work_sync(work); 0; }) - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - -struct pci_dev; - -struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn); - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) - -#include <linux/relay.h> - -/* relay_open() interface has changed on 2.6.21 */ - -struct rchan *kvm_relay_open(const char *base_filename, - struct dentry *parent, - size_t subbuf_size, - size_t n_subbufs, - struct rchan_callbacks *cb, - void *private_data); - -#else - -#define kvm_relay_open relay_open - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) - -static inline int kvm_get_user_pages_fast(unsigned long start, int nr_pages, - int write, struct page **pages) -{ - int npages; - - down_read(&current->mm->mmap_sem); - npages = get_user_pages(current, current->mm, start, nr_pages, write, - 0, pages, NULL); - up_read(&current->mm->mmap_sem); - - return npages; -} -#else - -#define kvm_get_user_pages_fast get_user_pages_fast - -#endif - -/* spin_needbreak() was called something else in 2.6.24 */ -#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) - -#define spin_needbreak need_lockbreak - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - -static inline void kvm_hrtimer_add_expires_ns(struct hrtimer *timer, u64 delta) -{ - timer->expires = ktime_add_ns(timer->expires, delta); -} - -static inline ktime_t kvm_hrtimer_get_expires(struct hrtimer *timer) -{ - return timer->expires; -} - -static inline u64 kvm_hrtimer_get_expires_ns(struct hrtimer *timer) -{ - return ktime_to_ns(timer->expires); -} - -static inline void kvm_hrtimer_start_expires(struct hrtimer *timer, int mode) -{ - hrtimer_start_p(timer, timer->expires, mode); -} - -static inline ktime_t kvm_hrtimer_expires_remaining(const struct hrtimer *timer) -{ - return ktime_sub(timer->expires, timer->base->get_time()); -} - -#else -
-#define kvm_hrtimer_add_expires_ns hrtimer_add_expires_ns -#define kvm_hrtimer_get_expires hrtimer_get_expires -#define kvm_hrtimer_get_expires_ns hrtimer_get_expires_ns -#define kvm_hrtimer_start_expires hrtimer_start_expires -#define kvm_hrtimer_expires_remaining hrtimer_expires_remaining - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -#if !defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,4) - -static inline int pci_reset_function(struct pci_dev *dev) -{ - return 0; -} - -#endif -#endif - -#ifndef KMEM_CACHE -/* - * Please use this macro to create slab caches. Simply specify the - * name of the structure and maybe some flags that are listed above. - * - * The alignment of the struct determines object alignment. If you - * f.e. add ____cacheline_aligned_in_smp to the struct declaration - * then the objects will be properly aligned in SMP configurations. - */ -#define KMEM_CACHE(__struct, __flags) kmem_cache_create(#__struct,\ - sizeof(struct __struct), __alignof__(struct __struct),\ - (__flags), NULL) -#endif - -#include <linux/interrupt.h> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - -typedef irqreturn_t (*kvm_irq_handler_t)(int, void *); -int kvm_request_irq(unsigned int a, kvm_irq_handler_t handler, unsigned long c, - const char *d, void *e); -void kvm_free_irq(unsigned int irq, void *dev_id); - -#else - -#define kvm_request_irq request_irq -#define kvm_free_irq free_irq - -#endif - -/* dynamically allocated cpu masks introduced in 2.6.28 */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - -typedef cpumask_t cpumask_var_t[1]; - -static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) -{ - return 1; -} - -static inline void free_cpumask_var(cpumask_var_t mask) -{ -} - -static inline void cpumask_clear(cpumask_var_t mask) -{ - cpus_clear(*mask); -} - -static inline void cpumask_set_cpu(int cpu, cpumask_var_t mask) -{ - cpu_set(cpu, *mask); -} - -static inline int smp_call_function_many(cpumask_var_t 
cpus, - void (*func)(void *data), void *data, - int sync) -{ - return smp_call_function_mask(*cpus, func, data, sync); -} - -static inline int cpumask_empty(cpumask_var_t mask) -{ - return cpus_empty(*mask); -} - -static inline int cpumask_test_cpu(int cpu, cpumask_var_t mask) -{ - return cpu_isset(cpu, *mask); -} - -static inline void cpumask_clear_cpu(int cpu, cpumask_var_t mask) -{ - cpu_clear(cpu, *mask); -} - -#define cpu_online_mask (&cpu_online_map) - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) - -#define IF_ANON_INODES_DOES_REFCOUNTS(x) - -#else - -#define IF_ANON_INODES_DOES_REFCOUNTS(x) x - -#endif - - -/* Macro introduced only on newer kernels: */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -#define marker_synchronize_unregister() synchronize_sched() -#endif - -/* compound_head() was introduced in 2.6.22 */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) -# define NEED_COMPOUND_HEAD 1 -# ifdef RHEL_RELEASE_CODE -# if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2) -# undef NEED_COMPOUND_HEAD -# endif -# endif -#endif - -#ifdef NEED_COMPOUND_HEAD - -static inline struct page *compound_head(struct page *page) -{ - if (PageCompound(page)) - page = (struct page *)page_private(page); - return page; -} - -#endif diff --git a/kernel/external-module-compat.c b/kernel/external-module-compat.c deleted file mode 100644 index 29414731..00000000 --- a/kernel/external-module-compat.c +++ /dev/null @@ -1,359 +0,0 @@ - -/* - * smp_call_function_single() is not exported below 2.6.20. 
- */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - -#undef smp_call_function_single - -#include <linux/spinlock.h> -#include <linux/smp.h> - -struct scfs_thunk_info { - int cpu; - void (*func)(void *info); - void *info; -}; - -static void scfs_thunk(void *_thunk) -{ - struct scfs_thunk_info *thunk = _thunk; - - if (raw_smp_processor_id() == thunk->cpu) - thunk->func(thunk->info); -} - -int kvm_smp_call_function_single(int cpu, void (*func)(void *info), - void *info, int wait) -{ - int r, this_cpu; - struct scfs_thunk_info thunk; - - this_cpu = get_cpu(); - WARN_ON(irqs_disabled()); - if (cpu == this_cpu) { - r = 0; - local_irq_disable(); - func(info); - local_irq_enable(); - } else { - thunk.cpu = cpu; - thunk.func = func; - thunk.info = info; - r = smp_call_function(scfs_thunk, &thunk, 0, 1); - } - put_cpu(); - return r; -} - -#define smp_call_function_single kvm_smp_call_function_single - -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -/* - * pre 2.6.23 doesn't handle smp_call_function_single on current cpu - */ - -#undef smp_call_function_single - -#include <linux/smp.h> - -int kvm_smp_call_function_single(int cpu, void (*func)(void *info), - void *info, int wait) -{ - int this_cpu, r; - - this_cpu = get_cpu(); - WARN_ON(irqs_disabled()); - if (cpu == this_cpu) { - r = 0; - local_irq_disable(); - func(info); - local_irq_enable(); - } else - r = smp_call_function_single(cpu, func, info, 0, wait); - put_cpu(); - return r; -} - -#define smp_call_function_single kvm_smp_call_function_single - -#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) - -/* The 'nonatomic' argument was removed in 2.6.27. 
*/ - -#undef smp_call_function_single - -#include <linux/smp.h> - -int kvm_smp_call_function_single(int cpu, void (*func)(void *info), - void *info, int wait) -{ - return smp_call_function_single(cpu, func, info, 0, wait); -} - -#define smp_call_function_single kvm_smp_call_function_single - -#endif - -/* div64_u64 is fairly new */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) - -#ifndef CONFIG_64BIT - -/* 64bit divisor, dividend and result. dynamic precision */ -uint64_t div64_u64(uint64_t dividend, uint64_t divisor) -{ - uint32_t high, d; - - high = divisor >> 32; - if (high) { - unsigned int shift = fls(high); - - d = divisor >> shift; - dividend >>= shift; - } else - d = divisor; - - do_div(dividend, d); - - return dividend; -} - -#endif - -#endif - -/* - * smp_call_function_mask() is not defined/exported below 2.6.24 on all - * targets and below 2.6.26 on x86-64 - */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) || \ - (defined CONFIG_X86_64 && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) - -#include <linux/smp.h> - -struct kvm_call_data_struct { - void (*func) (void *info); - void *info; - atomic_t started; - atomic_t finished; - int wait; -}; - -static void kvm_ack_smp_call(void *_data) -{ - struct kvm_call_data_struct *data = _data; - /* if wait == 0, data can be out of scope - * after atomic_inc(info->started) - */ - void (*func) (void *info) = data->func; - void *info = data->info; - int wait = data->wait; - - smp_mb(); - atomic_inc(&data->started); - (*func)(info); - if (wait) { - smp_mb(); - atomic_inc(&data->finished); - } -} - -int kvm_smp_call_function_mask(cpumask_t mask, - void (*func) (void *info), void *info, int wait) -{ - struct kvm_call_data_struct data; - cpumask_t allbutself; - int cpus; - int cpu; - int me; - - me = get_cpu(); - WARN_ON(irqs_disabled()); - allbutself = cpu_online_map; - cpu_clear(me, allbutself); - - cpus_and(mask, mask, allbutself); - cpus = cpus_weight(mask); - - if (!cpus) - goto out; - - data.func = func; - 
data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - for (cpu = first_cpu(mask); cpu != NR_CPUS; cpu = next_cpu(cpu, mask)) - smp_call_function_single(cpu, kvm_ack_smp_call, &data, 0); - - while (atomic_read(&data.started) != cpus) { - cpu_relax(); - barrier(); - } - - if (!wait) - goto out; - - while (atomic_read(&data.finished) != cpus) { - cpu_relax(); - barrier(); - } -out: - put_cpu(); - return 0; -} - -#include <linux/workqueue.h> - -static void vcpu_kick_intr(void *info) -{ -} - -struct kvm_kick { - int cpu; - struct work_struct work; -}; - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) -static void kvm_do_smp_call_function(void *data) -{ - int me; - struct kvm_kick *kvm_kick = data; -#else -static void kvm_do_smp_call_function(struct work_struct *work) -{ - int me; - struct kvm_kick *kvm_kick = container_of(work, struct kvm_kick, work); -#endif - me = get_cpu(); - - if (kvm_kick->cpu != me) - smp_call_function_single(kvm_kick->cpu, vcpu_kick_intr, - NULL, 0); - kfree(kvm_kick); - put_cpu(); -} - -void kvm_queue_smp_call_function(int cpu) -{ - struct kvm_kick *kvm_kick = kmalloc(sizeof(struct kvm_kick), GFP_ATOMIC); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function, kvm_kick); -#else - INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function); -#endif - - schedule_work(&kvm_kick->work); -} - -#if (!defined(RHEL_RELEASE_CODE) || \ - RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,4) || \ - !defined(CONFIG_X86_64)) -void kvm_smp_send_reschedule(int cpu) -{ - if (irqs_disabled()) { - kvm_queue_smp_call_function(cpu); - return; - } - smp_call_function_single(cpu, vcpu_kick_intr, NULL, 0); -} -#endif - -#endif - -/* manually export hrtimer_init/start/cancel */ -void (*hrtimer_init_p)(struct hrtimer *timer, clockid_t which_clock, - enum hrtimer_mode mode); -int (*hrtimer_start_p)(struct hrtimer *timer, ktime_t tim, - const enum hrtimer_mode mode); -int 
(*hrtimer_cancel_p)(struct hrtimer *timer); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - -static void kvm_set_normalized_timespec(struct timespec *ts, time_t sec, - long nsec) -{ - while (nsec >= NSEC_PER_SEC) { - nsec -= NSEC_PER_SEC; - ++sec; - } - while (nsec < 0) { - nsec += NSEC_PER_SEC; - --sec; - } - ts->tv_sec = sec; - ts->tv_nsec = nsec; -} - -struct timespec kvm_ns_to_timespec(const s64 nsec) -{ - struct timespec ts; - - if (!nsec) - return (struct timespec) {0, 0}; - - ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); - if (unlikely(nsec < 0)) - kvm_set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); - - return ts; -} - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - -#include <linux/pci.h> - -struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) -{ - struct pci_dev *dev = NULL; - - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { - if (pci_domain_nr(dev->bus) == 0 && - (dev->bus->number == bus && dev->devfn == devfn)) - return dev; - } - return NULL; -} - -#endif - -#include <linux/intel-iommu.h> - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - -int intel_iommu_found() -{ - return 0; -} - -#endif - - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) - -/* relay_open() interface has changed on 2.6.21 */ - -struct rchan *kvm_relay_open(const char *base_filename, - struct dentry *parent, - size_t subbuf_size, - size_t n_subbufs, - struct rchan_callbacks *cb, - void *private_data) -{ - struct rchan *chan = relay_open(base_filename, parent, - subbuf_size, n_subbufs, - cb); - if (chan) - chan->private_data = private_data; - return chan; -} - -#endif diff --git a/kernel/ia64/Kbuild b/kernel/ia64/Kbuild deleted file mode 100644 index e2c8acc9..00000000 --- a/kernel/ia64/Kbuild +++ /dev/null @@ -1,13 +0,0 @@ -obj-m := kvm.o kvm-intel.o - -kvm-objs := kvm_main.o ioapic.o coalesced_mmio.o kvm-ia64.o kvm_fw.o \ - irq_comm.o ../anon_inodes.o ../external-module-compat.o \ - 
../request-irq-compat.o - -ifeq ($(CONFIG_IOMMU_API),y) -kvm-objs += iommu.o -endif - -EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 -kvm-intel-objs := vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ - vtlb.o process.o memset.o memcpy.o kvm_lib.o diff --git a/kernel/ia64/Makefile.pre b/kernel/ia64/Makefile.pre deleted file mode 100644 index 4d3410f6..00000000 --- a/kernel/ia64/Makefile.pre +++ /dev/null @@ -1,27 +0,0 @@ -prerequisite: asm-offsets.h ia64/memset.S ia64/memcpy.S - cp -f $(KERNELDIR)/arch/ia64/lib/memcpy.S ia64/memcpy.S - cp -f $(KERNELDIR)/arch/ia64/lib/memset.S ia64/memset.S - cmp -s asm-offset.h ia64/asm-offset.h || mv -f asm-offsets.* ia64/ - cp -f $(KERNELDIR)/lib/vsprintf.c ia64/vsprintf.c - cp -f $(KERNELDIR)/lib/ctype.c ia64/ctype.c - sed -i /^EXPORT_SYMBOL/d ia64/vsprintf.c - sed -i /^EXPORT_SYMBOL/d ia64/ctype.c - -asm-offsets.h: asm-offsets.s - @(set -e; \ - echo "/*"; \ - echo " * DO NOT MODIFY."; \ - echo " *"; \ - echo " * This file was auto-generated from $<"; \ - echo " *"; \ - echo " */"; \ - echo ""; \ - echo "#ifndef __KVM_ASM_OFFSETS_H__"; \ - echo "#define __KVM_ASM_OFFSETS_H__"; \ - echo ""; \ - sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \ - echo ""; \ - echo "#endif") <$< >$@ - -asm-offsets.s: ia64/asm-offsets.c - gcc -S -D__KERNEL__ -I./include -I$(KERNELDIR)/include -I$(KERNELDIR)/arch/ia64/include ia64/asm-offsets.c diff --git a/kernel/ia64/external-module-compat.h b/kernel/ia64/external-module-compat.h deleted file mode 100644 index 592733c7..00000000 --- a/kernel/ia64/external-module-compat.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Compatibility header for building as an external module. 
- */ - -#ifndef __ASSEMBLY__ -#include <linux/version.h> - -#include "../external-module-compat-comm.h" - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) -#error "KVM/IA-64 Can't be compiled if kernel version < 2.6.26" -#endif - -#ifndef CONFIG_PREEMPT_NOTIFIERS -/*Now, Just print an error message if no preempt notifiers configured!! - TODO: Implement it later! */ -#error "KVM/IA-64 depends on preempt notifiers in kernel." -#endif - -/* smp_call_function() lost an argument in 2.6.27. */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) - -#define kvm_smp_call_function(func, info, wait) smp_call_function(func, info, 0, wait) - -#else - -#define kvm_smp_call_function(func, info, wait) smp_call_function(func, info, wait) - -#endif - -/*There is no struct fdesc definition <2.6.27*/ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) -struct fdesc { - uint64_t ip; - uint64_t gp; -}; -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) - -typedef u64 phys_addr_t; - -#endif - -#endif - -#ifndef CONFIG_HAVE_KVM_IRQCHIP -#define CONFIG_HAVE_KVM_IRQCHIP 1 -#endif diff --git a/kernel/ia64/hack-module.awk b/kernel/ia64/hack-module.awk deleted file mode 100644 index 5240f5b2..00000000 --- a/kernel/ia64/hack-module.awk +++ /dev/null @@ -1,28 +0,0 @@ -BEGIN { split("INIT_WORK on_each_cpu smp_call_function smp_send_reschedule " \ - "hrtimer_add_expires_ns hrtimer_get_expires " \ - "hrtimer_get_expires_ns hrtimer_start_expires " \ - "hrtimer_expires_remaining " \ - "request_irq free_irq", compat_apis); } - -/MODULE_AUTHOR/ { - printf("MODULE_INFO(version, \"%s\");\n", version) -} - -{ sub(/..\/..\/..\/lib\/vsprintf\.c/, "vsprintf.c") } -{ sub(/..\/..\/..\/lib\/ctype\.c/, "ctype.c") } -/#undef CONFIG_MODULES/ { $0 = "" } - -{ - for (i in compat_apis) { - ident = compat_apis[i] - sub("\\<" ident "\\>", "kvm_" ident) - } -} - -/#include <linux\/compiler.h>/ { $0 = "" } - -{ sub(/linux\/mm_types\.h/, "linux/mm.h") } - -{ sub(/\<__user\>/, " ") } - -{ print } diff --git 
a/kernel/include-compat/asm-x86/asm.h b/kernel/include-compat/asm-x86/asm.h deleted file mode 100644 index 3ad6aab9..00000000 --- a/kernel/include-compat/asm-x86/asm.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * Empty file to satisfy #include <linux/asm.h> for older kernels. - */ diff --git a/kernel/include-compat/asm-x86/cmpxchg.h b/kernel/include-compat/asm-x86/cmpxchg.h deleted file mode 100644 index 68daeebc..00000000 --- a/kernel/include-compat/asm-x86/cmpxchg.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * Empty file to satisfy #include <linux/cmpxchg.h> for older kernels. - */ diff --git a/kernel/include-compat/asm-x86/msidef.h b/kernel/include-compat/asm-x86/msidef.h deleted file mode 100644 index 6706b300..00000000 --- a/kernel/include-compat/asm-x86/msidef.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef _ASM_X86_MSIDEF_H -#define _ASM_X86_MSIDEF_H - -/* - * Constants for Intel APIC based MSI messages. - */ - -/* - * Shifts for MSI data - */ - -#define MSI_DATA_VECTOR_SHIFT 0 -#define MSI_DATA_VECTOR_MASK 0x000000ff -#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \ - MSI_DATA_VECTOR_MASK) - -#define MSI_DATA_DELIVERY_MODE_SHIFT 8 -#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) -#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) - -#define MSI_DATA_LEVEL_SHIFT 14 -#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) -#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) - -#define MSI_DATA_TRIGGER_SHIFT 15 -#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) -#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) - -/* - * Shift/mask fields for msi address - */ - -#define MSI_ADDR_BASE_HI 0 -#define MSI_ADDR_BASE_LO 0xfee00000 - -#define MSI_ADDR_DEST_MODE_SHIFT 2 -#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT) -#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT) - -#define MSI_ADDR_REDIRECTION_SHIFT 3 -#define MSI_ADDR_REDIRECTION_CPU (0 << 
MSI_ADDR_REDIRECTION_SHIFT) - /* dedicated cpu */ -#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) - /* lowest priority */ - -#define MSI_ADDR_DEST_ID_SHIFT 12 -#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 -#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ - MSI_ADDR_DEST_ID_MASK) - -#define MSI_ADDR_IR_EXT_INT (1 << 4) -#define MSI_ADDR_IR_SHV (1 << 3) -#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) -#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) -#endif /* _ASM_X86_MSIDEF_H */ diff --git a/kernel/include-compat/asm-x86/msr-index.h b/kernel/include-compat/asm-x86/msr-index.h deleted file mode 100644 index e7625b1d..00000000 --- a/kernel/include-compat/asm-x86/msr-index.h +++ /dev/null @@ -1,339 +0,0 @@ -#ifndef _ASM_X86_MSR_INDEX_H -#define _ASM_X86_MSR_INDEX_H - -/* CPU model specific register (MSR) numbers */ - -/* x86-64 specific MSRs */ -#define MSR_EFER 0xc0000080 /* extended feature register */ -#define MSR_STAR 0xc0000081 /* legacy mode SYSCALL target */ -#define MSR_LSTAR 0xc0000082 /* long mode SYSCALL target */ -#define MSR_CSTAR 0xc0000083 /* compat mode SYSCALL target */ -#define MSR_SYSCALL_MASK 0xc0000084 /* EFLAGS mask for syscall */ -#define MSR_FS_BASE 0xc0000100 /* 64bit FS base */ -#define MSR_GS_BASE 0xc0000101 /* 64bit GS base */ -#define MSR_KERNEL_GS_BASE 0xc0000102 /* SwapGS GS shadow */ - -/* EFER bits: */ -#define _EFER_SCE 0 /* SYSCALL/SYSRET */ -#define _EFER_LME 8 /* Long mode enable */ -#define _EFER_LMA 10 /* Long mode active (read-only) */ -#define _EFER_NX 11 /* No execute enable */ -#define _EFER_SVME 12 /* Enable virtualization */ - -#define EFER_SCE (1<<_EFER_SCE) -#define EFER_LME (1<<_EFER_LME) -#define EFER_LMA (1<<_EFER_LMA) -#define EFER_NX (1<<_EFER_NX) -#define EFER_SVME (1<<_EFER_SVME) - -/* Intel MSRs. 
Some also available on other CPUs */ -#define MSR_IA32_PERFCTR0 0x000000c1 -#define MSR_IA32_PERFCTR1 0x000000c2 -#define MSR_FSB_FREQ 0x000000cd - -#define MSR_MTRRcap 0x000000fe -#define MSR_IA32_BBL_CR_CTL 0x00000119 - -#define MSR_IA32_SYSENTER_CS 0x00000174 -#define MSR_IA32_SYSENTER_ESP 0x00000175 -#define MSR_IA32_SYSENTER_EIP 0x00000176 - -#define MSR_IA32_MCG_CAP 0x00000179 -#define MSR_IA32_MCG_STATUS 0x0000017a -#define MSR_IA32_MCG_CTL 0x0000017b - -#define MSR_IA32_PEBS_ENABLE 0x000003f1 -#define MSR_IA32_DS_AREA 0x00000600 -#define MSR_IA32_PERF_CAPABILITIES 0x00000345 - -#define MSR_MTRRfix64K_00000 0x00000250 -#define MSR_MTRRfix16K_80000 0x00000258 -#define MSR_MTRRfix16K_A0000 0x00000259 -#define MSR_MTRRfix4K_C0000 0x00000268 -#define MSR_MTRRfix4K_C8000 0x00000269 -#define MSR_MTRRfix4K_D0000 0x0000026a -#define MSR_MTRRfix4K_D8000 0x0000026b -#define MSR_MTRRfix4K_E0000 0x0000026c -#define MSR_MTRRfix4K_E8000 0x0000026d -#define MSR_MTRRfix4K_F0000 0x0000026e -#define MSR_MTRRfix4K_F8000 0x0000026f -#define MSR_MTRRdefType 0x000002ff - -#define MSR_IA32_CR_PAT 0x00000277 - -#define MSR_IA32_DEBUGCTLMSR 0x000001d9 -#define MSR_IA32_LASTBRANCHFROMIP 0x000001db -#define MSR_IA32_LASTBRANCHTOIP 0x000001dc -#define MSR_IA32_LASTINTFROMIP 0x000001dd -#define MSR_IA32_LASTINTTOIP 0x000001de - -/* DEBUGCTLMSR bits (others vary by model): */ -#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ -#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ - -#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) -#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) - -#define MSR_IA32_MC0_CTL 0x00000400 -#define MSR_IA32_MC0_STATUS 0x00000401 -#define MSR_IA32_MC0_ADDR 0x00000402 -#define MSR_IA32_MC0_MISC 0x00000403 - -#define MSR_P6_PERFCTR0 0x000000c1 -#define MSR_P6_PERFCTR1 0x000000c2 -#define MSR_P6_EVNTSEL0 0x00000186 -#define MSR_P6_EVNTSEL1 0x00000187 - -/* AMD64 MSRs. Not complete. See the architecture manual for a more - complete list. 
*/ - -#define MSR_AMD64_NB_CFG 0xc001001f -#define MSR_AMD64_IBSFETCHCTL 0xc0011030 -#define MSR_AMD64_IBSFETCHLINAD 0xc0011031 -#define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 -#define MSR_AMD64_IBSOPCTL 0xc0011033 -#define MSR_AMD64_IBSOPRIP 0xc0011034 -#define MSR_AMD64_IBSOPDATA 0xc0011035 -#define MSR_AMD64_IBSOPDATA2 0xc0011036 -#define MSR_AMD64_IBSOPDATA3 0xc0011037 -#define MSR_AMD64_IBSDCLINAD 0xc0011038 -#define MSR_AMD64_IBSDCPHYSAD 0xc0011039 -#define MSR_AMD64_IBSCTL 0xc001103a - -/* Fam 10h MSRs */ -#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 -#define FAM10H_MMIO_CONF_ENABLE (1<<0) -#define FAM10H_MMIO_CONF_BUSRANGE_MASK 0xf -#define FAM10H_MMIO_CONF_BUSRANGE_SHIFT 2 -#define FAM10H_MMIO_CONF_BASE_MASK 0xfffffff -#define FAM10H_MMIO_CONF_BASE_SHIFT 20 - -/* K8 MSRs */ -#define MSR_K8_TOP_MEM1 0xc001001a -#define MSR_K8_TOP_MEM2 0xc001001d -#define MSR_K8_SYSCFG 0xc0010010 -#define MSR_K8_HWCR 0xc0010015 -#define MSR_K8_INT_PENDING_MSG 0xc0010055 -/* C1E active bits in int pending message */ -#define K8_INTP_C1E_ACTIVE_MASK 0x18000000 -#define MSR_K8_TSEG_ADDR 0xc0010112 -#define K8_MTRRFIXRANGE_DRAM_ENABLE 0x00040000 /* MtrrFixDramEn bit */ -#define K8_MTRRFIXRANGE_DRAM_MODIFY 0x00080000 /* MtrrFixDramModEn bit */ -#define K8_MTRR_RDMEM_WRMEM_MASK 0x18181818 /* Mask: RdMem|WrMem */ - -/* K7 MSRs */ -#define MSR_K7_EVNTSEL0 0xc0010000 -#define MSR_K7_PERFCTR0 0xc0010004 -#define MSR_K7_EVNTSEL1 0xc0010001 -#define MSR_K7_PERFCTR1 0xc0010005 -#define MSR_K7_EVNTSEL2 0xc0010002 -#define MSR_K7_PERFCTR2 0xc0010006 -#define MSR_K7_EVNTSEL3 0xc0010003 -#define MSR_K7_PERFCTR3 0xc0010007 -#define MSR_K7_CLK_CTL 0xc001001b -#define MSR_K7_HWCR 0xc0010015 -#define MSR_K7_FID_VID_CTL 0xc0010041 -#define MSR_K7_FID_VID_STATUS 0xc0010042 - -/* K6 MSRs */ -#define MSR_K6_EFER 0xc0000080 -#define MSR_K6_STAR 0xc0000081 -#define MSR_K6_WHCR 0xc0000082 -#define MSR_K6_UWCCR 0xc0000085 -#define MSR_K6_EPMR 0xc0000086 -#define MSR_K6_PSOR 0xc0000087 -#define MSR_K6_PFIR 
0xc0000088 - -/* Centaur-Hauls/IDT defined MSRs. */ -#define MSR_IDT_FCR1 0x00000107 -#define MSR_IDT_FCR2 0x00000108 -#define MSR_IDT_FCR3 0x00000109 -#define MSR_IDT_FCR4 0x0000010a - -#define MSR_IDT_MCR0 0x00000110 -#define MSR_IDT_MCR1 0x00000111 -#define MSR_IDT_MCR2 0x00000112 -#define MSR_IDT_MCR3 0x00000113 -#define MSR_IDT_MCR4 0x00000114 -#define MSR_IDT_MCR5 0x00000115 -#define MSR_IDT_MCR6 0x00000116 -#define MSR_IDT_MCR7 0x00000117 -#define MSR_IDT_MCR_CTRL 0x00000120 - -/* VIA Cyrix defined MSRs*/ -#define MSR_VIA_FCR 0x00001107 -#define MSR_VIA_LONGHAUL 0x0000110a -#define MSR_VIA_RNG 0x0000110b -#define MSR_VIA_BCR2 0x00001147 - -/* Transmeta defined MSRs */ -#define MSR_TMTA_LONGRUN_CTRL 0x80868010 -#define MSR_TMTA_LONGRUN_FLAGS 0x80868011 -#define MSR_TMTA_LRTI_READOUT 0x80868018 -#define MSR_TMTA_LRTI_VOLT_MHZ 0x8086801a - -/* Intel defined MSRs. */ -#define MSR_IA32_P5_MC_ADDR 0x00000000 -#define MSR_IA32_P5_MC_TYPE 0x00000001 -#define MSR_IA32_TSC 0x00000010 -#define MSR_IA32_PLATFORM_ID 0x00000017 -#define MSR_IA32_EBL_CR_POWERON 0x0000002a -#define MSR_IA32_FEATURE_CONTROL 0x0000003a - -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) - -#define MSR_IA32_APICBASE 0x0000001b -#define MSR_IA32_APICBASE_BSP (1<<8) -#define MSR_IA32_APICBASE_ENABLE (1<<11) -#define MSR_IA32_APICBASE_BASE (0xfffff<<12) - -#define MSR_IA32_UCODE_WRITE 0x00000079 -#define MSR_IA32_UCODE_REV 0x0000008b - -#define MSR_IA32_PERF_STATUS 0x00000198 -#define MSR_IA32_PERF_CTL 0x00000199 - -#define MSR_IA32_MPERF 0x000000e7 -#define MSR_IA32_APERF 0x000000e8 - -#define MSR_IA32_THERM_CONTROL 0x0000019a -#define MSR_IA32_THERM_INTERRUPT 0x0000019b -#define MSR_IA32_THERM_STATUS 0x0000019c -#define MSR_IA32_MISC_ENABLE 0x000001a0 - -/* Intel Model 6 */ -#define MSR_P6_EVNTSEL0 0x00000186 -#define MSR_P6_EVNTSEL1 0x00000187 - -/* P4/Xeon+ specific */ -#define MSR_IA32_MCG_EAX 0x00000180 -#define MSR_IA32_MCG_EBX 0x00000181 -#define 
MSR_IA32_MCG_ECX 0x00000182 -#define MSR_IA32_MCG_EDX 0x00000183 -#define MSR_IA32_MCG_ESI 0x00000184 -#define MSR_IA32_MCG_EDI 0x00000185 -#define MSR_IA32_MCG_EBP 0x00000186 -#define MSR_IA32_MCG_ESP 0x00000187 -#define MSR_IA32_MCG_EFLAGS 0x00000188 -#define MSR_IA32_MCG_EIP 0x00000189 -#define MSR_IA32_MCG_RESERVED 0x0000018a - -/* Pentium IV performance counter MSRs */ -#define MSR_P4_BPU_PERFCTR0 0x00000300 -#define MSR_P4_BPU_PERFCTR1 0x00000301 -#define MSR_P4_BPU_PERFCTR2 0x00000302 -#define MSR_P4_BPU_PERFCTR3 0x00000303 -#define MSR_P4_MS_PERFCTR0 0x00000304 -#define MSR_P4_MS_PERFCTR1 0x00000305 -#define MSR_P4_MS_PERFCTR2 0x00000306 -#define MSR_P4_MS_PERFCTR3 0x00000307 -#define MSR_P4_FLAME_PERFCTR0 0x00000308 -#define MSR_P4_FLAME_PERFCTR1 0x00000309 -#define MSR_P4_FLAME_PERFCTR2 0x0000030a -#define MSR_P4_FLAME_PERFCTR3 0x0000030b -#define MSR_P4_IQ_PERFCTR0 0x0000030c -#define MSR_P4_IQ_PERFCTR1 0x0000030d -#define MSR_P4_IQ_PERFCTR2 0x0000030e -#define MSR_P4_IQ_PERFCTR3 0x0000030f -#define MSR_P4_IQ_PERFCTR4 0x00000310 -#define MSR_P4_IQ_PERFCTR5 0x00000311 -#define MSR_P4_BPU_CCCR0 0x00000360 -#define MSR_P4_BPU_CCCR1 0x00000361 -#define MSR_P4_BPU_CCCR2 0x00000362 -#define MSR_P4_BPU_CCCR3 0x00000363 -#define MSR_P4_MS_CCCR0 0x00000364 -#define MSR_P4_MS_CCCR1 0x00000365 -#define MSR_P4_MS_CCCR2 0x00000366 -#define MSR_P4_MS_CCCR3 0x00000367 -#define MSR_P4_FLAME_CCCR0 0x00000368 -#define MSR_P4_FLAME_CCCR1 0x00000369 -#define MSR_P4_FLAME_CCCR2 0x0000036a -#define MSR_P4_FLAME_CCCR3 0x0000036b -#define MSR_P4_IQ_CCCR0 0x0000036c -#define MSR_P4_IQ_CCCR1 0x0000036d -#define MSR_P4_IQ_CCCR2 0x0000036e -#define MSR_P4_IQ_CCCR3 0x0000036f -#define MSR_P4_IQ_CCCR4 0x00000370 -#define MSR_P4_IQ_CCCR5 0x00000371 -#define MSR_P4_ALF_ESCR0 0x000003ca -#define MSR_P4_ALF_ESCR1 0x000003cb -#define MSR_P4_BPU_ESCR0 0x000003b2 -#define MSR_P4_BPU_ESCR1 0x000003b3 -#define MSR_P4_BSU_ESCR0 0x000003a0 -#define MSR_P4_BSU_ESCR1 0x000003a1 -#define 
MSR_P4_CRU_ESCR0 0x000003b8 -#define MSR_P4_CRU_ESCR1 0x000003b9 -#define MSR_P4_CRU_ESCR2 0x000003cc -#define MSR_P4_CRU_ESCR3 0x000003cd -#define MSR_P4_CRU_ESCR4 0x000003e0 -#define MSR_P4_CRU_ESCR5 0x000003e1 -#define MSR_P4_DAC_ESCR0 0x000003a8 -#define MSR_P4_DAC_ESCR1 0x000003a9 -#define MSR_P4_FIRM_ESCR0 0x000003a4 -#define MSR_P4_FIRM_ESCR1 0x000003a5 -#define MSR_P4_FLAME_ESCR0 0x000003a6 -#define MSR_P4_FLAME_ESCR1 0x000003a7 -#define MSR_P4_FSB_ESCR0 0x000003a2 -#define MSR_P4_FSB_ESCR1 0x000003a3 -#define MSR_P4_IQ_ESCR0 0x000003ba -#define MSR_P4_IQ_ESCR1 0x000003bb -#define MSR_P4_IS_ESCR0 0x000003b4 -#define MSR_P4_IS_ESCR1 0x000003b5 -#define MSR_P4_ITLB_ESCR0 0x000003b6 -#define MSR_P4_ITLB_ESCR1 0x000003b7 -#define MSR_P4_IX_ESCR0 0x000003c8 -#define MSR_P4_IX_ESCR1 0x000003c9 -#define MSR_P4_MOB_ESCR0 0x000003aa -#define MSR_P4_MOB_ESCR1 0x000003ab -#define MSR_P4_MS_ESCR0 0x000003c0 -#define MSR_P4_MS_ESCR1 0x000003c1 -#define MSR_P4_PMH_ESCR0 0x000003ac -#define MSR_P4_PMH_ESCR1 0x000003ad -#define MSR_P4_RAT_ESCR0 0x000003bc -#define MSR_P4_RAT_ESCR1 0x000003bd -#define MSR_P4_SAAT_ESCR0 0x000003ae -#define MSR_P4_SAAT_ESCR1 0x000003af -#define MSR_P4_SSU_ESCR0 0x000003be -#define MSR_P4_SSU_ESCR1 0x000003bf /* guess: not in manual */ - -#define MSR_P4_TBPU_ESCR0 0x000003c2 -#define MSR_P4_TBPU_ESCR1 0x000003c3 -#define MSR_P4_TC_ESCR0 0x000003c4 -#define MSR_P4_TC_ESCR1 0x000003c5 -#define MSR_P4_U2L_ESCR0 0x000003b0 -#define MSR_P4_U2L_ESCR1 0x000003b1 - -/* Intel Core-based CPU performance counters */ -#define MSR_CORE_PERF_FIXED_CTR0 0x00000309 -#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a -#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b -#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d -#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e -#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f -#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 - -/* Geode defined MSRs */ -#define MSR_GEODE_BUSCONT_CONF0 0x00001900 - -/* Intel VT MSRs */ -#define 
MSR_IA32_VMX_BASIC 0x00000480 -#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 -#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 -#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 -#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 -#define MSR_IA32_VMX_MISC 0x00000485 -#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 -#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 -#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 -#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 -#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a -#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b -#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c - -/* AMD-V MSRs */ - -#define MSR_VM_CR 0xc0010114 -#define MSR_VM_HSAVE_PA 0xc0010117 - -#endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/kernel/include-compat/asm-x86/pvclock-abi.h b/kernel/include-compat/asm-x86/pvclock-abi.h deleted file mode 100644 index 6857f840..00000000 --- a/kernel/include-compat/asm-x86/pvclock-abi.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef _ASM_X86_PVCLOCK_ABI_H_ -#define _ASM_X86_PVCLOCK_ABI_H_ -#ifndef __ASSEMBLY__ - -/* - * These structs MUST NOT be changed. - * They are the ABI between hypervisor and guest OS. - * Both Xen and KVM are using this. - * - * pvclock_vcpu_time_info holds the system time and the tsc timestamp - * of the last update. So the guest can use the tsc delta to get a - * more precise system time. There is one per virtual cpu. - * - * pvclock_wall_clock references the point in time when the system - * time was zero (usually boot time), thus the guest calculates the - * current wall clock by adding the system time. - * - * Protocol for the "version" fields is: hypervisor raises it (making - * it uneven) before it starts updating the fields and raises it again - * (making it even) when it is done. Thus the guest can make sure the - * time values it got are consistent by checking the version before - * and after reading them. 
- */ - -struct pvclock_vcpu_time_info { - u32 version; - u32 pad0; - u64 tsc_timestamp; - u64 system_time; - u32 tsc_to_system_mul; - s8 tsc_shift; - u8 pad[3]; -} __attribute__((__packed__)); /* 32 bytes */ - -struct pvclock_wall_clock { - u32 version; - u32 sec; - u32 nsec; -} __attribute__((__packed__)); - -#endif /* __ASSEMBLY__ */ -#endif /* _ASM_X86_PVCLOCK_ABI_H_ */ diff --git a/kernel/include-compat/linux/anon_inodes.h b/kernel/include-compat/linux/anon_inodes.h deleted file mode 100644 index 7b6862f2..00000000 --- a/kernel/include-compat/linux/anon_inodes.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * include/linux/anon_inodes.h - * - * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> - * - */ - -#ifndef _LINUX_ANON_INODES_H -#define _LINUX_ANON_INODES_H - -struct file_operations; - -int anon_inode_getfd(const char *name, const struct file_operations *fops, - void *priv); - -#endif /* _LINUX_ANON_INODES_H */ diff --git a/kernel/include-compat/linux/intel-iommu.h b/kernel/include-compat/linux/intel-iommu.h deleted file mode 100644 index 1490fc07..00000000 --- a/kernel/include-compat/linux/intel-iommu.h +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. 
- * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj <ashok.raj@intel.com> - * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> - */ - -#ifndef _INTEL_IOMMU_H_ -#define _INTEL_IOMMU_H_ - -#include <linux/types.h> -#include <linux/msi.h> -#include <linux/sysdev.h> -#include "iova.h" -#include <linux/io.h> - -/* - * We need a fixed PAGE_SIZE of 4K irrespective of - * arch PAGE_SIZE for IOMMU page tables. - */ -#define PAGE_SHIFT_4K (12) -#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) -#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) -#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) - -#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) -#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) -#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) - -/* - * Intel IOMMU register specification per version 1.0 public spec. - */ - -#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ -#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ -#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ -#define DMAR_GCMD_REG 0x18 /* Global command register */ -#define DMAR_GSTS_REG 0x1c /* Global status register */ -#define DMAR_RTADDR_REG 0x20 /* Root entry table */ -#define DMAR_CCMD_REG 0x28 /* Context command reg */ -#define DMAR_FSTS_REG 0x34 /* Fault Status register */ -#define DMAR_FECTL_REG 0x38 /* Fault control register */ -#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ -#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ -#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ -#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ -#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ -#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ -#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ -#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ -#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ - -#define OFFSET_STRIDE (9) -/* -#define 
dmar_readl(dmar, reg) readl(dmar + reg) -#define dmar_readq(dmar, reg) ({ \ - u32 lo, hi; \ - lo = readl(dmar + reg); \ - hi = readl(dmar + reg + 4); \ - (((u64) hi) << 32) + lo; }) -*/ -static inline u64 dmar_readq(void __iomem *addr) -{ - u32 lo, hi; - lo = readl(addr); - hi = readl(addr + 4); - return (((u64) hi) << 32) + lo; -} - -static inline void dmar_writeq(void __iomem *addr, u64 val) -{ - writel((u32)val, addr); - writel((u32)(val >> 32), addr + 4); -} - -#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) -#define DMAR_VER_MINOR(v) ((v) & 0x0f) - -/* - * Decoding Capability Register - */ -#define cap_read_drain(c) (((c) >> 55) & 1) -#define cap_write_drain(c) (((c) >> 54) & 1) -#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) -#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) -#define cap_pgsel_inv(c) (((c) >> 39) & 1) - -#define cap_super_page_val(c) (((c) >> 34) & 0xf) -#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ - * OFFSET_STRIDE) + 21) - -#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) -#define cap_max_fault_reg_offset(c) \ - (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) - -#define cap_zlr(c) (((c) >> 22) & 1) -#define cap_isoch(c) (((c) >> 23) & 1) -#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) -#define cap_sagaw(c) (((c) >> 8) & 0x1f) -#define cap_caching_mode(c) (((c) >> 7) & 1) -#define cap_phmr(c) (((c) >> 6) & 1) -#define cap_plmr(c) (((c) >> 5) & 1) -#define cap_rwbf(c) (((c) >> 4) & 1) -#define cap_afl(c) (((c) >> 3) & 1) -#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) -/* - * Extended Capability Register - */ - -#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) -#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) -#define ecap_max_iotlb_offset(e) \ - (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) -#define ecap_coherent(e) ((e) & 0x1) - - -/* IOTLB_REG */ -#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) -#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) 
-#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) -#define DMA_TLB_IIRG(type) ((type >> 60) & 7) -#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) -#define DMA_TLB_READ_DRAIN (((u64)1) << 49) -#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) -#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) -#define DMA_TLB_IVT (((u64)1) << 63) -#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) -#define DMA_TLB_MAX_SIZE (0x3f) - -/* PMEN_REG */ -#define DMA_PMEN_EPM (((u32)1)<<31) -#define DMA_PMEN_PRS (((u32)1)<<0) - -/* GCMD_REG */ -#define DMA_GCMD_TE (((u32)1) << 31) -#define DMA_GCMD_SRTP (((u32)1) << 30) -#define DMA_GCMD_SFL (((u32)1) << 29) -#define DMA_GCMD_EAFL (((u32)1) << 28) -#define DMA_GCMD_WBF (((u32)1) << 27) - -/* GSTS_REG */ -#define DMA_GSTS_TES (((u32)1) << 31) -#define DMA_GSTS_RTPS (((u32)1) << 30) -#define DMA_GSTS_FLS (((u32)1) << 29) -#define DMA_GSTS_AFLS (((u32)1) << 28) -#define DMA_GSTS_WBFS (((u32)1) << 27) - -/* CCMD_REG */ -#define DMA_CCMD_ICC (((u64)1) << 63) -#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) -#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) -#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) -#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) -#define DMA_CCMD_MASK_NOBIT 0 -#define DMA_CCMD_MASK_1BIT 1 -#define DMA_CCMD_MASK_2BIT 2 -#define DMA_CCMD_MASK_3BIT 3 -#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) -#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) - -/* FECTL_REG */ -#define DMA_FECTL_IM (((u32)1) << 31) - -/* FSTS_REG */ -#define DMA_FSTS_PPF ((u32)2) -#define DMA_FSTS_PFO ((u32)1) -#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) - -/* FRCD_REG, 32 bits access */ -#define DMA_FRCD_F (((u32)1) << 31) -#define dma_frcd_type(d) ((d >> 30) & 1) -#define dma_frcd_fault_reason(c) (c & 0xff) -#define dma_frcd_source_id(c) (c & 0xffff) -#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ - -/* - * 0: Present - * 1-11: Reserved - * 12-63: Context Ptr (12 - (haw-1)) - * 64-127: Reserved - */ -struct root_entry { - u64 
val; - u64 rsvd1; -}; -#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) -static inline bool root_present(struct root_entry *root) -{ - return (root->val & 1); -} -static inline void set_root_present(struct root_entry *root) -{ - root->val |= 1; -} -static inline void set_root_value(struct root_entry *root, unsigned long value) -{ - root->val |= value & PAGE_MASK_4K; -} - -struct context_entry; -static inline struct context_entry * -get_context_addr_from_root(struct root_entry *root) -{ - return (struct context_entry *) - (root_present(root)?phys_to_virt( - root->val & PAGE_MASK_4K): - NULL); -} - -/* - * low 64 bits: - * 0: present - * 1: fault processing disable - * 2-3: translation type - * 12-63: address space root - * high 64 bits: - * 0-2: address width - * 3-6: aval - * 8-23: domain id - */ -struct context_entry { - u64 lo; - u64 hi; -}; -#define context_present(c) ((c).lo & 1) -#define context_fault_disable(c) (((c).lo >> 1) & 1) -#define context_translation_type(c) (((c).lo >> 2) & 3) -#define context_address_root(c) ((c).lo & PAGE_MASK_4K) -#define context_address_width(c) ((c).hi & 7) -#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) - -#define context_set_present(c) do {(c).lo |= 1;} while (0) -#define context_set_fault_enable(c) \ - do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) -#define context_set_translation_type(c, val) \ - do { \ - (c).lo &= (((u64)-1) << 4) | 3; \ - (c).lo |= ((val) & 3) << 2; \ - } while (0) -#define CONTEXT_TT_MULTI_LEVEL 0 -#define context_set_address_root(c, val) \ - do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) -#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) -#define context_set_domain_id(c, val) \ - do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) -#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) - -/* - * 0: readable - * 1: writable - * 2-6: reserved - * 7: super page - * 8-11: available - * 12-63: Host physcial address - */ -struct 
dma_pte { - u64 val; -}; -#define dma_clear_pte(p) do {(p).val = 0;} while (0) - -#define DMA_PTE_READ (1) -#define DMA_PTE_WRITE (2) - -#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) -#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) -#define dma_set_pte_prot(p, prot) \ - do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) -#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) -#define dma_set_pte_addr(p, addr) do {\ - (p).val |= ((addr) & PAGE_MASK_4K); } while (0) -#define dma_pte_present(p) (((p).val & 3) != 0) - -struct intel_iommu; - -struct dmar_domain { - int id; /* domain id */ - struct intel_iommu *iommu; /* back pointer to owning iommu */ - - struct list_head devices; /* all devices' list */ - struct iova_domain iovad; /* iova's that belong to this domain */ - - struct dma_pte *pgd; /* virtual address */ - spinlock_t mapping_lock; /* page table lock */ - int gaw; /* max guest address width */ - - /* adjusted guest address width, 0 is level 2 30-bit */ - int agaw; - -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 - int flags; -}; - -/* PCI domain-device relationship */ -struct device_domain_info { - struct list_head link; /* link to domain siblings */ - struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ - u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ - struct dmar_domain *domain; /* pointer to domain */ -}; - -extern int init_dmars(void); - -struct intel_iommu { - void __iomem *reg; /* Pointer to hardware regs, virtual addr */ - u64 cap; - u64 ecap; - unsigned long *domain_ids; /* bitmap of domains */ - struct dmar_domain **domains; /* ptr to domains */ - int seg; - u32 gcmd; /* Holds TE, EAFL. 
Don't need SRTP, SFL, WBF */ - spinlock_t lock; /* protect context, domain ids */ - spinlock_t register_lock; /* protect register handling */ - struct root_entry *root_entry; /* virtual address */ - - unsigned int irq; - unsigned char name[7]; /* Device Name */ - struct msi_msg saved_msg; - struct sys_device sysdev; -}; - -#ifndef CONFIG_DMAR_GFX_WA -static inline void iommu_prepare_gfx_mapping(void) -{ - return; -} -#endif /* !CONFIG_DMAR_GFX_WA */ - -void intel_iommu_domain_exit(struct dmar_domain *domain); -struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); -int intel_iommu_context_mapping(struct dmar_domain *domain, - struct pci_dev *pdev); -int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot); -void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); -struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); -int intel_iommu_found(void); -u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); - -#endif diff --git a/kernel/include-compat/linux/iommu.h b/kernel/include-compat/linux/iommu.h deleted file mode 100644 index 8a7bfb1b..00000000 --- a/kernel/include-compat/linux/iommu.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. - * Author: Joerg Roedel <joerg.roedel@amd.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __LINUX_IOMMU_H -#define __LINUX_IOMMU_H - -#define IOMMU_READ (1) -#define IOMMU_WRITE (2) - -struct device; - -struct iommu_domain { - void *priv; -}; - -struct iommu_ops { - int (*domain_init)(struct iommu_domain *domain); - void (*domain_destroy)(struct iommu_domain *domain); - int (*attach_dev)(struct iommu_domain *domain, struct device *dev); - void (*detach_dev)(struct iommu_domain *domain, struct device *dev); - int (*map)(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); - void (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size); - phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, - unsigned long iova); -}; - -#ifdef CONFIG_IOMMU_API - -extern void register_iommu(struct iommu_ops *ops); -extern bool iommu_found(void); -extern struct iommu_domain *iommu_domain_alloc(void); -extern void iommu_domain_free(struct iommu_domain *domain); -extern int iommu_attach_device(struct iommu_domain *domain, - struct device *dev); -extern void iommu_detach_device(struct iommu_domain *domain, - struct device *dev); -extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int prot); -extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, - size_t size); -extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova); - -#else /* CONFIG_IOMMU_API */ - -static inline void register_iommu(struct iommu_ops *ops) -{ -} - -static inline bool iommu_found(void) -{ - return false; -} - -static inline struct iommu_domain *iommu_domain_alloc(void) -{ - return NULL; -} - -static inline void iommu_domain_free(struct iommu_domain *domain) -{ -} - -static inline int iommu_attach_device(struct 
iommu_domain *domain, - struct device *dev) -{ - return -ENODEV; -} - -static inline void iommu_detach_device(struct iommu_domain *domain, - struct device *dev) -{ -} - -static inline int iommu_map_range(struct iommu_domain *domain, - unsigned long iova, phys_addr_t paddr, - size_t size, int prot) -{ - return -ENODEV; -} - -static inline void iommu_unmap_range(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ -} - -static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, - unsigned long iova) -{ - return 0; -} - -#endif /* CONFIG_IOMMU_API */ - -#endif /* __LINUX_IOMMU_H */ diff --git a/kernel/include-compat/linux/iova.h b/kernel/include-compat/linux/iova.h deleted file mode 100644 index 228f6c94..00000000 --- a/kernel/include-compat/linux/iova.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2006, Intel Corporation. - * - * This file is released under the GPLv2. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> - * - */ - -#ifndef _IOVA_H_ -#define _IOVA_H_ - -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/rbtree.h> -#include <linux/dma-mapping.h> - -/* IO virtual address start page frame number */ -#define IOVA_START_PFN (1) - -/* iova structure */ -struct iova { - struct rb_node node; - unsigned long pfn_hi; /* IOMMU dish out addr hi */ - unsigned long pfn_lo; /* IOMMU dish out addr lo */ -}; - -/* holds all the iova translations for a domain */ -struct iova_domain { - spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ - spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ - struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ - unsigned long dma_32bit_pfn; -}; - -struct iova *alloc_iova_mem(void); -void free_iova_mem(struct iova *iova); -void free_iova(struct iova_domain *iovad, unsigned long pfn); -void __free_iova(struct iova_domain *iovad, 
struct iova *iova); -struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn, - bool size_aligned); -struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, - unsigned long pfn_hi); -void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); -void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); -struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); -void put_iova_domain(struct iova_domain *iovad); - -#endif diff --git a/kernel/include-compat/linux/magic.h b/kernel/include-compat/linux/magic.h deleted file mode 100644 index a9c6567f..00000000 --- a/kernel/include-compat/linux/magic.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef __LINUX_MAGIC_H__ -#define __LINUX_MAGIC_H__ - -#define ADFS_SUPER_MAGIC 0xadf5 -#define AFFS_SUPER_MAGIC 0xadff -#define AFS_SUPER_MAGIC 0x5346414F -#define AUTOFS_SUPER_MAGIC 0x0187 -#define CODA_SUPER_MAGIC 0x73757245 -#define EFS_SUPER_MAGIC 0x414A53 -#define EXT2_SUPER_MAGIC 0xEF53 -#define EXT3_SUPER_MAGIC 0xEF53 -#define EXT4_SUPER_MAGIC 0xEF53 -#define HPFS_SUPER_MAGIC 0xf995e849 -#define ISOFS_SUPER_MAGIC 0x9660 -#define JFFS2_SUPER_MAGIC 0x72b6 -#define KVMFS_SUPER_MAGIC 0x19700426 - -#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ -#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ -#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ -#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ -#define MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 fs */ - -#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ -#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ -#define NFS_SUPER_MAGIC 0x6969 -#define OPENPROM_SUPER_MAGIC 0x9fa1 -#define PROC_SUPER_MAGIC 0x9fa0 -#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ - -#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ - /* used by file system utilities that - look at the superblock, etc. 
*/ -#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" -#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" -#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" - -#define SMB_SUPER_MAGIC 0x517B -#define USBDEVICE_SUPER_MAGIC 0x9fa2 - -#endif /* __LINUX_MAGIC_H__ */ diff --git a/kernel/include-compat/linux/marker.h b/kernel/include-compat/linux/marker.h deleted file mode 100644 index ceef04f2..00000000 --- a/kernel/include-compat/linux/marker.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Alternative file to satisfy #include <linux/marker.h> for older kernels. - */ -#ifndef _LINUX_MARKER_H -#define _LINUX_MARKER_H - -/* - * Code markup for dynamic and static tracing. - * - * See Documentation/marker.txt. - * - * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> - * - * This file is released under the GPLv2. - * See the file COPYING for more details. - */ - -#include <linux/types.h> - -struct module; -struct marker; - -/** - * marker_probe_func - Type of a marker probe function - * @probe_private: probe private data - * @call_private: call site private data - * @fmt: format string - * @args: variable argument list pointer. Use a pointer to overcome C's - * inability to pass this around as a pointer in a portable manner in - * the callee otherwise. - * - * Type of marker probe functions. They receive the mdata and need to parse the - * format string to recover the variable argument list. - */ -typedef void marker_probe_func(void *probe_private, void *call_private, - const char *fmt, va_list *args); - -struct marker_probe_closure { - marker_probe_func *func; /* Callback */ - void *probe_private; /* Private probe data */ -}; - -struct marker { - const char *name; /* Marker name */ - const char *format; /* Marker format string, describing the - * variable argument list. - */ - char state; /* Marker state. 
*/ - char ptype; /* probe type : 0 : single, 1 : multi */ - void (*call)(const struct marker *mdata, /* Probe wrapper */ - void *call_private, const char *fmt, ...); - struct marker_probe_closure single; - struct marker_probe_closure *multi; -} __attribute__((aligned(8))); - -#define __trace_mark(name, call_private, format, args...) \ - __mark_check_format(format, ## args) -static inline void marker_update_probe_range(struct marker *begin, - struct marker *end) -{ } - -/** - * trace_mark - Marker - * @name: marker name, not quoted. - * @format: format string - * @args...: variable argument list - * - * Places a marker. - */ -#define trace_mark(name, format, args...) \ - __trace_mark(name, NULL, format, ## args) - -/** - * MARK_NOARGS - Format string for a marker with no argument. - */ -#define MARK_NOARGS " " - -/* To be used for string format validity checking with gcc */ -static inline void __attribute__((format(printf,1,2))) -___mark_check_format(const char *fmt, ...) -{ -} - -#define __mark_check_format(format, args...) \ - do { \ - if (0) \ - ___mark_check_format(format, ## args); \ - } while (0) - -extern marker_probe_func __mark_empty_function; - -extern void marker_probe_cb(const struct marker *mdata, - void *call_private, const char *fmt, ...); -extern void marker_probe_cb_noarg(const struct marker *mdata, - void *call_private, const char *fmt, ...); - -/* - * Connect a probe to a marker. - * private data pointer must be a valid allocated memory address, or NULL. - */ -extern int marker_probe_register(const char *name, const char *format, - marker_probe_func *probe, void *probe_private); - -/* - * Returns the private data given to marker_probe_register. - */ -extern int marker_probe_unregister(const char *name, - marker_probe_func *probe, void *probe_private); -/* - * Unregister a marker by providing the registered private data. 
- */ -extern int marker_probe_unregister_private_data(marker_probe_func *probe, - void *probe_private); - -extern void *marker_get_private_data(const char *name, marker_probe_func *probe, - int num); - -#endif - diff --git a/kernel/include-compat/linux/math64.h b/kernel/include-compat/linux/math64.h deleted file mode 100644 index dc7c5812..00000000 --- a/kernel/include-compat/linux/math64.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * Empty file to satisfy #include <linux/math64.h> for older kernels. - */ diff --git a/kernel/include-compat/linux/mmu_notifier.h b/kernel/include-compat/linux/mmu_notifier.h deleted file mode 100644 index a6db4bab..00000000 --- a/kernel/include-compat/linux/mmu_notifier.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _LINUX_MMU_NOTIFIER_H -#define _LINUX_MMU_NOTIFIER_H - -struct mmu_notifier {}; - -#endif diff --git a/kernel/include-compat/linux/msi.h b/kernel/include-compat/linux/msi.h deleted file mode 100644 index 8f293922..00000000 --- a/kernel/include-compat/linux/msi.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef LINUX_MSI_H -#define LINUX_MSI_H - -#include <linux/list.h> - -struct msi_msg { - u32 address_lo; /* low 32 bits of msi message address */ - u32 address_hi; /* high 32 bits of msi message address */ - u32 data; /* 16 bits of msi message data */ -}; - -/* Helper functions */ -extern void mask_msi_irq(unsigned int irq); -extern void unmask_msi_irq(unsigned int irq); -extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); -extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); - -struct msi_desc { - struct { - __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ - __u8 maskbit : 1; /* mask-pending bit supported ? 
*/ - __u8 masked : 1; - __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ - __u8 pos; /* Location of the msi capability */ - __u32 maskbits_mask; /* mask bits mask */ - __u16 entry_nr; /* specific enabled entry */ - unsigned default_irq; /* default pre-assigned irq */ - }msi_attrib; - - unsigned int irq; - struct list_head list; - - void __iomem *mask_base; - struct pci_dev *dev; - - /* Last set MSI message */ - struct msi_msg msg; -}; - -/* - * The arch hook for setup up msi irqs - */ -int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); -void arch_teardown_msi_irq(unsigned int irq); -extern int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -extern void arch_teardown_msi_irqs(struct pci_dev *dev); -extern int arch_msi_check_device(struct pci_dev* dev, int nvec, int type); - - -#endif /* LINUX_MSI_H */ diff --git a/kernel/include-compat/linux/mutex.h b/kernel/include-compat/linux/mutex.h deleted file mode 100644 index 449905c0..00000000 --- a/kernel/include-compat/linux/mutex.h +++ /dev/null @@ -1,3 +0,0 @@ -/* - * Empty file to satisfy #include <linux/mutex.h> for older kernels. - */ diff --git a/kernel/ksm/Kbuild b/kernel/ksm/Kbuild deleted file mode 100644 index 0276f4fa..00000000 --- a/kernel/ksm/Kbuild +++ /dev/null @@ -1,6 +0,0 @@ -obj-m := ksm.o -ksm-objs := ksm_main.o -ifeq ($(CONFIG_MMU_NOTIFIER),y) -else -ksm-objs += wp_notifier.o -endif diff --git a/kernel/ksm/external-module-compat.h b/kernel/ksm/external-module-compat.h deleted file mode 100644 index b9e37262..00000000 --- a/kernel/ksm/external-module-compat.h +++ /dev/null @@ -1,388 +0,0 @@ - -/* - * Compatibility header for building as an external module. - */ - -/* - * Avoid picking up the kernel's kvm.h in case we have a newer one. 
- */ - -#include <linux/compiler.h> -#include <linux/version.h> -#include <linux/string.h> -#include <linux/cpu.h> -#include <linux/list.h> -#include <asm/processor.h> -#include <linux/hrtimer.h> -#include <asm/bitops.h> -#include <linux/mm.h> -#include <linux/rmap.h> -#include <asm/tlbflush.h> -#include <linux/module.h> -#include <asm/cacheflush.h> -#include <asm-generic/pgtable.h> - -/* - * 2.6.16 does not have GFP_NOWAIT - */ - -#include <linux/gfp.h> - -void kvm_ksm_set_pte(struct mm_struct *mm, unsigned long address, pte_t pte); -int kvm_ksm_spte_count(struct mm_struct *mm, - unsigned long address); - -#define list_first_entry(ptr, type, member) \ - list_entry((ptr)->next, type, member) - -static struct anon_vma *page_lock_anon_vma(struct page *page) -{ - struct anon_vma *anon_vma; - unsigned long anon_mapping; - - rcu_read_lock(); - anon_mapping = (unsigned long) page->mapping; - if (!(anon_mapping & PAGE_MAPPING_ANON)) - goto out; - if (!page_mapped(page)) - goto out; - - anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - spin_lock(&anon_vma->lock); - return anon_vma; -out: - rcu_read_unlock(); - return NULL; -} - -static void page_unlock_anon_vma(struct anon_vma *anon_vma) -{ - spin_unlock(&anon_vma->lock); - rcu_read_unlock(); -} - -/* - * At what user virtual address is page expected in @vma? - * Returns virtual address or -EFAULT if page's index/offset is not - * within the range mapped the @vma. - */ -static inline unsigned long -vma_address(struct page *page, struct vm_area_struct *vma) -{ - pgoff_t pgoff = page->index; - unsigned long address; - - address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { - /* page should be within @vma mapping range */ - return -EFAULT; - } - return address; -} - -/* - * At what user virtual address is page expected in vma? 
checking that the - * page matches the vma: currently only used on anon pages, by unuse_vma; - */ -unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) -{ - if (PageAnon(page)) { - if ((void *)vma->anon_vma != - (void *)page->mapping - PAGE_MAPPING_ANON) - return -EFAULT; - } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { - if (!vma->vm_file || - vma->vm_file->f_mapping != page->mapping) - return -EFAULT; - } else - return -EFAULT; - return vma_address(page, vma); -} - -/* - * Check that @page is mapped at @address into @mm. - * - * On success returns with pte mapped and locked. - */ -pte_t *page_check_address(struct page *page, struct mm_struct *mm, - unsigned long address, spinlock_t **ptlp) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - spinlock_t *ptl; - - pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) - return NULL; - - pud = pud_offset(pgd, address); - if (!pud_present(*pud)) - return NULL; - - pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) - return NULL; - - pte = pte_offset_map(pmd, address); - /* Make a quick check before getting the lock */ - if (!pte_present(*pte)) { - pte_unmap(pte); - return NULL; - } - - ptl = pte_lockptr(mm, pmd); - spin_lock(ptl); - if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) { - *ptlp = ptl; - return pte; - } - pte_unmap_unlock(pte, ptl); - return NULL; -} - -void page_remove_rmap_old(struct page *page, struct vm_area_struct *vma) -{ - if (atomic_add_negative(-1, &page->_mapcount)) { - if (unlikely(page_mapcount(page) < 0)) { - printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! 
(%d)\n", page_mapcount(page)); - printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page)); - printk (KERN_EMERG " page->flags = %lx\n", page->flags); - printk (KERN_EMERG " page->count = %x\n", page_count(page)); - printk (KERN_EMERG " page->mapping = %p\n", page->mapping); - BUG(); - } - - /* - * It would be tidy to reset the PageAnon mapping here, - * but that might overwrite a racing page_add_anon_rmap - * which increments mapcount after us but sets mapping - * before us: so leave the reset to free_hot_cold_page, - * and remember that it's only reliable while mapped. - * Leaving it set also helps swapoff to reinstate ptes - * faster for those pages still in swapcache. - */ - __dec_zone_page_state(page, - PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED); - } -} - -void page_add_file_rmap_old(struct page *page) -{ - if (atomic_inc_and_test(&page->_mapcount)) - __inc_zone_page_state(page, NR_FILE_MAPPED); -} - -static int page_wrprotect_one(struct page *page, struct vm_area_struct *vma, - int *odirect_sync, int count_offset) -{ - struct mm_struct *mm = vma->vm_mm; - unsigned long address; - pte_t *pte; - spinlock_t *ptl; - int ret = 0; - - address = vma_address(page, vma); - if (address == -EFAULT) - goto out; - - pte = page_check_address(page, mm, address, &ptl); - if (!pte) - goto out; - - if (pte_write(*pte)) { - pte_t entry; - - - flush_cache_page(vma, address, pte_pfn(*pte)); - /* - * Ok, so after ptep_clear_flush will get called the pte will - * be not present, so gup-fast will become gup-slow and will - * block on the pte_lock, now, the fact that ptep_clear_flush - * will notify all the cpu, is a way to sync it with knowing - * that by the time it return gup-fast is not running in the - * middle, beacuse gup-fast run with irq_disabled. 
- */ - entry = ptep_clear_flush(vma, address, pte); - - /* - * this is needed here to balance the mapcount of the page - */ - count_offset += kvm_ksm_spte_count(mm, address); - - /* - * Check that no O_DIRECT or similar I/O is in progress on the - * page - */ - if ((page_mapcount(page) + count_offset) != page_count(page)) { - *odirect_sync = 0; - set_pte_at(mm, address, pte, entry); - goto out_unlock; - } - - entry = pte_wrprotect(entry); - set_pte_at(mm, address, pte, entry); - BUG_ON(pte_write(entry)); - kvm_ksm_set_pte(mm, address, entry); - } - ret = 1; - -out_unlock: - pte_unmap_unlock(pte, ptl); -out: - return ret; -} - -static int page_wrprotect_anon(struct page *page, int *odirect_sync, - int count_offset) -{ - struct vm_area_struct *vma; - struct anon_vma *anon_vma; - int ret = 0; - - anon_vma = page_lock_anon_vma(page); - if (!anon_vma) - return ret; - - /* - * If the page is inside the swap cache, its _count number was - * increased by one, therefore we have to increase count_offset by one. - */ - if (PageSwapCache(page)) - count_offset++; - - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) - ret += page_wrprotect_one(page, vma, odirect_sync, - count_offset); - - page_unlock_anon_vma(anon_vma); - - return ret; -} - -/** - * page_wrprotect - set all ptes pointing to a page as readonly - * @page: the page to set as readonly - * @odirect_sync: boolean value that is set to 0 when some of the ptes were not - * marked as readonly beacuse page_wrprotect_one() was not able - * to mark this ptes as readonly without opening window to a race - * with odirect - * @count_offset: number of times page_wrprotect() caller had called get_page() - * on the page - * - * returns the number of ptes which were marked as readonly. 
- * (ptes that were readonly before this function was called are counted as well) - */ -int page_wrprotect(struct page *page, int *odirect_sync, int count_offset) -{ - int ret = 0; - - /* - * Page lock is needed for anon pages for the PageSwapCache check, - * and for page_mapping for filebacked pages - */ - BUG_ON(!PageLocked(page)); - - *odirect_sync = 1; - if (PageAnon(page)) - ret = page_wrprotect_anon(page, odirect_sync, count_offset); - - return ret; -} - -/** - * replace_page - replace page in vma with new page - * @vma: vma that hold the pte oldpage is pointed by. - * @oldpage: the page we are replacing with newpage - * @newpage: the page we replace oldpage with - * @orig_pte: the original value of the pte - * @prot: page protection bits - * - * Returns 0 on success, -EFAULT on failure. - * - * Note: @newpage must not be an anonymous page because replace_page() does - * not change the mapping of @newpage to have the same values as @oldpage. - * @newpage can be mapped in several vmas at different offsets (page->index). 
- */ -int replace_page(struct vm_area_struct *vma, struct page *oldpage, - struct page *newpage, pte_t orig_pte, pgprot_t prot) -{ - struct mm_struct *mm = vma->vm_mm; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep; - pte_t new_pte; - spinlock_t *ptl; - unsigned long addr; - int ret; - - BUG_ON(PageAnon(newpage)); - - ret = -EFAULT; - addr = page_address_in_vma(oldpage, vma); - if (addr == -EFAULT) - goto out; - - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) - goto out; - - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - goto out; - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - goto out; - - ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); - if (!ptep) - goto out; - - if (!pte_same(*ptep, orig_pte)) { - pte_unmap_unlock(ptep, ptl); - goto out; - } - - ret = 0; - get_page(newpage); - page_add_file_rmap_old(newpage); - - flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush(vma, addr, ptep); - new_pte = mk_pte(newpage, prot); - set_pte_at(mm, addr, ptep, new_pte); - update_mmu_cache(vma, addr, new_pte); - BUG_ON(pte_write(new_pte)); - kvm_ksm_set_pte(mm, addr, new_pte); - - page_remove_rmap_old(oldpage, vma); - if (PageAnon(oldpage)) { - dec_mm_counter(mm, anon_rss); - inc_mm_counter(mm, file_rss); - } - put_page(oldpage); - - pte_unmap_unlock(ptep, ptl); -out: - return ret; -} - - -#include <linux/smp.h> - -/* HRTIMER_MODE_ABS started life with a different name */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) -#define HRTIMER_MODE_ABS HRTIMER_ABS -#endif - -/* __mmdrop() is not exported before 2.6.25 */ -#include <linux/sched.h> - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - -#define mmdrop(x) do { (void)(x); } while (0) - -#endif diff --git a/kernel/ksm/ksm.h b/kernel/ksm/ksm.h deleted file mode 100644 index 91ca2865..00000000 --- a/kernel/ksm/ksm.h +++ /dev/null @@ -1,84 +0,0 @@ -#ifndef __LINUX_KSM_H -#define __LINUX_KSM_H - -/* - * Userspace interface for /dev/ksm - kvm shared memory - */ - -#ifdef 
__KERNEL__ -#include <linux/types.h> -#include <linux/ioctl.h> -#else -#include <sys/types.h> -#include <sys/ioctl.h> -#endif - -#include <asm/types.h> - -#define KSM_API_VERSION 1 - -#define ksm_control_flags_run 1 - -/* for KSM_REGISTER_MEMORY_REGION */ -struct ksm_memory_region { - __u32 npages; /* number of pages to share */ - __u32 pad; - __u64 addr; /* the begining of the virtual address */ -}; - -struct ksm_user_scan { - __u32 pages_to_scan; - __u32 flags; /* control flags */ -}; - -struct ksm_kthread_info { - __u32 sleep; /* number of microsecoends to sleep */ - __u32 pages_to_scan; /* number of pages to scan */ - __u32 flags; /* control flags */ -}; - -#define KSMIO 0xAB - -/* ioctls for /dev/ksm */ - -#define KSM_GET_API_VERSION _IO(KSMIO, 0x00) -/* - * KSM_CREATE_SHARED_MEMORY_AREA - create the shared memory reagion fd - */ -#define KSM_CREATE_SHARED_MEMORY_AREA _IO(KSMIO, 0x01) /* return SMA fd */ -/* - * KSM_CREATE_SCAN - create the scanner fd - */ -#define KSM_CREATE_SCAN _IO(KSMIO, 0x02) /* return SCAN fd */ -/* - * KSM_START_STOP_KTHREAD - control the kernel thread scanning speed - * (can stop the kernel thread from working by setting running = 0) - */ -#define KSM_START_STOP_KTHREAD _IOW(KSMIO, 0x03,\ - struct ksm_kthread_info) -/* - * KSM_GET_INFO_KTHREAD - return information about the kernel thread - * scanning speed. - */ -#define KSM_GET_INFO_KTHREAD _IOW(KSMIO, 0x04,\ - struct ksm_kthread_info) - - -/* ioctls for SMA fds */ - -/* - * KSM_REGISTER_MEMORY_REGION - register virtual address memory area to be - * scanned by kvm. - */ -#define KSM_REGISTER_MEMORY_REGION _IOW(KSMIO, 0x20,\ - struct ksm_memory_region) -/* - * KSM_REMOVE_MEMORY_REGION - remove virtual address memory area from ksm. 
- */ -#define KSM_REMOVE_MEMORY_REGION _IO(KSMIO, 0x21) - -/* ioctls for SCAN fds */ -#define KSM_SCAN _IOW(KSMIO, 0x40,\ - struct ksm_user_scan) - -#endif diff --git a/kernel/ksm/ksm_main.c b/kernel/ksm/ksm_main.c deleted file mode 100644 index 7032b4f5..00000000 --- a/kernel/ksm/ksm_main.c +++ /dev/null @@ -1,1431 +0,0 @@ -/* - * Memory merging driver for Linux - * - * This module enables dynamic sharing of identical pages found in different - * memory areas, even if they are not shared by fork() - * - * Copyright (C) 2008 Red Hat, Inc. - * Authors: - * Izik Eidus - * Andrea Arcangeli - * Chris Wright - * - * This work is licensed under the terms of the GNU GPL, version 2. - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/mm.h> -#include <linux/fs.h> -#include <linux/miscdevice.h> -#include <linux/vmalloc.h> -#include <linux/file.h> -#include <linux/mman.h> -#include <linux/sched.h> -#include <linux/rwsem.h> -#include <linux/pagemap.h> -#include <linux/sched.h> -#include <linux/rmap.h> -#include <linux/spinlock.h> -#include <linux/jhash.h> -#include <linux/delay.h> -#include <linux/kthread.h> -#include <linux/wait.h> -#include <linux/scatterlist.h> -#include <linux/random.h> -#include <linux/slab.h> -#include <linux/swap.h> -#include <linux/rbtree.h> -#include <linux/anon_inodes.h> - -#include <asm/tlbflush.h> - -#include "ksm.h" -#include "wp_notifier.h" -#include "external-module-compat.h" - -#define KSM_MINOR 234 - -MODULE_AUTHOR("Red Hat, Inc."); -MODULE_LICENSE("GPL"); - -static int rmap_hash_size; -module_param(rmap_hash_size, int, 0); -MODULE_PARM_DESC(rmap_hash_size, "Hash table size for the reverse mapping"); - -/* - * ksm_mem_slot - hold information for an userspace scanning range - * (the scanning for this region will be from addr untill addr + - * npages * PAGE_SIZE inside mm) - */ -struct ksm_mem_slot { - struct list_head link; - struct list_head sma_link; - struct mm_struct *mm; - unsigned long addr; /* the begining of the 
virtual address */ - int npages; /* number of pages to share */ -}; - -/* - * ksm_sma - shared memory area, each process have its own sma that contain the - * information about the slots that it own - */ -struct ksm_sma { - struct list_head sma_slots; -}; - -/** - * struct ksm_scan - cursor for scanning - * @slot_index: the current slot we are scanning - * @page_index: the page inside the sma that is currently being scanned - * - * ksm uses it to know what are the next pages it need to scan - */ -struct ksm_scan { - struct ksm_mem_slot *slot_index; - unsigned long page_index; -}; - -/* - * Few notes about ksm scanning progress (make it easier to understand the - * structures below): - * - * In order to reduce excessive scanning, pages are sorted into the hash - * table, page_hash. After a page is inserted into the hash table, its - * contents may have changed. In this case, ksm must remove the page from - * the hash table and potentially rehash it. Ksm uses a reverse mapping, - * rmap_hash, to efficiently manage this. - */ - -struct rmap_item; - -/* - * tree_item - object of the write protected pages tree - */ -struct tree_item { - struct rb_node node; - struct rmap_item *rmap_item; -}; - -/* - * rmap_item - object of the rmap_hash hash table - * (it is holding the previous hash value (oldindex), - * pointer into the page_hash_item, and pointer into the tree_item) - */ -struct rmap_item { - struct hlist_node link; - struct mm_struct *mm; - unsigned long address; - unsigned int oldchecksum; /* old checksum value */ - unsigned char stable_tree; // 1 stable_tree 0 unstable tree - struct tree_item *tree_item; - struct rmap_item *next; - struct rmap_item *prev; -}; - -/* - * slots is linked list that hold all the memory regions that were registred - * to be scanned. 
- */ -static LIST_HEAD(slots); -static DECLARE_RWSEM(slots_lock); - -struct rb_root root_stable_tree = RB_ROOT; -struct rb_root root_unstable_tree = RB_ROOT; - -static int nrmaps_hash; -/* rmap_hash hash table */ -static struct hlist_head *rmap_hash; - -static struct kmem_cache *tree_item_cache; -static struct kmem_cache *rmap_item_cache; - -static int kthread_sleep; /* sleep time of the kernel thread */ -static int kthread_pages_to_scan; /* npages to scan for the kernel thread */ -static struct ksm_scan kthread_ksm_scan; -static int ksmd_flags; -static struct task_struct *kthread; -static DECLARE_WAIT_QUEUE_HEAD(kthread_wait); -static DECLARE_RWSEM(kthread_lock); - -static int ksm_slab_init(void) -{ - int ret = -ENOMEM; - - tree_item_cache = KMEM_CACHE(tree_item, 0); - if (!tree_item_cache) - goto out; - - rmap_item_cache = KMEM_CACHE(rmap_item, 0); - if (!rmap_item_cache) - goto out_free; - - return 0; - -out_free: - kmem_cache_destroy(tree_item_cache); -out: - return ret; -} - -static void ksm_slab_free(void) -{ - kmem_cache_destroy(rmap_item_cache); - kmem_cache_destroy(tree_item_cache); -} - -static inline struct tree_item *alloc_tree_item(void) -{ - return kmem_cache_zalloc(tree_item_cache, GFP_KERNEL); -} - -static void free_tree_item(struct tree_item *tree_item) -{ - kmem_cache_free(tree_item_cache, tree_item); -} - -static inline struct rmap_item *alloc_rmap_item(void) -{ - return kmem_cache_zalloc(rmap_item_cache, GFP_KERNEL); -} - -static inline void free_rmap_item(struct rmap_item *rmap_item) -{ - kmem_cache_free(rmap_item_cache, rmap_item); -} - -/* - * PageKsm - this type of pages are the write protected pages that ksm map - * into multiple vmas (this is the "shared page") - * this page was allocated using alloc_page(), every pte that pointing to it - * is always write protected (therefore its data content cant ever be changed) - * and this page cant be swapped. 
- */ -static inline int PageKsm(struct page *page) -{ - return !PageAnon(page); -} - -static int rmap_hash_init(void) -{ - if (!rmap_hash_size) { - struct sysinfo sinfo; - - si_meminfo(&sinfo); - rmap_hash_size = sinfo.totalram / 10; - } - nrmaps_hash = rmap_hash_size; - rmap_hash = vmalloc(nrmaps_hash * sizeof(struct hlist_head)); - if (!rmap_hash) - return -ENOMEM; - memset(rmap_hash, 0, nrmaps_hash * sizeof(struct hlist_head)); - return 0; -} - -static void rmap_hash_free(void) -{ - int i; - struct hlist_head *bucket; - struct hlist_node *node, *n; - struct rmap_item *rmap_item; - - for (i = 0; i < nrmaps_hash; ++i) { - bucket = &rmap_hash[i]; - hlist_for_each_entry_safe(rmap_item, node, n, bucket, link) { - hlist_del(&rmap_item->link); - free_rmap_item(rmap_item); - } - } - vfree(rmap_hash); -} - -static inline u32 calc_checksum(struct page *page) -{ - u32 checksum; - void *addr = kmap_atomic(page, KM_USER0); - checksum = jhash2(addr, PAGE_SIZE / 4, 17); - kunmap_atomic(addr, KM_USER0); - return checksum; -} - -static struct rmap_item *get_rmap_item(struct mm_struct *mm, unsigned long addr) -{ - struct rmap_item *rmap_item; - struct hlist_head *bucket; - struct hlist_node *node; - - bucket = &rmap_hash[addr % nrmaps_hash]; - hlist_for_each_entry(rmap_item, node, bucket, link) { - if (mm == rmap_item->mm && rmap_item->address == addr) { - return rmap_item; - } - } - return NULL; -} - -static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) -{ - struct tree_item *tree_item; - - tree_item = rmap_item->tree_item; - rmap_item->tree_item = NULL; - - if (rmap_item->stable_tree) { - if (rmap_item->prev) { - BUG_ON(rmap_item->prev->next != rmap_item); - rmap_item->prev->next = rmap_item->next; - } - if (rmap_item->next) { - BUG_ON(rmap_item->next->prev != rmap_item); - rmap_item->next->prev = rmap_item->prev; - } - } - - if (tree_item) { - if (rmap_item->stable_tree) { - if (!rmap_item->next && !rmap_item->prev) { - rb_erase(&tree_item->node, 
&root_stable_tree); - free_tree_item(tree_item); - } else if (!rmap_item->prev) { - BUG_ON(tree_item->rmap_item != rmap_item); - tree_item->rmap_item = rmap_item->next; - } else - BUG_ON(tree_item->rmap_item == rmap_item); - } else if (!rmap_item->stable_tree) - free_tree_item(tree_item); - } - - hlist_del(&rmap_item->link); - free_rmap_item(rmap_item); -} - -static void remove_page_from_tree(struct mm_struct *mm, - unsigned long addr) -{ - struct rmap_item *rmap_item; - - rmap_item = get_rmap_item(mm, addr); - if (!rmap_item) - return; - remove_rmap_item_from_tree(rmap_item); - return; -} - -static int ksm_sma_ioctl_register_memory_region(struct ksm_sma *ksm_sma, - struct ksm_memory_region *mem) -{ - struct ksm_mem_slot *slot; - int ret = -EPERM; - - slot = kzalloc(sizeof(struct ksm_mem_slot), GFP_KERNEL); - if (!slot) { - ret = -ENOMEM; - goto out; - } - - slot->mm = get_task_mm(current); - if (!slot->mm) - goto out_free; - slot->addr = mem->addr; - slot->npages = mem->npages; - - down_write(&slots_lock); - - list_add_tail(&slot->link, &slots); - list_add_tail(&slot->sma_link, &ksm_sma->sma_slots); - - up_write(&slots_lock); - return 0; - -out_free: - kfree(slot); -out: - return ret; -} - -static void remove_mm_from_hash_and_tree(struct mm_struct *mm) -{ - struct ksm_mem_slot *slot; - int pages_count; - - list_for_each_entry(slot, &slots, link) - if (slot->mm == mm) - break; - BUG_ON(!slot); - - root_unstable_tree = RB_ROOT; - for (pages_count = 0; pages_count < slot->npages; ++pages_count) { - cond_resched(); - remove_page_from_tree(mm, slot->addr + - pages_count * PAGE_SIZE); - } - list_del(&slot->link); -} - -static int ksm_sma_ioctl_remove_memory_region(struct ksm_sma *ksm_sma) -{ - struct ksm_mem_slot *slot, *node; - - down_write(&slots_lock); - list_for_each_entry_safe(slot, node, &ksm_sma->sma_slots, sma_link) { - remove_mm_from_hash_and_tree(slot->mm); - mmput(slot->mm); - list_del(&slot->sma_link); - kfree(slot); - } - up_write(&slots_lock); - return 0; 
-} - -static int ksm_sma_release(struct inode *inode, struct file *filp) -{ - struct ksm_sma *ksm_sma = filp->private_data; - int r; - - r = ksm_sma_ioctl_remove_memory_region(ksm_sma); - kfree(ksm_sma); - return r; -} - -static long ksm_sma_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - struct ksm_sma *sma = filp->private_data; - void __user *argp = (void __user *)arg; - int r = EINVAL; - - switch (ioctl) { - case KSM_REGISTER_MEMORY_REGION: { - struct ksm_memory_region ksm_memory_region; - - r = -EFAULT; - if (copy_from_user(&ksm_memory_region, argp, - sizeof(ksm_memory_region))) - goto out; - r = ksm_sma_ioctl_register_memory_region(sma, - &ksm_memory_region); - break; - } - case KSM_REMOVE_MEMORY_REGION: - r = ksm_sma_ioctl_remove_memory_region(sma); - break; - } - -out: - return r; -} - -static unsigned long addr_in_vma(struct vm_area_struct *vma, struct page *page) -{ - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - unsigned long addr; - - addr = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); - if (unlikely(addr < vma->vm_start || addr >= vma->vm_end)) - return -EFAULT; - return addr; -} - -static pte_t *get_pte(struct mm_struct *mm, unsigned long addr) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep = NULL; - - pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) - goto out; - - pud = pud_offset(pgd, addr); - if (!pud_present(*pud)) - goto out; - - pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) - goto out; - - ptep = pte_offset_map(pmd, addr); -out: - return ptep; -} - -static int is_present_pte(struct mm_struct *mm, unsigned long addr) -{ - pte_t *ptep; - int r; - - ptep = get_pte(mm, addr); - if (!ptep) - return 0; - - r = pte_present(*ptep); - pte_unmap(ptep); - - return r; -} - -static int memcmp_pages(struct page *page1, struct page *page2) -{ - char *addr1, *addr2; - int r; - - addr1 = kmap_atomic(page1, KM_USER0); - addr2 = kmap_atomic(page2, KM_USER1); - r = memcmp(addr1, addr2, 
PAGE_SIZE); - kunmap_atomic(addr1, KM_USER0); - kunmap_atomic(addr2, KM_USER1); - return r; -} - -/* pages_identical - * return 1 if identical, 0 otherwise. - */ -static inline int pages_identical(struct page *page1, struct page *page2) -{ - return !memcmp_pages(page1, page2); -} - -/* - * try_to_merge_one_page - take two pages and merge them into one - * note: - * oldpage should be anon page while newpage should be file mapped page - * - * this function return 0 if the pages were merged, 1 otherwise. - */ -static int try_to_merge_one_page(struct mm_struct *mm, - struct vm_area_struct *vma, - struct page *oldpage, - struct page *newpage, - pgprot_t newprot) -{ - int ret = 1; - int odirect_sync; - unsigned long page_addr_in_vma; - pte_t orig_pte, *orig_ptep; - - if (!PageAnon(oldpage)) - goto out; - - get_page(newpage); - get_page(oldpage); - - page_addr_in_vma = addr_in_vma(vma, oldpage); - if (page_addr_in_vma == -EFAULT) - goto out_putpage; - - orig_ptep = get_pte(mm, page_addr_in_vma); - if (!orig_ptep) - goto out_putpage; - orig_pte = *orig_ptep; - pte_unmap(orig_ptep); - if (!pte_present(orig_pte)) - goto out_putpage; - if (page_to_pfn(oldpage) != pte_pfn(orig_pte)) - goto out_putpage; - /* - * we need the page lock to read a stable PageSwapCache in - * page_wrprotect() - */ - if (TestSetPageLocked(oldpage)) - goto out_putpage; - /* - * page_wrprotect check if the page is swapped or in swap cache, - * in the future we might want to run here if_present_pte and then - * swap_free - */ - if (!page_wrprotect(oldpage, &odirect_sync, 2)) { - unlock_page(oldpage); - goto out_putpage; - } - unlock_page(oldpage); - if (!odirect_sync) - goto out_putpage; - - orig_pte = pte_wrprotect(orig_pte); - - if (pages_identical(oldpage, newpage)) - ret = replace_page(vma, oldpage, newpage, orig_pte, newprot); - -out_putpage: - put_page(oldpage); - put_page(newpage); -out: - return ret; -} - -/* - * try_to_merge_two_pages_alloc - take two identical pages and prepare them - * to be 
merged into one page. - * - * this function return 0 if we successfully mapped two identical pages into one - * page, 1 otherwise. - * (note this function will allocate a new kernel page, if one of the pages - * is already shared page (KsmPage), then try_to_merge_two_pages_noalloc() - * should be called.) - */ - -static int try_to_merge_two_pages_alloc(struct mm_struct *mm1, - struct page *page1, - struct mm_struct *mm2, - struct page *page2, - unsigned long addr1, - unsigned long addr2) -{ - struct vm_area_struct *vma; - pgprot_t prot; - int ret = 1; - struct page *kpage; - - - kpage = alloc_page(GFP_HIGHUSER); - if (!kpage) - return ret; - down_read(&mm1->mmap_sem); - vma = find_vma(mm1, addr1); - if (!vma) { - put_page(kpage); - up_read(&mm1->mmap_sem); - return ret; - } - prot = vma->vm_page_prot; - pgprot_val(prot) &= ~_PAGE_RW; - - copy_user_highpage(kpage, page1, addr1); - ret = try_to_merge_one_page(mm1, vma, page1, kpage, prot); - up_read(&mm1->mmap_sem); - - if (!ret) { - down_read(&mm2->mmap_sem); - vma = find_vma(mm2, addr2); - if (!vma) { - put_page(kpage); - ret = 1; - up_read(&mm2->mmap_sem); - return ret; - } - - prot = vma->vm_page_prot; - pgprot_val(prot) &= ~_PAGE_RW; - - ret = try_to_merge_one_page(mm2, vma, page2, kpage, - prot); - up_read(&mm2->mmap_sem); - /* - * If the secoend try_to_merge_one_page call was failed, - * we are in situation where we have Ksm page that have - * just one pte pointing to it, in this case we break - * it. 
- */ - if (ret) { - struct page *tmppage[1]; - down_read(&mm1->mmap_sem); - if (get_user_pages(current, mm1, addr1, 1, 1, 0, - tmppage, NULL) == 1) { - put_page(tmppage[0]); - } - up_read(&mm1->mmap_sem); - } - } - - put_page(kpage); - return ret; -} - -/* - * try_to_merge_two_pages_noalloc - the same astry_to_merge_two_pages_alloc, - * but no new kernel page is allocated (page2 should be KsmPage) - */ -static int try_to_merge_two_pages_noalloc(struct mm_struct *mm1, - struct page *page1, - struct page *page2, - unsigned long addr1) -{ - struct vm_area_struct *vma; - pgprot_t prot; - int ret = 1; - - /* - * If page2 is shared, we can just make the pte of mm1(page1) point to - * page2. - */ - BUG_ON(!PageKsm(page2)); - down_read(&mm1->mmap_sem); - vma = find_vma(mm1, addr1); - if (!vma) { - up_read(&mm1->mmap_sem); - return ret; - } - prot = vma->vm_page_prot; - pgprot_val(prot) &= ~_PAGE_RW; - ret = try_to_merge_one_page(mm1, vma, page1, page2, prot); - up_read(&mm1->mmap_sem); - - return ret; -} - - -static int is_zapped_item(struct rmap_item *rmap_item, - struct page **page) -{ - int ret = 0; - struct vm_area_struct *vma; - - cond_resched(); - down_read(&rmap_item->mm->mmap_sem); - if (is_present_pte(rmap_item->mm, rmap_item->address)) { - vma = find_vma(rmap_item->mm, rmap_item->address); - if (vma && !vma->vm_file) { - BUG_ON(vma->vm_flags & VM_SHARED); - ret = get_user_pages(current, rmap_item->mm, - rmap_item->address, - 1, 0, 0, page, NULL); - } - } - up_read(&rmap_item->mm->mmap_sem); - - if (ret != 1) - return 1; - - if (unlikely(!PageKsm(page[0]))) { - put_page(page[0]); - return 1; - } - return 0; -} - -static struct rmap_item *stable_tree_search(struct page *page, - struct page **page2, - struct rmap_item *rmap_item) -{ - struct rb_node *node = root_stable_tree.rb_node; - struct tree_item *tree_item; - struct rmap_item *found_rmap_item, *next_rmap_item; - - while (node) { - int ret; - - tree_item = rb_entry(node, struct tree_item, node); - 
found_rmap_item = tree_item->rmap_item; - while (found_rmap_item) { - BUG_ON(!found_rmap_item->stable_tree); - BUG_ON(!found_rmap_item->tree_item); - if (!rmap_item || - !(found_rmap_item->mm == rmap_item->mm && - found_rmap_item->address == rmap_item->address)) { - if (!is_zapped_item(found_rmap_item, page2)) - break; - next_rmap_item = found_rmap_item->next; - remove_rmap_item_from_tree(found_rmap_item); - found_rmap_item = next_rmap_item; - } else - found_rmap_item = found_rmap_item->next; - } - if (!found_rmap_item) - goto out_didnt_find; - - /* - * We can trust the value of the memcmp as we know the pages - * are write protected. - */ - ret = memcmp_pages(page, page2[0]); - - if (ret < 0) { - put_page(page2[0]); - node = node->rb_left; - } - else if (ret > 0) { - put_page(page2[0]); - node = node->rb_right; - } - else - goto out_found; - } -out_didnt_find: - found_rmap_item = NULL; -out_found: - return found_rmap_item; -} - -static int stable_tree_insert(struct page *page, - struct tree_item *new_tree_item, - struct rmap_item *rmap_item) -{ - struct rb_node **new = &(root_stable_tree.rb_node); - struct rb_node *parent = NULL; - struct tree_item *tree_item; - struct page *page2[1]; - - while (*new) { - int ret; - struct rmap_item *insert_rmap_item, *next_rmap_item; - - tree_item = rb_entry(*new, struct tree_item, node); - BUG_ON(!tree_item); - BUG_ON(!tree_item->rmap_item); - - insert_rmap_item = tree_item->rmap_item; - while (insert_rmap_item) { - BUG_ON(!insert_rmap_item->stable_tree); - BUG_ON(!insert_rmap_item->tree_item); - if (!rmap_item || - !(insert_rmap_item->mm == rmap_item->mm && - insert_rmap_item->address == rmap_item->address)) { - if (!is_zapped_item(insert_rmap_item, page2)) - break; - next_rmap_item = insert_rmap_item->next; - remove_rmap_item_from_tree(insert_rmap_item); - insert_rmap_item = next_rmap_item; - } else - insert_rmap_item = insert_rmap_item->next; - } - if (!insert_rmap_item) - return 1; - - ret = memcmp_pages(page, page2[0]); - - 
parent = *new; - if (ret < 0) { - put_page(page2[0]); - new = &((*new)->rb_left); - } - else if (ret > 0) { - put_page(page2[0]); - new = &((*new)->rb_right); - } - else { - /* - * It isnt a bug when we are here, - * beacuse after we release the stable_tree_lock - * someone else could have merge identical page to the - * tree. - */ - return 1; - } - } - - rb_link_node(&new_tree_item->node, parent, new); - rb_insert_color(&new_tree_item->node, &root_stable_tree); - rmap_item->stable_tree = 1; - rmap_item->tree_item = new_tree_item; - - return 0; -} - -static struct tree_item *unstable_tree_search_insert(struct page *page, - struct page **page2, - struct rmap_item *page_rmap_item) -{ - struct rb_node **new = &(root_unstable_tree.rb_node); - struct rb_node *parent = NULL; - struct tree_item *tree_item; - struct tree_item *new_tree_item; - struct rmap_item *rmap_item; - unsigned int checksum; - - while (*new) { - int ret; - - tree_item = rb_entry(*new, struct tree_item, node); - BUG_ON(!tree_item); - rmap_item = tree_item->rmap_item; - BUG_ON(!rmap_item); - - down_read(&rmap_item->mm->mmap_sem); - /* - * We dont want to swap in pages - */ - if (!is_present_pte(rmap_item->mm, rmap_item->address)) { - up_read(&rmap_item->mm->mmap_sem); - return NULL; - } - - ret = get_user_pages(current, rmap_item->mm, rmap_item->address, - 1, 0, 0, page2, NULL); - up_read(&rmap_item->mm->mmap_sem); - if (ret != 1) - return NULL; - - ret = memcmp_pages(page, page2[0]); - - parent = *new; - if (ret < 0) { - put_page(page2[0]); - new = &((*new)->rb_left); - } - else if (ret > 0) { - put_page(page2[0]); - new = &((*new)->rb_right); - } else - return tree_item; - } - - if (!page_rmap_item) - return NULL; - - checksum = calc_checksum(page); - if (page_rmap_item->oldchecksum != checksum) { - page_rmap_item->oldchecksum = checksum; - return NULL; - } - - new_tree_item = alloc_tree_item(); - if (!new_tree_item) - return NULL; - - page_rmap_item->tree_item = new_tree_item; - 
page_rmap_item->stable_tree = 0; - new_tree_item->rmap_item = page_rmap_item; - rb_link_node(&new_tree_item->node, parent, new); - rb_insert_color(&new_tree_item->node, &root_unstable_tree); - - return NULL; -} - -/* - * update_stable_tree - check if the page inside the tree got zapped, - * and if it got zapped, kick it from the tree. - */ -int update_tree(struct rmap_item *rmap_item, int *wait) -{ - if (!rmap_item->stable_tree) { - /* - * If the rmap_item is !stable_tree and in addition - * it have tree_item != NULL, it mean this rmap_item - * was inside the unstable tree, therefore we have to free - * the tree_item from it (beacuse the unstable tree was already - * flushed by the time we are here). - */ - if (rmap_item->tree_item) { - free_tree_item(rmap_item->tree_item); - rmap_item->tree_item = NULL; - return 0; - } - return 0; - } - - /* If we are here it mean the rmap_item was zapped, beacuse the - * rmap_item was pointing into the stable_tree and there all the pages - * should be KsmPages, so it shouldnt have came to here in the first - * place. (cmp_and_merge_page() shouldnt have been called) - */ - remove_rmap_item_from_tree(rmap_item); - *wait = 1; - return 1; -} - -static struct rmap_item *create_new_rmap_item(struct mm_struct *mm, - unsigned long addr, - unsigned int checksum) -{ - struct rmap_item *rmap_item; - struct hlist_head *bucket; - - rmap_item = alloc_rmap_item(); - if (!rmap_item) - return NULL; - - rmap_item->mm = mm; - rmap_item->address = addr; - rmap_item->oldchecksum = checksum; - rmap_item->stable_tree = 0; - rmap_item->tree_item = NULL; - - bucket = &rmap_hash[addr % nrmaps_hash]; - hlist_add_head(&rmap_item->link, bucket); - - return rmap_item; -} - -/* - * cmp_and_merge_page - take a page computes its hash value and check if there - * is similar hash value to different page, - * in case we find that there is similar hash to different page we call to - * try_to_merge_two_pages(). 
- */ -static int cmp_and_merge_page(struct ksm_scan *ksm_scan, struct page *page) -{ - struct page *page2[1]; - struct ksm_mem_slot *slot; - struct tree_item *tree_item; - struct rmap_item *rmap_item; - struct rmap_item *tree_rmap_item; - unsigned int checksum; - unsigned long addr; - int wait = 0; - - slot = ksm_scan->slot_index; - addr = slot->addr + ksm_scan->page_index * PAGE_SIZE; - rmap_item = get_rmap_item(slot->mm, addr); - if (rmap_item) { - if (update_tree(rmap_item, &wait)) - rmap_item = NULL; - } - - tree_rmap_item = stable_tree_search(page, page2, rmap_item); - if (tree_rmap_item) { - int ret; - - BUG_ON(!tree_rmap_item->tree_item); - ret = try_to_merge_two_pages_noalloc(slot->mm, page, page2[0], - addr); - put_page(page2[0]); - if (!ret) { - if (!rmap_item) - rmap_item = create_new_rmap_item(slot->mm, - addr, 0); - if (!rmap_item) - return !ret; - - - rmap_item->next = tree_rmap_item->next; - rmap_item->prev = tree_rmap_item; - - if (tree_rmap_item->next) - tree_rmap_item->next->prev = rmap_item; - - tree_rmap_item->next = rmap_item; - - rmap_item->stable_tree = 1; - rmap_item->tree_item = tree_rmap_item->tree_item; - } - return !ret; - } - - tree_item = unstable_tree_search_insert(page, page2, rmap_item); - if (tree_item) { - int ret; - struct rmap_item *tmp_rmap_item; - - tmp_rmap_item = tree_item->rmap_item; - BUG_ON(!tmp_rmap_item); - ret = try_to_merge_two_pages_alloc(slot->mm, page, - tmp_rmap_item->mm, - page2[0], addr, - tmp_rmap_item->address); - if (!ret) { - rb_erase(&tree_item->node, &root_unstable_tree); - if (!stable_tree_insert(page2[0], - tree_item, tmp_rmap_item)) { - if (rmap_item) { - rmap_item->stable_tree = 1; - rmap_item->next = tmp_rmap_item->next; - rmap_item->prev = tmp_rmap_item; - if (tmp_rmap_item->next) - tmp_rmap_item->next->prev = - rmap_item; - tmp_rmap_item->next = rmap_item; - rmap_item->tree_item = - tmp_rmap_item->tree_item; - } - } - } - put_page(page2[0]); - return !ret; - } - if (!wait && !rmap_item) { - checksum 
= calc_checksum(page); - create_new_rmap_item(slot->mm, addr, checksum); - return 0; - } - return 0; -} - -/* return -EAGAIN - no slots registered, nothing to be done */ -static int scan_get_next_index(struct ksm_scan *ksm_scan, int nscan) -{ - struct ksm_mem_slot *slot; - - if (list_empty(&slots)) - return -EAGAIN; - - slot = ksm_scan->slot_index; - - /* Are there pages left in this slot to scan? */ - if ((slot->npages - ksm_scan->page_index - nscan) > 0) { - ksm_scan->page_index += nscan; - return 0; - } - - list_for_each_entry_from(slot, &slots, link) { - if (slot == ksm_scan->slot_index) - continue; - ksm_scan->page_index = 0; - ksm_scan->slot_index = slot; - return 0; - } - - /* look like we finished scanning the whole memory, starting again */ - root_unstable_tree = RB_ROOT; - ksm_scan->page_index = 0; - ksm_scan->slot_index = list_first_entry(&slots, - struct ksm_mem_slot, link); - return 0; -} - -/* - * update slot_index - make sure ksm_scan will point to vaild data, - * it is possible that by the time we are here the data that ksm_scan was - * pointed to was released so we have to call this function every time after - * taking the slots_lock - */ -static void scan_update_old_index(struct ksm_scan *ksm_scan) -{ - struct ksm_mem_slot *slot; - - if (list_empty(&slots)) - return; - - list_for_each_entry(slot, &slots, link) { - if (ksm_scan->slot_index == slot) - return; - } - - ksm_scan->slot_index = list_first_entry(&slots, - struct ksm_mem_slot, link); - ksm_scan->page_index = 0; -} - -/** - * ksm_scan_start - the ksm scanner main worker function. - * @ksm_scan - the scanner. - * @scan_npages - number of pages we are want to scan before we return from this - * @function. - * - * (this function can be called from the kernel thread scanner, or from - * userspace ioctl context scanner) - * - * The function return -EAGAIN in case there are not slots to scan. 
- */ -static int ksm_scan_start(struct ksm_scan *ksm_scan, int scan_npages) -{ - struct ksm_mem_slot *slot; - struct page *page[1]; - int val; - int ret = 0; - - down_read(&slots_lock); - - scan_update_old_index(ksm_scan); - - while (scan_npages > 0 && likely(!freezing(current))) { - ret = scan_get_next_index(ksm_scan, 1); - if (ret) - goto out; - - slot = ksm_scan->slot_index; - - cond_resched(); - - /* - * If the page is swapped out or in swap cache, we don't want to - * scan it (it is just for performance). - */ - down_read(&slot->mm->mmap_sem); - if (is_present_pte(slot->mm, slot->addr + - ksm_scan->page_index * PAGE_SIZE)) { - val = get_user_pages(current, slot->mm, slot->addr + - ksm_scan->page_index * PAGE_SIZE , - 1, 0, 0, page, NULL); - up_read(&slot->mm->mmap_sem); - if (val == 1) { - if (!PageKsm(page[0])) - cmp_and_merge_page(ksm_scan, page[0]); - put_page(page[0]); - } - } else { - up_read(&slot->mm->mmap_sem); - } - scan_npages--; - } - scan_get_next_index(ksm_scan, 1); -out: - up_read(&slots_lock); - return ret; -} - -/* - * no multithreaded ksm for ovirt - */ -/*static int ksm_scan_ioctl_start(struct ksm_scan *ksm_scan, - struct ksm_user_scan *scan) -{ - if (!(scan->flags & ksm_control_flags_run)) - return 0; - - return ksm_scan_start(ksm_scan, scan->pages_to_scan); -}*/ - -static int ksm_scan_release(struct inode *inode, struct file *filp) -{ - struct ksm_scan *ksm_scan = filp->private_data; - - kfree(ksm_scan); - return 0; -} - -static long ksm_scan_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ -// struct ksm_scan *ksm_scan = filp->private_data; - //void __user *argp = (void __user *)arg; - int r = EINVAL; - - switch (ioctl) { - /* - * i didnt implemented the locking yet, and in ovirt we dont run - * multi-threaded ksm. 
- */ - /*case KSM_SCAN: { - struct ksm_user_scan scan; - - r = -EFAULT; - if (copy_from_user(&scan, argp, - sizeof(struct ksm_user_scan))) - break; - - r = ksm_scan_ioctl_start(ksm_scan, &scan); - }*/ - } - return r; -} - -static struct file_operations ksm_sma_fops = { - .release = ksm_sma_release, - .unlocked_ioctl = ksm_sma_ioctl, - .compat_ioctl = ksm_sma_ioctl, -}; - -static int ksm_dev_ioctl_create_shared_memory_area(void) -{ - int fd = -1; - struct ksm_sma *ksm_sma; - - ksm_sma = kmalloc(sizeof(struct ksm_sma), GFP_KERNEL); - if (!ksm_sma) - goto out; - - INIT_LIST_HEAD(&ksm_sma->sma_slots); - - fd = anon_inode_getfd("ksm-sma", &ksm_sma_fops, ksm_sma, 0); - if (fd < 0) - goto out_free; - - return fd; -out_free: - kfree(ksm_sma); -out: - return fd; -} - -static struct file_operations ksm_scan_fops = { - .release = ksm_scan_release, - .unlocked_ioctl = ksm_scan_ioctl, - .compat_ioctl = ksm_scan_ioctl, -}; - -static struct ksm_scan *ksm_scan_create(void) -{ - return kzalloc(sizeof(struct ksm_scan), GFP_KERNEL); -} - -static int ksm_dev_ioctl_create_scan(void) -{ - int fd = -ENOMEM; - struct ksm_scan *ksm_scan; - - ksm_scan = ksm_scan_create(); - if (!ksm_scan) - goto out; - - fd = anon_inode_getfd("ksm-scan", &ksm_scan_fops, ksm_scan, 0); - if (fd < 0) - goto out_free; - return fd; - -out_free: - kfree(ksm_scan); -out: - return fd; -} - -/* - * ksm_dev_ioctl_start_stop_kthread - control the kernel thread scanning running - * speed. - * This function allow us to control on the time the kernel thread will sleep - * how many pages it will scan between sleep and sleep, and how many pages it - * will maximum merge between sleep and sleep. 
- */ -static int ksm_dev_ioctl_start_stop_kthread(struct ksm_kthread_info *info) -{ - int rc = 0; - - down_write(&kthread_lock); - - if (info->flags & ksm_control_flags_run) { - if (!info->pages_to_scan) { - rc = EPERM; - up_write(&kthread_lock); - goto out; - } - } - - kthread_sleep = info->sleep; - kthread_pages_to_scan = info->pages_to_scan; - ksmd_flags = info->flags; - - up_write(&kthread_lock); - - if (ksmd_flags & ksm_control_flags_run) - wake_up_interruptible(&kthread_wait); - -out: - return rc; -} - -/* - * ksm_dev_ioctl_get_info_kthread - write into info the scanning information - * of the ksm kernel thread - */ -static void ksm_dev_ioctl_get_info_kthread(struct ksm_kthread_info *info) -{ - down_read(&kthread_lock); - - info->sleep = kthread_sleep; - info->pages_to_scan = kthread_pages_to_scan; - info->flags = ksmd_flags; - - up_read(&kthread_lock); -} - -static long ksm_dev_ioctl(struct file *filp, - unsigned int ioctl, unsigned long arg) -{ - void __user *argp = (void __user *)arg; - long r = -EINVAL; - - switch (ioctl) { - case KSM_GET_API_VERSION: - r = KSM_API_VERSION; - break; - case KSM_CREATE_SHARED_MEMORY_AREA: - r = ksm_dev_ioctl_create_shared_memory_area(); - break; - case KSM_CREATE_SCAN: - r = ksm_dev_ioctl_create_scan(); - break; - case KSM_START_STOP_KTHREAD: { - struct ksm_kthread_info info; - - r = -EFAULT; - if (copy_from_user(&info, argp, - sizeof(struct ksm_kthread_info))) - break; - - r = ksm_dev_ioctl_start_stop_kthread(&info); - break; - } - case KSM_GET_INFO_KTHREAD: { - struct ksm_kthread_info info; - - ksm_dev_ioctl_get_info_kthread(&info); - r = -EFAULT; - if (copy_to_user(argp, &info, - sizeof(struct ksm_kthread_info))) - break; - r = 0; - break; - } - default: - break; - } - return r; -} - -static struct file_operations ksm_chardev_ops = { - .unlocked_ioctl = ksm_dev_ioctl, - .compat_ioctl = ksm_dev_ioctl, - .owner = THIS_MODULE, -}; - -static struct miscdevice ksm_dev = { - KSM_MINOR, - "ksm", - &ksm_chardev_ops, -}; - -int 
kthread_ksm_scan_thread(void *nothing) -{ - while (!kthread_should_stop()) { - if (ksmd_flags & ksm_control_flags_run) { - down_read(&kthread_lock); - ksm_scan_start(&kthread_ksm_scan, - kthread_pages_to_scan); - up_read(&kthread_lock); - schedule_timeout_interruptible( - usecs_to_jiffies(kthread_sleep)); - } else - wait_event_interruptible(kthread_wait, - ksmd_flags & ksm_control_flags_run || - kthread_should_stop()); - try_to_freeze(); - } - return 0; -} - -static int __init ksm_init(void) -{ - int r; - - r = ksm_slab_init(); - if (r) - goto out; - - r = rmap_hash_init(); - if (r) - goto out_free1; - - kthread = kthread_run(kthread_ksm_scan_thread, NULL, "kksmd"); - if (IS_ERR(kthread)) { - printk(KERN_ERR "ksm: creating kthread failed\n"); - r = PTR_ERR(kthread); - goto out_free2; - } - - r = init_wp_notifier(); - if (r) - goto out_free3; - - r = misc_register(&ksm_dev); - if (r) { - printk(KERN_ERR "ksm: misc device register failed\n"); - goto out_free4; - } - - printk(KERN_WARNING "ksm loaded\n"); - return 0; - -out_free4: - exit_wp_notifier(); -out_free3: - kthread_stop(kthread); -out_free2: - rmap_hash_free(); -out_free1: - ksm_slab_free(); -out: - return r; -} - -static void __exit ksm_exit(void) -{ - misc_deregister(&ksm_dev); - exit_wp_notifier(); - ksmd_flags = ksm_control_flags_run; - kthread_stop(kthread); - rmap_hash_free(); - ksm_slab_free(); -} - -module_init(ksm_init) -module_exit(ksm_exit) diff --git a/kernel/ksm/wp_notifier.c b/kernel/ksm/wp_notifier.c deleted file mode 100644 index 841490f0..00000000 --- a/kernel/ksm/wp_notifier.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2008-2009 Red Hat, Inc. 
- * Authors: - * Andrea Arcangeli - * Izik Eidus - */ - -//#define KPROBES_ENABLE_SWAP -#include <linux/version.h> -#include <linux/kernel.h> -#include <linux/kprobes.h> -#include <linux/kallsyms.h> -#ifdef KPROBES_ENABLE_SWAP -#include <linux/pagemap.h> -#include <linux/rmap.h> -#endif -#include "wp_notifier.h" - -static int pre_do_wp_page(struct kprobe *p, - struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long address; - - /* - * kprobes runs with irq disabled and preempt disabled but we - * need irq enabled to flush the smp tlb with IPIs while - * tearing down sptes. - */ - local_irq_enable(); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - mm = (struct mm_struct *) regs->rdi; - address = (unsigned long) regs->rdx; -#else - mm = (struct mm_struct *) regs->di; - address = (unsigned long) regs->dx; -#endif - kvm_wp_notifier(mm, address); - - local_irq_disable(); - - return 0; -} - -#ifdef KPROBES_ENABLE_SWAP - -static unsigned long vma_address(struct page *page, struct vm_area_struct *vma) -{ - pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); - unsigned long address; - - address = vma->vm_start + ((pgoff - vma->vm_pgoff) << PAGE_SHIFT); - if (unlikely(address < vma->vm_start || address >= vma->vm_end)) { - /* page should be within @vma mapping range */ - return -EFAULT; - } - return address; -} - -static struct anon_vma *__page_get_anon_vma(struct page *page) -{ - struct anon_vma *anon_vma; - unsigned long anon_mapping; - - anon_mapping = (unsigned long) page->mapping; - if (!(anon_mapping & PAGE_MAPPING_ANON)) - goto out; - if (!page_mapped(page)) - goto out; - - anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - return anon_vma; -out: - return NULL; -} - -static int pre_page_remove_rmap(struct kprobe *p, struct pt_regs *regs) -{ - struct mm_struct *mm; - unsigned long address; - struct page *page; - struct vm_area_struct *vma; - struct anon_vma *anon_vma; - - /* - * kprobes runs with irq disabled and preempt 
disabled but we - * need irq enabled to flush the smp tlb with IPIs while - * tearing down sptes. - */ - local_irq_enable(); - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) - page = (struct page *) regs->rdi; -#else - page = (struct page *) regs->di; -#endif - /* - * We care just about try_to_unmap here (other cases for ksm wo_wp_page - * notifier will handle) therefore page must be lock. (make life easier - * for filebacked mapping as well) - */ - if (!PageLocked(page)) - goto out; - - /* - * Ok, every caller for page_rmap_remove that hold page lock, hold - * anom_vma->lock as well, so no need to take it. - */ - if (PageAnon(page)) { - anon_vma = __page_get_anon_vma(page); - if (!anon_vma) - goto out; - - list_for_each_entry(vma, &anon_vma->head, anon_vma_node) { - mm = vma->vm_mm; - if (!(vma->vm_flags & VM_NONLINEAR)) { - address = vma_address(page, vma); - if (address != -EFAULT) - kvm_wp_notifier(mm, address); - } - } - } -out: - local_irq_disable(); - - return 0; -} - -static struct kprobe swap_not_kprobe; -#endif - -static struct kprobe wp_not_kprobe; - -int init_wp_notifier(void) -{ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) && defined(CONFIG_KALLSYMS) && !defined(RHEL_RELEASE_CODE) - wp_not_kprobe.addr = (kprobe_opcode_t *)kallsyms_lookup_name("do_wp_page"); - if (!wp_not_kprobe.addr) { - printk(KERN_WARNING "do_wp_page not found"); - return 1; - } -#else - wp_not_kprobe.symbol_name = "do_wp_page"; -#endif - wp_not_kprobe.pre_handler = pre_do_wp_page; - - if (register_kprobe(&wp_not_kprobe)) { - printk(KERN_WARNING "cant register kprobe for do_wp_page"); - return 1; - } - -#ifdef KPROBES_ENABLE_SWAP -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) && defined(CONFIG_KALLSYMS) && !defined(RHEL_RELEASE_CODE) - swap_not_kprobe.addr = (kprobe_opcode_t *)kallsyms_lookup_name("page_remove_rmap"); - if (!swap_not_kprobe.addr) { - printk(KERN_WARNING "page_remove_rmap not found"); - return 1; - } -#else - swap_not_kprobe.symbol_name = "page_remove_rmap"; 
-#endif - swap_not_kprobe.pre_handler = pre_page_remove_rmap; - - if (register_kprobe(&swap_not_kprobe)) { - printk(KERN_WARNING "cant register kprobe for do_wp_page"); - return 1; - } -#endif - - return 0; -} - -void exit_wp_notifier(void) -{ - unregister_kprobe(&wp_not_kprobe); -#ifdef KPROBES_ENABLE_SWAP - unregister_kprobe(&swap_not_kprobe); -#endif -} diff --git a/kernel/ksm/wp_notifier.h b/kernel/ksm/wp_notifier.h deleted file mode 100644 index 23638b98..00000000 --- a/kernel/ksm/wp_notifier.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef WP_NOTIFIER_H -#define WP_NOTIFIER_H - -#include <linux/mm.h> - -#ifndef CONFIG_MMU_NOTIFIER -int init_wp_notifier(void); -void exit_wp_notifier(void); -#else -static inline int init_wp_notifier(void) {return 0;} -static inline void exit_wp_notifier(void) {return;} -#endif - -void kvm_wp_notifier(struct mm_struct *mm, - unsigned long address); - -#endif diff --git a/kernel/kvm-kmod.spec b/kernel/kvm-kmod.spec deleted file mode 100644 index 89b3d882..00000000 --- a/kernel/kvm-kmod.spec +++ /dev/null @@ -1,52 +0,0 @@ -%define kmod_name kvm - -Name: kvm-kmod -Version: 0.0 -Release: 0 -Summary: %{kmod_name} kernel module - -Group: System Environment/Kernel -License: GPL -URL: http://www.qumranet.com -BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} - -ExclusiveArch: i386 x86_64 ia64 - -%description -This kernel module provides support for virtual machines using hardware support -(Intel VT-x&VT-i or AMD SVM). 
- -%prep - -%build - -rm -rf %{buildroot} - -%install - -%define kverrel unknown -%define moddir /lib/modules/%{kverrel}/extra -mkdir -p %{buildroot}/%{moddir} -cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-*.ko %{buildroot}/%{moddir} -chmod u+x %{buildroot}/%{moddir}/%{kmod_name}*.ko - -%post - -depmod %{kverrel} - -%postun - -depmod %{kverrel} - -%clean -%{__rm} -rf %{buildroot} - -%files -%{moddir}/%{kmod_name}.ko -%ifarch i386 x86_64 -%{moddir}/%{kmod_name}-amd.ko -%endif -%{moddir}/%{kmod_name}-intel.ko - - -%changelog diff --git a/kernel/powerpc/Makefile.pre b/kernel/powerpc/Makefile.pre deleted file mode 100644 index e38baf13..00000000 --- a/kernel/powerpc/Makefile.pre +++ /dev/null @@ -1 +0,0 @@ -prerequisite: diff --git a/kernel/powerpc/hack-module.awk b/kernel/powerpc/hack-module.awk deleted file mode 100644 index 570b034d..00000000 --- a/kernel/powerpc/hack-module.awk +++ /dev/null @@ -1,5 +0,0 @@ -/MODULE_AUTHOR/ { - printf("MODULE_INFO(version, \"%s\");\n", version) -} - -{ print } diff --git a/kernel/request-irq-compat.c b/kernel/request-irq-compat.c deleted file mode 100644 index 51193cb3..00000000 --- a/kernel/request-irq-compat.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * compat for request_irq - */ - -#include <linux/interrupt.h> -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) - -static kvm_irq_handler_t kvm_irq_handlers[NR_IRQS]; -static DEFINE_MUTEX(kvm_irq_handlers_mutex); - -static irqreturn_t kvm_irq_thunk(int irq, void *dev_id, struct pt_regs *regs) -{ - kvm_irq_handler_t handler = kvm_irq_handlers[irq]; - return handler(irq, dev_id); -} - -int kvm_request_irq(unsigned int a, kvm_irq_handler_t handler, - unsigned long c, const char *d, void *e) -{ - int rc = -EBUSY; - kvm_irq_handler_t old; - - mutex_lock(&kvm_irq_handlers_mutex); - old = kvm_irq_handlers[a]; - if (old) - goto out; - kvm_irq_handlers[a] = handler; - rc = request_irq(a, kvm_irq_thunk, c, d, e); - if (rc) - kvm_irq_handlers[a] = NULL; -out: - 
mutex_unlock(&kvm_irq_handlers_mutex); - return rc; -} - -void kvm_free_irq(unsigned int irq, void *dev_id) -{ - mutex_lock(&kvm_irq_handlers_mutex); - free_irq(irq, dev_id); - kvm_irq_handlers[irq] = NULL; - mutex_unlock(&kvm_irq_handlers_mutex); -} - -#endif diff --git a/kernel/unifdef.h b/kernel/unifdef.h deleted file mode 100644 index 6fc7be08..00000000 --- a/kernel/unifdef.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef KVM_UNIFDEF_H -#define KVM_UNIFDEF_H - -#ifdef __i386__ -#ifndef CONFIG_X86_32 -#define CONFIG_X86_32 1 -#endif -#endif - -#ifdef __x86_64__ -#ifndef CONFIG_X86_64 -#define CONFIG_X86_64 1 -#endif -#endif - -#if defined(__i386__) || defined (__x86_64__) -#ifndef CONFIG_X86 -#define CONFIG_X86 1 -#endif -#endif - -#ifdef __ia64__ -#ifndef CONFIG_IA64 -#define CONFIG_IA64 1 -#endif -#endif - -#ifdef __PPC__ -#ifndef CONFIG_PPC -#define CONFIG_PPC 1 -#endif -#endif - -#ifdef __s390__ -#ifndef CONFIG_S390 -#define CONFIG_S390 1 -#endif -#endif - -#endif diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild deleted file mode 100644 index 2457704b..00000000 --- a/kernel/x86/Kbuild +++ /dev/null @@ -1,18 +0,0 @@ -# trick to get the kvm-specific CONFIG_KVM_* definitions, -# because the kernel source tree won't have them -include $(obj)/../config.kbuild - -obj-m := kvm.o kvm-intel.o kvm-amd.o -kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o ../anon_inodes.o irq.o i8259.o \ - lapic.o ioapic.o preempt.o i8254.o coalesced_mmio.o irq_comm.o \ - ../external-module-compat.o ../request-irq-compat.o -ifeq ($(EXT_CONFIG_KVM_TRACE),y) -kvm-objs += kvm_trace.o -endif -ifeq ($(CONFIG_IOMMU_API),y) -kvm-objs += iommu.o -endif -kvm-intel-objs := vmx.o vmx-debug.o ../external-module-compat.o -kvm-amd-objs := svm.o ../external-module-compat.o - -CFLAGS_kvm_main.o = -DKVM_MAIN diff --git a/kernel/x86/Makefile.pre b/kernel/x86/Makefile.pre deleted file mode 100644 index e38baf13..00000000 --- a/kernel/x86/Makefile.pre +++ /dev/null @@ -1 +0,0 @@ -prerequisite: diff --git 
a/kernel/x86/debug.h b/kernel/x86/debug.h deleted file mode 100644 index 35793652..00000000 --- a/kernel/x86/debug.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __KVM_DEBUG_H -#define __KVM_DEBUG_H - -#ifdef KVM_DEBUG - -void show_msrs(struct kvm_vcpu *vcpu); - - -void show_irq(struct kvm_vcpu *vcpu, int irq); -void show_page(struct kvm_vcpu *vcpu, gva_t addr); -void show_u64(struct kvm_vcpu *vcpu, gva_t addr); -void show_code(struct kvm_vcpu *vcpu); -int vm_entry_test(struct kvm_vcpu *vcpu); - -void vmcs_dump(struct kvm_vcpu *vcpu); -void regs_dump(struct kvm_vcpu *vcpu); -void sregs_dump(struct kvm_vcpu *vcpu); -void show_pending_interrupts(struct kvm_vcpu *vcpu); -void vcpu_dump(struct kvm_vcpu *vcpu); - -#endif - -#endif diff --git a/kernel/x86/external-module-compat.h b/kernel/x86/external-module-compat.h deleted file mode 100644 index a03087db..00000000 --- a/kernel/x86/external-module-compat.h +++ /dev/null @@ -1,406 +0,0 @@ - -/* - * Compatibility header for building as an external module. 
- */ - -#include <linux/compiler.h> -#include <linux/version.h> - -#include "../external-module-compat-comm.h" - -#include <asm/msr.h> - -/* for the MSR_VM_* constants that were moved to svm.h */ -#include <asm/svm.h> - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) - -#ifndef _EFER_SCE -#define _EFER_SCE 0 /* SYSCALL/SYSRET */ -#endif - -#ifndef EFER_SCE -#define EFER_SCE (1<<_EFER_SCE) -#endif - -#endif - -#ifndef MSR_VM_CR -#define MSR_VM_CR 0xc0010114 -#endif - -#ifndef MSR_VM_HSAVE_PA -#define MSR_VM_HSAVE_PA 0xc0010117 -#endif - -#ifndef _EFER_SVME -#define _EFER_SVME 12 -#define EFER_SVME (1<<_EFER_SVME) -#endif - -#include <asm/cpufeature.h> - -#ifndef X86_FEATURE_SVM -#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ -#endif - -#include <linux/smp.h> - -#ifndef X86_CR0_PE -#define X86_CR0_PE 0x00000001 -#endif - -#ifndef X86_CR0_MP -#define X86_CR0_MP 0x00000002 -#endif - -#ifndef X86_CR0_EM -#define X86_CR0_EM 0x00000004 -#endif - -#ifndef X86_CR0_TS -#define X86_CR0_TS 0x00000008 -#endif - -#ifndef X86_CR0_ET -#define X86_CR0_ET 0x00000010 -#endif - -#ifndef X86_CR0_NE -#define X86_CR0_NE 0x00000020 -#endif - -#ifndef X86_CR0_WP -#define X86_CR0_WP 0x00010000 -#endif - -#ifndef X86_CR0_AM -#define X86_CR0_AM 0x00040000 -#endif - -#ifndef X86_CR0_NW -#define X86_CR0_NW 0x20000000 -#endif - -#ifndef X86_CR0_CD -#define X86_CR0_CD 0x40000000 -#endif - -#ifndef X86_CR0_PG -#define X86_CR0_PG 0x80000000 -#endif - -#ifndef X86_CR3_PWT -#define X86_CR3_PWT 0x00000008 -#endif - -#ifndef X86_CR3_PCD -#define X86_CR3_PCD 0x00000010 -#endif - -#ifndef X86_CR4_VMXE -#define X86_CR4_VMXE 0x00002000 -#endif - -#undef X86_CR8_TPR -#define X86_CR8_TPR 0x0f - -/* - * 2.6.22 does not define set_64bit() under nonpae - */ -#ifdef CONFIG_X86_32 - -#include <asm/cmpxchg.h> - -static inline void __kvm_set_64bit(u64 *ptr, u64 val) -{ - unsigned int low = val; - unsigned int high = val >> 32; - - __asm__ __volatile__ ( - "\n1:\t" - "movl (%0), %%eax\n\t" - "movl 
4(%0), %%edx\n\t" - "lock cmpxchg8b (%0)\n\t" - "jnz 1b" - : /* no outputs */ - : "D"(ptr), - "b"(low), - "c"(high) - : "ax","dx","memory"); -} - -#undef set_64bit -#define set_64bit __kvm_set_64bit - -static inline unsigned long long __kvm_cmpxchg64(volatile void *ptr, - unsigned long long old, - unsigned long long new) -{ - unsigned long long prev; - __asm__ __volatile__("lock cmpxchg8b %3" - : "=A"(prev) - : "b"((unsigned long)new), - "c"((unsigned long)(new >> 32)), - "m"(*__xg(ptr)), - "0"(old) - : "memory"); - return prev; -} - -#define kvm_cmpxchg64(ptr,o,n)\ - ((__typeof__(*(ptr)))__kvm_cmpxchg64((ptr),(unsigned long long)(o),\ - (unsigned long long)(n))) - -#undef cmpxchg64 -#define cmpxchg64(ptr, o, n) kvm_cmpxchg64(ptr, o, n) - -#endif - -#ifndef CONFIG_PREEMPT_NOTIFIERS -/* - * Include sched|preempt.h before defining CONFIG_PREEMPT_NOTIFIERS to avoid - * a miscompile. - */ -#include <linux/sched.h> -#include <linux/preempt.h> -#define CONFIG_PREEMPT_NOTIFIERS -#define CONFIG_PREEMPT_NOTIFIERS_COMPAT - -struct preempt_notifier; - -struct preempt_ops { - void (*sched_in)(struct preempt_notifier *notifier, int cpu); - void (*sched_out)(struct preempt_notifier *notifier, - struct task_struct *next); -}; - -struct preempt_notifier { - struct list_head link; - struct task_struct *tsk; - struct preempt_ops *ops; -}; - -void preempt_notifier_register(struct preempt_notifier *notifier); -void preempt_notifier_unregister(struct preempt_notifier *notifier); - -static inline void preempt_notifier_init(struct preempt_notifier *notifier, - struct preempt_ops *ops) -{ - notifier->ops = ops; -} - -void start_special_insn(void); -void end_special_insn(void); -void in_special_section(void); -void special_reload_dr7(void); - -void preempt_notifier_sys_init(void); -void preempt_notifier_sys_exit(void); - -#else - -static inline void start_special_insn(void) {} -static inline void end_special_insn(void) {} -static inline void in_special_section(void) {} -static inline void 
special_reload_dr7(void) {} - -static inline void preempt_notifier_sys_init(void) {} -static inline void preempt_notifier_sys_exit(void) {} - -#endif - -/* CONFIG_HAS_IOMEM is apparently fairly new too (2.6.21 for x86_64). */ -#ifndef CONFIG_HAS_IOMEM -#define CONFIG_HAS_IOMEM 1 -#endif - -/* X86_FEATURE_NX is missing in some x86_64 kernels */ - -#include <asm/cpufeature.h> - -#ifndef X86_FEATURE_NX -#define X86_FEATURE_NX (1*32+20) -#endif - -#undef true -#define true 1 -#undef false -#define false 0 - -/* EFER_LMA and EFER_LME are missing in pre 2.6.24 i386 kernels */ -#ifndef EFER_LME -#define _EFER_LME 8 /* Long mode enable */ -#define _EFER_LMA 10 /* Long mode active (read-only) */ -#define EFER_LME (1<<_EFER_LME) -#define EFER_LMA (1<<_EFER_LMA) -#endif - -struct kvm_desc_struct { - union { - struct { unsigned int a, b; }; - struct { - u16 limit0; - u16 base0; - unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; - unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; - }; - - }; -} __attribute__((packed)); - -struct kvm_ldttss_desc64 { - u16 limit0; - u16 base0; - unsigned base1 : 8, type : 5, dpl : 2, p : 1; - unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; - u32 base3; - u32 zero1; -} __attribute__((packed)); - -struct kvm_desc_ptr { - unsigned short size; - unsigned long address; -} __attribute__((packed)); - -#include <asm/msr.h> -#ifndef MSR_FS_BASE -#define MSR_FS_BASE 0xc0000100 -#endif -#ifndef MSR_GS_BASE -#define MSR_GS_BASE 0xc0000101 -#endif - -/* undefine lapic */ -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) - -#undef lapic - -#endif - -#include <asm/hw_irq.h> -#ifndef NMI_VECTOR -#define NMI_VECTOR 2 -#endif - -#ifndef MSR_MTRRcap -#define MSR_MTRRcap 0x0fe -#define MSR_MTRRfix64K_00000 0x250 -#define MSR_MTRRfix16K_80000 0x258 -#define MSR_MTRRfix16K_A0000 0x259 -#define MSR_MTRRfix4K_C0000 0x268 -#define MSR_MTRRfix4K_C8000 0x269 -#define MSR_MTRRfix4K_D0000 0x26a -#define MSR_MTRRfix4K_D8000 0x26b -#define MSR_MTRRfix4K_E0000 0x26c 
-#define MSR_MTRRfix4K_E8000 0x26d -#define MSR_MTRRfix4K_F0000 0x26e -#define MSR_MTRRfix4K_F8000 0x26f -#define MSR_MTRRdefType 0x2ff -#endif - -#ifndef MSR_IA32_CR_PAT -#define MSR_IA32_CR_PAT 0x00000277 -#endif - -/* Define DEBUGCTLMSR bits */ -#ifndef DEBUGCTLMSR_LBR - -#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ -#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ - -#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) -#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) - -#endif - -#include <asm/asm.h> - -#ifndef __ASM_SIZE -# define ____ASM_FORM(x) " " #x " " -# ifdef CONFIG_X86_64 -# define __ASM_SIZE(inst) ____ASM_FORM(inst##q) -# else -# define __ASM_SIZE(inst) ____ASM_FORM(inst##l) -# endif -#endif - -#ifndef _ASM_PTR -# ifdef CONFIG_X86_64 -# define _ASM_PTR ".quad" -# else -# define _ASM_PTR ".long" -# endif -#endif - -/* Intel VT MSRs */ -#ifndef MSR_IA32_VMX_BASIC -#define MSR_IA32_VMX_BASIC 0x00000480 -#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 -#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 -#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 -#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 -#define MSR_IA32_VMX_MISC 0x00000485 -#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 -#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 -#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 -#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 -#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a -#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b -#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c -#endif - -#ifndef MSR_IA32_FEATURE_CONTROL -#define MSR_IA32_FEATURE_CONTROL 0x0000003a - -#define FEATURE_CONTROL_LOCKED (1<<0) -#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) && defined(__x86_64__) - -#undef set_debugreg -#define set_debugreg(value, register) \ - __asm__("movq %0,%%db" #register \ - : /* no output */ \ - :"r" ((unsigned long)value)) - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) - -struct mtrr_var_range { - u32 base_lo; - u32 
base_hi; - u32 mask_lo; - u32 mask_hi; -}; - -/* In the Intel processor's MTRR interface, the MTRR type is always held in - an 8 bit field: */ -typedef u8 mtrr_type; - -#define MTRR_NUM_FIXED_RANGES 88 -#define MTRR_MAX_VAR_RANGES 256 - -struct mtrr_state_type { - struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; - mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; - unsigned char enabled; - unsigned char have_fixed; - mtrr_type def_type; -}; - -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) -#if !defined(RHEL_RELEASE_CODE) || RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,4) - -typedef u64 phys_addr_t; - -#endif -#endif - -#ifndef CONFIG_HAVE_KVM_IRQCHIP -#define CONFIG_HAVE_KVM_IRQCHIP 1 -#endif diff --git a/kernel/x86/hack-module.awk b/kernel/x86/hack-module.awk deleted file mode 100644 index 5a491833..00000000 --- a/kernel/x86/hack-module.awk +++ /dev/null @@ -1,103 +0,0 @@ -BEGIN { split("INIT_WORK tsc_khz desc_struct ldttss_desc64 desc_ptr " \ - "hrtimer_add_expires_ns hrtimer_get_expires " \ - "hrtimer_get_expires_ns hrtimer_start_expires " \ - "hrtimer_expires_remaining smp_send_reschedule " \ - "on_each_cpu relay_open request_irq free_irq get_user_pages_fast" , compat_apis); } - -/^int kvm_init\(/ { anon_inodes = 1 } - -/return 0;/ && anon_inodes { - print "\tr = kvm_init_anon_inodes();"; - print "\tif (r) {"; - print "\t\t__free_page(bad_page);"; - print "\t\tgoto out;"; - print "\t}"; - print "\tpreempt_notifier_sys_init();"; - printf("\tprintk(\"loaded kvm module (%s)\\n\");\n", version); - anon_inodes = 0 -} - -/^void kvm_exit/ { anon_inodes_exit = 1 } - -/\}/ && anon_inodes_exit { - print "\tkvm_exit_anon_inodes();"; - print "\tpreempt_notifier_sys_exit();"; - anon_inodes_exit = 0 -} - -/MODULE_AUTHOR/ { - printf("MODULE_INFO(version, \"%s\");\n", version) -} - -/^static void __vmx_load_host_state/ { - vmx_load_host_state = 1 -} - -/vmcs_readl\(HOST_GS_BASE\)/ && vmx_load_host_state { - $0 = "\t\twrmsrl(MSR_GS_BASE, gsbase);"; - 
vmx_load_host_state = 0 -} - -/atomic_inc\(&kvm->mm->mm_count\);/ { $0 = "mmget(&kvm->mm->mm_count);" } - -/^\t\.fault = / { - fcn = gensub(/,/, "", "g", $3) - $0 = "\t.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(" fcn ")," -} - -/^static int (.*_stat_get|lost_records_get)/ { - $3 = "__" $3 -} - -/DEFINE_SIMPLE_ATTRIBUTE.*(_stat_get|lost_records_get)/ { - name = gensub(/,/, "", "g", $2); - print "MAKE_SIMPLE_ATTRIBUTE_GETTER(" name ")" -} - -{ sub(/linux\/mm_types\.h/, "linux/mm.h") } - -{ sub(/\<__user\>/, " ") } - -/^\t\.name = "kvm"/ { $0 = "\tset_kset_name(\"kvm\")," } - -/#include <linux\/compiler.h>/ { $0 = "" } -/#include <linux\/clocksource.h>/ { $0 = "" } - -{ sub(/\<hrtimer_init\>/, "hrtimer_init_p") } -{ sub(/\<hrtimer_start\>/, "hrtimer_start_p") } -{ sub(/\<hrtimer_cancel\>/, "hrtimer_cancel_p") } - -/case KVM_CAP_SYNC_MMU/ { $0 = "#ifdef CONFIG_MMU_NOTIFIER\n" $0 "\n#endif" } - -{ - for (i in compat_apis) { - ident = compat_apis[i] - sub("\\<" ident "\\>", "kvm_" ident) - } -} - -/\kvm_.*_fops\.owner = module;/ { $0 = "IF_ANON_INODES_DOES_REFCOUNTS(" $0 ")" } - -{ print } - -/kvm_x86_ops->run/ { - print "\tspecial_reload_dr7();" -} - -/unsigned long flags;/ && vmx_load_host_state { - print "\tunsigned long gsbase;" -} - -/local_irq_save/ && vmx_load_host_state { - print "\t\tgsbase = vmcs_readl(HOST_GS_BASE);" -} - -/\tkvm_init_debug/ { - print "\thrtimer_kallsyms_resolve();" -} -/apic->timer.dev.function =/ { - print "\thrtimer_data_pointer(&apic->timer.dev);" -} -/pt->timer.function =/ { - print "\thrtimer_data_pointer(&pt->timer);" -} diff --git a/kernel/x86/preempt.c b/kernel/x86/preempt.c deleted file mode 100644 index e59e3636..00000000 --- a/kernel/x86/preempt.c +++ /dev/null @@ -1,262 +0,0 @@ - -#ifdef CONFIG_PREEMPT_NOTIFIERS_COMPAT - -#include <linux/sched.h> -#include <linux/percpu.h> -#include <linux/kvm.h> - -static DEFINE_SPINLOCK(pn_lock); -static LIST_HEAD(pn_list); - -#define dprintk(fmt) do { \ - if (0) \ - printk("%s (%d/%d): " fmt, 
__FUNCTION__, \ - current->pid, raw_smp_processor_id()); \ - } while (0) - -#if !defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)) -#define debugreg(x) debugreg[x] -#else -#define debugreg(x) debugreg##x -#endif - -static void preempt_enable_sched_out_notifiers(void) -{ - asm volatile ("mov %0, %%db0" : : "r"(schedule)); - asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); - current->thread.debugreg(7) = 0ul; -#ifdef TIF_DEBUG - clear_tsk_thread_flag(current, TIF_DEBUG); -#endif -} - -static void preempt_enable_sched_in_notifiers(void * addr) -{ - asm volatile ("mov %0, %%db0" : : "r"(addr)); - asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); - current->thread.debugreg(0) = (unsigned long) addr; - current->thread.debugreg(7) = 0x701ul; -#ifdef TIF_DEBUG - set_tsk_thread_flag(current, TIF_DEBUG); -#endif -} - -void special_reload_dr7(void) -{ -#ifndef KVM_CAP_SET_GUEST_DEBUG - asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); -#endif -} -EXPORT_SYMBOL_GPL(special_reload_dr7); - -static void __preempt_disable_notifiers(void) -{ - asm volatile ("mov %0, %%db7" : : "r"(0ul)); -} - -static void preempt_disable_notifiers(void) -{ - __preempt_disable_notifiers(); - current->thread.debugreg(7) = 0ul; -#ifdef TIF_DEBUG - clear_tsk_thread_flag(current, TIF_DEBUG); -#endif -} - -static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip) -{ - struct preempt_notifier *pn; - int cpu = raw_smp_processor_id(); - int found = 0; - - dprintk(" - in\n"); - //dump_stack(); - spin_lock(&pn_lock); - list_for_each_entry(pn, &pn_list, link) - if (pn->tsk == current) { - found = 1; - break; - } - spin_unlock(&pn_lock); - - if (found) { - if ((void *) *ip != schedule) { - dprintk("sched_in\n"); - preempt_enable_sched_out_notifiers(); - - preempt_disable(); - local_irq_enable(); - pn->ops->sched_in(pn, cpu); - local_irq_disable(); - preempt_enable_no_resched(); - } else { - void * sched_in_addr; - dprintk("sched_out\n"); -#ifdef CONFIG_X86_64 - 
sched_in_addr = **(ip+3); -#else - /* no special debug stack switch on x86 */ - sched_in_addr = (void *) *(ip+3); -#endif - preempt_enable_sched_in_notifiers(sched_in_addr); - - preempt_disable(); - local_irq_enable(); - pn->ops->sched_out(pn, NULL); - local_irq_disable(); - preempt_enable_no_resched(); - } - } else - __preempt_disable_notifiers(); - dprintk(" - out\n"); -} - -unsigned long orig_int1_handler; - -#ifdef CONFIG_X86_64 - -#define SAVE_REGS \ - "push %rax; push %rbx; push %rcx; push %rdx; " \ - "push %rsi; push %rdi; push %rbp; " \ - "push %r8; push %r9; push %r10; push %r11; " \ - "push %r12; push %r13; push %r14; push %r15" - -#define RESTORE_REGS \ - "pop %r15; pop %r14; pop %r13; pop %r12; " \ - "pop %r11; pop %r10; pop %r9; pop %r8; " \ - "pop %rbp; pop %rdi; pop %rsi; " \ - "pop %rdx; pop %rcx; pop %rbx; pop %rax " - -#define TMP "%rax" - -#else - -#define SAVE_REGS "pusha" -#define RESTORE_REGS "popa" -#define TMP "%eax" - -#endif - -asm ("pn_int1_handler: \n\t" - "push " TMP " \n\t" - "mov %db7, " TMP " \n\t" - "cmp $0x701, " TMP " \n\t" - "pop " TMP " \n\t" - "jnz .Lnotme \n\t" - "push " TMP " \n\t" - "mov %db6, " TMP " \n\t" - "test $0x1, " TMP " \n\t" - "pop " TMP " \n\t" - "jz .Lnotme \n\t" - SAVE_REGS "\n\t" -#ifdef CONFIG_X86_64 - "leaq 120(%rsp),%rdi\n\t" -#else - "leal 32(%esp),%eax\n\t" -#endif - "call preempt_notifier_trigger \n\t" - RESTORE_REGS "\n\t" -#ifdef CONFIG_X86_64 - "orq $0x10000, 16(%rsp) \n\t" - "iretq \n\t" -#else - "orl $0x10000, 8(%esp) \n\t" - "iret \n\t" -#endif - ".Lnotme: \n\t" -#ifdef CONFIG_X86_64 - "jmpq *orig_int1_handler\n\t" -#else - "jmpl *orig_int1_handler\n\t" -#endif - ); - -void preempt_notifier_register(struct preempt_notifier *notifier) -{ - unsigned long flags; - - dprintk(" - in\n"); - spin_lock_irqsave(&pn_lock, flags); - preempt_enable_sched_out_notifiers(); - notifier->tsk = current; - list_add(¬ifier->link, &pn_list); - spin_unlock_irqrestore(&pn_lock, flags); - dprintk(" - out\n"); -} - -void 
preempt_notifier_unregister(struct preempt_notifier *notifier) -{ - unsigned long flags; - - dprintk(" - in\n"); - spin_lock_irqsave(&pn_lock, flags); - list_del(¬ifier->link); - spin_unlock_irqrestore(&pn_lock, flags); - preempt_disable_notifiers(); - dprintk(" - out\n"); -} - -struct intr_gate { - u16 offset0; - u16 segment; - u16 junk; - u16 offset1; -#ifdef CONFIG_X86_64 - u32 offset2; - u32 blah; -#endif -} __attribute__((packed)); - -struct idt_desc { - u16 limit; - struct intr_gate *gates; -} __attribute__((packed)); - -static struct intr_gate orig_int1_gate; - -void pn_int1_handler(void); - -void preempt_notifier_sys_init(void) -{ - struct idt_desc idt_desc; - struct intr_gate *int1_gate; - - printk("kvm: emulating preempt notifiers;" - " do not benchmark on this machine\n"); - dprintk("\n"); - asm ("sidt %0" : "=m"(idt_desc)); - int1_gate = &idt_desc.gates[1]; - orig_int1_gate = *int1_gate; - orig_int1_handler = int1_gate->offset0 - | ((u32)int1_gate->offset1 << 16); -#ifdef CONFIG_X86_64 - orig_int1_handler |= (u64)int1_gate->offset2 << 32; -#endif - int1_gate->offset0 = (unsigned long)pn_int1_handler; - int1_gate->offset1 = (unsigned long)pn_int1_handler >> 16; -#ifdef CONFIG_X86_64 - int1_gate->offset2 = (unsigned long)pn_int1_handler >> 32; -#endif -} - -static void do_disable(void *blah) -{ -#ifdef TIF_DEBUG - if (!test_tsk_thread_flag(current, TIF_DEBUG)) -#else - if (!current->thread.debugreg(7)) -#endif - __preempt_disable_notifiers(); -} - -void preempt_notifier_sys_exit(void) -{ - struct idt_desc idt_desc; - - dprintk("\n"); - kvm_on_each_cpu(do_disable, NULL, 1); - asm ("sidt %0" : "=m"(idt_desc)); - idt_desc.gates[1] = orig_int1_gate; -} - -#endif diff --git a/kernel/x86/vmx-debug.c b/kernel/x86/vmx-debug.c deleted file mode 100644 index 29316a0e..00000000 --- a/kernel/x86/vmx-debug.c +++ /dev/null @@ -1,1078 +0,0 @@ -/* - * Kernel-based Virtual Machine driver for Linux - * - * This module enables machines with Intel VT-x extensions to run 
virtual - * machines without emulation or binary translation. - * - * Debug support - * - * Copyright (C) 2006 Qumranet, Inc. - * - * Authors: - * Yaniv Kamay <yaniv@qumranet.com> - * Avi Kivity <avi@qumranet.com> - * - */ - -#include <linux/highmem.h> - -#include <linux/kvm_host.h> -#include "debug.h" - -#ifdef KVM_DEBUG - -static const char *vmx_msr_name[] = { - "MSR_EFER", "MSR_STAR", "MSR_CSTAR", - "MSR_KERNEL_GS_BASE", "MSR_SYSCALL_MASK", "MSR_LSTAR" -}; - -#define NR_VMX_MSR (sizeof(vmx_msr_name) / sizeof(char*)) - -static unsigned long vmcs_readl(unsigned long field) -{ - unsigned long value; - - asm volatile (ASM_VMX_VMREAD_RDX_RAX - : "=a"(value) : "d"(field) : "cc"); - return value; -} - -static u16 vmcs_read16(unsigned long field) -{ - return vmcs_readl(field); -} - -static u32 vmcs_read32(unsigned long field) -{ - return vmcs_readl(field); -} - -static u64 vmcs_read64(unsigned long field) -{ -#ifdef CONFIG_X86_64 - return vmcs_readl(field); -#else - return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); -#endif -} - -void show_msrs(struct kvm_vcpu *vcpu) -{ - int i; - - for (i = 0; i < NR_VMX_MSR; ++i) { - vcpu_printf(vcpu, "%s: %s=0x%llx\n", - __FUNCTION__, - vmx_msr_name[i], - vcpu->guest_msrs[i].data); - } -} - -void show_code(struct kvm_vcpu *vcpu) -{ - gva_t rip = vmcs_readl(GUEST_RIP); - u8 code[50]; - char buf[30 + 3 * sizeof code]; - int i; - - if (!is_long_mode(vcpu)) - rip += vmcs_readl(GUEST_CS_BASE); - - kvm_read_guest(vcpu, rip, sizeof code, code); - for (i = 0; i < sizeof code; ++i) - sprintf(buf + i * 3, " %02x", code[i]); - vcpu_printf(vcpu, "code: %lx%s\n", rip, buf); -} - -struct gate_struct { - u16 offset_low; - u16 segment; - unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; - u16 offset_middle; - u32 offset_high; - u32 zero1; -} __attribute__((packed)); - -void show_irq(struct kvm_vcpu *vcpu, int irq) -{ - unsigned long idt_base = vmcs_readl(GUEST_IDTR_BASE); - unsigned long idt_limit = vmcs_readl(GUEST_IDTR_LIMIT); - 
struct gate_struct gate; - - if (!is_long_mode(vcpu)) - vcpu_printf(vcpu, "%s: not in long mode\n", __FUNCTION__); - - if (!is_long_mode(vcpu) || idt_limit < irq * sizeof(gate)) { - vcpu_printf(vcpu, "%s: 0x%x read_guest err\n", - __FUNCTION__, - irq); - return; - } - - if (kvm_read_guest(vcpu, idt_base + irq * sizeof(gate), sizeof(gate), &gate) != sizeof(gate)) { - vcpu_printf(vcpu, "%s: 0x%x read_guest err\n", - __FUNCTION__, - irq); - return; - } - vcpu_printf(vcpu, "%s: 0x%x handler 0x%llx\n", - __FUNCTION__, - irq, - ((u64)gate.offset_high << 32) | - ((u64)gate.offset_middle << 16) | - gate.offset_low); -} - -void show_page(struct kvm_vcpu *vcpu, - gva_t addr) -{ - u64 *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - - if (!buf) - return; - - addr &= PAGE_MASK; - if (kvm_read_guest(vcpu, addr, PAGE_SIZE, buf)) { - int i; - for (i = 0; i < PAGE_SIZE / sizeof(u64) ; i++) { - u8 *ptr = (u8*)&buf[i]; - int j; - vcpu_printf(vcpu, " 0x%16.16lx:", - addr + i * sizeof(u64)); - for (j = 0; j < sizeof(u64) ; j++) - vcpu_printf(vcpu, " 0x%2.2x", ptr[j]); - vcpu_printf(vcpu, "\n"); - } - } - kfree(buf); -} - -void show_u64(struct kvm_vcpu *vcpu, gva_t addr) -{ - u64 buf; - - if (kvm_read_guest(vcpu, addr, sizeof(u64), &buf) == sizeof(u64)) { - u8 *ptr = (u8*)&buf; - int j; - vcpu_printf(vcpu, " 0x%16.16lx:", addr); - for (j = 0; j < sizeof(u64) ; j++) - vcpu_printf(vcpu, " 0x%2.2x", ptr[j]); - vcpu_printf(vcpu, "\n"); - } -} - -#define IA32_DEBUGCTL_RESERVED_BITS 0xfffffffffffffe3cULL - -static int is_canonical(unsigned long addr) -{ - return addr == ((long)addr << 16) >> 16; -} - -int vm_entry_test_guest(struct kvm_vcpu *vcpu) -{ - unsigned long cr0; - unsigned long cr4; - unsigned long cr3; - unsigned long dr7; - u64 ia32_debugctl; - unsigned long sysenter_esp; - unsigned long sysenter_eip; - unsigned long rflags; - - int long_mode; - int virtual8086; - - #define RFLAGS_VM (1 << 17) - #define RFLAGS_RF (1 << 9) - - - #define VIR8086_SEG_BASE_TEST(seg)\ - if 
(vmcs_readl(GUEST_##seg##_BASE) != \ - (unsigned long)vmcs_read16(GUEST_##seg##_SELECTOR) << 4) {\ - vcpu_printf(vcpu, "%s: "#seg" base 0x%lx in "\ - "virtual8086 is not "#seg" selector 0x%x"\ - " shifted right 4 bits\n",\ - __FUNCTION__,\ - vmcs_readl(GUEST_##seg##_BASE),\ - vmcs_read16(GUEST_##seg##_SELECTOR));\ - return 0;\ - } - - #define VIR8086_SEG_LIMIT_TEST(seg)\ - if (vmcs_readl(GUEST_##seg##_LIMIT) != 0x0ffff) { \ - vcpu_printf(vcpu, "%s: "#seg" limit 0x%lx in "\ - "virtual8086 is not 0xffff\n",\ - __FUNCTION__,\ - vmcs_readl(GUEST_##seg##_LIMIT));\ - return 0;\ - } - - #define VIR8086_SEG_AR_TEST(seg)\ - if (vmcs_read32(GUEST_##seg##_AR_BYTES) != 0x0f3) { \ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x in "\ - "virtual8086 is not 0xf3\n",\ - __FUNCTION__,\ - vmcs_read32(GUEST_##seg##_AR_BYTES));\ - return 0;\ - } - - - cr0 = vmcs_readl(GUEST_CR0); - - if (!(cr0 & CR0_PG_MASK)) { - vcpu_printf(vcpu, "%s: cr0 0x%lx, PG is not set\n", - __FUNCTION__, cr0); - return 0; - } - - if (!(cr0 & CR0_PE_MASK)) { - vcpu_printf(vcpu, "%s: cr0 0x%lx, PE is not set\n", - __FUNCTION__, cr0); - return 0; - } - - if (!(cr0 & CR0_NE_MASK)) { - vcpu_printf(vcpu, "%s: cr0 0x%lx, NE is not set\n", - __FUNCTION__, cr0); - return 0; - } - - if (!(cr0 & CR0_WP_MASK)) { - vcpu_printf(vcpu, "%s: cr0 0x%lx, WP is not set\n", - __FUNCTION__, cr0); - } - - cr4 = vmcs_readl(GUEST_CR4); - - if (!(cr4 & CR4_VMXE_MASK)) { - vcpu_printf(vcpu, "%s: cr4 0x%lx, VMXE is not set\n", - __FUNCTION__, cr4); - return 0; - } - - if (!(cr4 & CR4_PAE_MASK)) { - vcpu_printf(vcpu, "%s: cr4 0x%lx, PAE is not set\n", - __FUNCTION__, cr4); - } - - ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - - if (ia32_debugctl & IA32_DEBUGCTL_RESERVED_BITS ) { - vcpu_printf(vcpu, "%s: ia32_debugctl 0x%llx, reserve bits\n", - __FUNCTION__, ia32_debugctl); - return 0; - } - - long_mode = is_long_mode(vcpu); - - if (long_mode) { - } - - if ( long_mode && !(cr4 & CR4_PAE_MASK)) { - vcpu_printf(vcpu, "%s: long mode and not 
PAE\n", - __FUNCTION__); - return 0; - } - - cr3 = vmcs_readl(GUEST_CR3); - - if (cr3 & CR3_L_MODE_RESEVED_BITS) { - vcpu_printf(vcpu, "%s: cr3 0x%lx, reserved bits\n", - __FUNCTION__, cr3); - return 0; - } - - if ( !long_mode && (cr4 & CR4_PAE_MASK)) { - /* check the 4 PDPTEs for reserved bits */ - unsigned long pdpt_pfn = cr3 >> PAGE_SHIFT; - int i; - u64 pdpte; - unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5; - u64 *pdpt = kmap_atomic(pfn_to_page(pdpt_pfn), KM_USER0); - - for (i = 0; i < 4; ++i) { - pdpte = pdpt[offset + i]; - if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) - break; - } - - kunmap_atomic(pdpt, KM_USER0); - - if (i != 4) { - vcpu_printf(vcpu, "%s: pae cr3[%d] 0x%llx, reserved bits\n", - __FUNCTION__, i, pdpte); - return 0; - } - } - - dr7 = vmcs_readl(GUEST_DR7); - - if (dr7 & ~((1ULL << 32) - 1)) { - vcpu_printf(vcpu, "%s: dr7 0x%lx, reserved bits\n", - __FUNCTION__, dr7); - return 0; - } - - sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); - - if (!is_canonical(sysenter_esp)) { - vcpu_printf(vcpu, "%s: sysenter_esp 0x%lx, not canonical\n", - __FUNCTION__, sysenter_esp); - return 0; - } - - sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - - if (!is_canonical(sysenter_eip)) { - vcpu_printf(vcpu, "%s: sysenter_eip 0x%lx, not canonical\n", - __FUNCTION__, sysenter_eip); - return 0; - } - - rflags = vmcs_readl(GUEST_RFLAGS); - virtual8086 = rflags & RFLAGS_VM; - - - if (vmcs_read16(GUEST_TR_SELECTOR) & SELECTOR_TI_MASK) { - vcpu_printf(vcpu, "%s: tr selctor 0x%x, TI is set\n", - __FUNCTION__, vmcs_read16(GUEST_TR_SELECTOR)); - return 0; - } - - if (!(vmcs_read32(GUEST_LDTR_AR_BYTES) & AR_UNUSABLE_MASK) && - vmcs_read16(GUEST_LDTR_SELECTOR) & SELECTOR_TI_MASK) { - vcpu_printf(vcpu, "%s: ldtr selctor 0x%x," - " is usable and TI is set\n", - __FUNCTION__, vmcs_read16(GUEST_LDTR_SELECTOR)); - return 0; - } - - if (!virtual8086 && - (vmcs_read16(GUEST_SS_SELECTOR) & SELECTOR_RPL_MASK) != - (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK)) { - 
vcpu_printf(vcpu, "%s: ss selctor 0x%x cs selctor 0x%x," - " not same RPL\n", - __FUNCTION__, - vmcs_read16(GUEST_SS_SELECTOR), - vmcs_read16(GUEST_CS_SELECTOR)); - return 0; - } - - if (virtual8086) { - VIR8086_SEG_BASE_TEST(CS); - VIR8086_SEG_BASE_TEST(SS); - VIR8086_SEG_BASE_TEST(DS); - VIR8086_SEG_BASE_TEST(ES); - VIR8086_SEG_BASE_TEST(FS); - VIR8086_SEG_BASE_TEST(GS); - } - - if (!is_canonical(vmcs_readl(GUEST_TR_BASE)) || - !is_canonical(vmcs_readl(GUEST_FS_BASE)) || - !is_canonical(vmcs_readl(GUEST_GS_BASE)) ) { - vcpu_printf(vcpu, "%s: TR 0x%lx FS 0x%lx or GS 0x%lx base" - " is not canonical\n", - __FUNCTION__, - vmcs_readl(GUEST_TR_BASE), - vmcs_readl(GUEST_FS_BASE), - vmcs_readl(GUEST_GS_BASE)); - return 0; - - } - - if (!(vmcs_read32(GUEST_LDTR_AR_BYTES) & AR_UNUSABLE_MASK) && - !is_canonical(vmcs_readl(GUEST_LDTR_BASE))) { - vcpu_printf(vcpu, "%s: LDTR base 0x%lx, usable and is not" - " canonical\n", - __FUNCTION__, - vmcs_readl(GUEST_LDTR_BASE)); - return 0; - } - - if ((vmcs_readl(GUEST_CS_BASE) & ~((1ULL << 32) - 1))) { - vcpu_printf(vcpu, "%s: CS base 0x%lx, not all bits 63-32" - " are zero\n", - __FUNCTION__, - vmcs_readl(GUEST_CS_BASE)); - return 0; - } - - #define SEG_BASE_TEST(seg)\ - if ( !(vmcs_read32(GUEST_##seg##_AR_BYTES) & AR_UNUSABLE_MASK) &&\ - (vmcs_readl(GUEST_##seg##_BASE) & ~((1ULL << 32) - 1))) {\ - vcpu_printf(vcpu, "%s: "#seg" base 0x%lx, is usable and not"\ - " all bits 63-32 are zero\n",\ - __FUNCTION__,\ - vmcs_readl(GUEST_##seg##_BASE));\ - return 0;\ - } - SEG_BASE_TEST(SS); - SEG_BASE_TEST(DS); - SEG_BASE_TEST(ES); - - if (virtual8086) { - VIR8086_SEG_LIMIT_TEST(CS); - VIR8086_SEG_LIMIT_TEST(SS); - VIR8086_SEG_LIMIT_TEST(DS); - VIR8086_SEG_LIMIT_TEST(ES); - VIR8086_SEG_LIMIT_TEST(FS); - VIR8086_SEG_LIMIT_TEST(GS); - } - - if (virtual8086) { - VIR8086_SEG_AR_TEST(CS); - VIR8086_SEG_AR_TEST(SS); - VIR8086_SEG_AR_TEST(DS); - VIR8086_SEG_AR_TEST(ES); - VIR8086_SEG_AR_TEST(FS); - VIR8086_SEG_AR_TEST(GS); - } else { - - u32 cs_ar 
= vmcs_read32(GUEST_CS_AR_BYTES); - u32 ss_ar = vmcs_read32(GUEST_SS_AR_BYTES); - u32 tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); - u32 ldtr_ar = vmcs_read32(GUEST_LDTR_AR_BYTES); - - #define SEG_G_TEST(seg) { \ - u32 lim = vmcs_read32(GUEST_##seg##_LIMIT); \ - u32 ar = vmcs_read32(GUEST_##seg##_AR_BYTES); \ - int err = 0; \ - if (((lim & ~PAGE_MASK) != ~PAGE_MASK) && (ar & AR_G_MASK)) \ - err = 1; \ - if ((lim & ~((1u << 20) - 1)) && !(ar & AR_G_MASK)) \ - err = 1; \ - if (err) { \ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, G err. lim" \ - " is 0x%x\n", \ - __FUNCTION__, \ - ar, lim); \ - return 0; \ - } \ - } - - - if (!(cs_ar & AR_TYPE_ACCESSES_MASK)) { - vcpu_printf(vcpu, "%s: cs AR 0x%x, accesses is clear\n", - __FUNCTION__, - cs_ar); - return 0; - } - - if (!(cs_ar & AR_TYPE_CODE_MASK)) { - vcpu_printf(vcpu, "%s: cs AR 0x%x, code is clear\n", - __FUNCTION__, - cs_ar); - return 0; - } - - if (!(cs_ar & AR_S_MASK)) { - vcpu_printf(vcpu, "%s: cs AR 0x%x, type is sys\n", - __FUNCTION__, - cs_ar); - return 0; - } - - if ((cs_ar & AR_TYPE_MASK) >= 8 && (cs_ar & AR_TYPE_MASK) < 12 && - AR_DPL(cs_ar) != - (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK) ) { - vcpu_printf(vcpu, "%s: cs AR 0x%x, " - "DPL(0x%x) not as RPL(0x%x)\n", - __FUNCTION__, - cs_ar, AR_DPL(cs_ar), vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK); - return 0; - } - - if ((cs_ar & AR_TYPE_MASK) >= 13 && (cs_ar & AR_TYPE_MASK) < 16 && - AR_DPL(cs_ar) > - (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK) ) { - vcpu_printf(vcpu, "%s: cs AR 0x%x, " - "DPL greater than RPL\n", - __FUNCTION__, - cs_ar); - return 0; - } - - if (!(cs_ar & AR_P_MASK)) { - vcpu_printf(vcpu, "%s: CS AR 0x%x, not " - "present\n", - __FUNCTION__, - cs_ar); - return 0; - } - - if ((cs_ar & AR_RESERVD_MASK)) { - vcpu_printf(vcpu, "%s: CS AR 0x%x, reseved" - " bits are set\n", - __FUNCTION__, - cs_ar); - return 0; - } - - if (long_mode & (cs_ar & AR_L_MASK) && (cs_ar & AR_DB_MASK)) { - vcpu_printf(vcpu, "%s: CS AR 0x%x, DB and 
L are set" - " in long mode\n", - __FUNCTION__, - cs_ar); - return 0; - - } - - SEG_G_TEST(CS); - - if (!(ss_ar & AR_UNUSABLE_MASK)) { - if ((ss_ar & AR_TYPE_MASK) != 3 && - (ss_ar & AR_TYPE_MASK) != 7 ) { - vcpu_printf(vcpu, "%s: ss AR 0x%x, usable and type" - " is not 3 or 7\n", - __FUNCTION__, - ss_ar); - return 0; - } - - if (!(ss_ar & AR_S_MASK)) { - vcpu_printf(vcpu, "%s: ss AR 0x%x, usable and" - " is sys\n", - __FUNCTION__, - ss_ar); - return 0; - } - if (!(ss_ar & AR_P_MASK)) { - vcpu_printf(vcpu, "%s: SS AR 0x%x, usable" - " and not present\n", - __FUNCTION__, - ss_ar); - return 0; - } - - if ((ss_ar & AR_RESERVD_MASK)) { - vcpu_printf(vcpu, "%s: SS AR 0x%x, reseved" - " bits are set\n", - __FUNCTION__, - ss_ar); - return 0; - } - - SEG_G_TEST(SS); - - } - - if (AR_DPL(ss_ar) != - (vmcs_read16(GUEST_SS_SELECTOR) & SELECTOR_RPL_MASK) ) { - vcpu_printf(vcpu, "%s: SS AR 0x%x, " - "DPL not as RPL\n", - __FUNCTION__, - ss_ar); - return 0; - } - - #define SEG_AR_TEST(seg) {\ - u32 ar = vmcs_read32(GUEST_##seg##_AR_BYTES);\ - if (!(ar & AR_UNUSABLE_MASK)) {\ - if (!(ar & AR_TYPE_ACCESSES_MASK)) {\ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\ - "usable and not accesses\n",\ - __FUNCTION__,\ - ar);\ - return 0;\ - }\ - if ((ar & AR_TYPE_CODE_MASK) &&\ - !(ar & AR_TYPE_READABLE_MASK)) {\ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\ - "code and not readable\n",\ - __FUNCTION__,\ - ar);\ - return 0;\ - }\ - if (!(ar & AR_S_MASK)) {\ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, usable and"\ - " is sys\n",\ - __FUNCTION__,\ - ar);\ - return 0;\ - }\ - if ((ar & AR_TYPE_MASK) >= 0 && \ - (ar & AR_TYPE_MASK) < 12 && \ - AR_DPL(ar) < (vmcs_read16(GUEST_##seg##_SELECTOR) & \ - SELECTOR_RPL_MASK) ) {\ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\ - "DPL less than RPL\n",\ - __FUNCTION__,\ - ar);\ - return 0;\ - }\ - if (!(ar & AR_P_MASK)) {\ - vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, usable and"\ - " not present\n",\ - __FUNCTION__,\ - ar);\ - return 0;\ - }\ - if ((ar & 
AR_RESERVD_MASK)) {\ - vcpu_printf(vcpu, "%s: "#seg" AR"\ - " 0x%x, reseved"\ - " bits are set\n",\ - __FUNCTION__,\ - ar);\ - return 0;\ - }\ - SEG_G_TEST(seg)\ - }\ - } - -#undef DS -#undef ES -#undef FS -#undef GS - - SEG_AR_TEST(DS); - SEG_AR_TEST(ES); - SEG_AR_TEST(FS); - SEG_AR_TEST(GS); - - // TR test - if (long_mode) { - if ((tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { - vcpu_printf(vcpu, "%s: TR AR 0x%x, long" - " mode and not 64bit busy" - " tss\n", - __FUNCTION__, - tr_ar); - return 0; - } - } else { - if ((tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_32_TSS && - (tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_16_TSS) { - vcpu_printf(vcpu, "%s: TR AR 0x%x, legacy" - " mode and not 16/32bit " - "busy tss\n", - __FUNCTION__, - tr_ar); - return 0; - } - - } - if ((tr_ar & AR_S_MASK)) { - vcpu_printf(vcpu, "%s: TR AR 0x%x, S is set\n", - __FUNCTION__, - tr_ar); - return 0; - } - if (!(tr_ar & AR_P_MASK)) { - vcpu_printf(vcpu, "%s: TR AR 0x%x, P is not set\n", - __FUNCTION__, - tr_ar); - return 0; - } - - if ((tr_ar & (AR_RESERVD_MASK| AR_UNUSABLE_MASK))) { - vcpu_printf(vcpu, "%s: TR AR 0x%x, reserved bit are" - " set\n", - __FUNCTION__, - tr_ar); - return 0; - } - SEG_G_TEST(TR); - - // TR test - if (!(ldtr_ar & AR_UNUSABLE_MASK)) { - - if ((ldtr_ar & AR_TYPE_MASK) != AR_TYPE_LDT) { - vcpu_printf(vcpu, "%s: LDTR AR 0x%x," - " bad type\n", - __FUNCTION__, - ldtr_ar); - return 0; - } - - if ((ldtr_ar & AR_S_MASK)) { - vcpu_printf(vcpu, "%s: LDTR AR 0x%x," - " S is set\n", - __FUNCTION__, - ldtr_ar); - return 0; - } - - if (!(ldtr_ar & AR_P_MASK)) { - vcpu_printf(vcpu, "%s: LDTR AR 0x%x," - " P is not set\n", - __FUNCTION__, - ldtr_ar); - return 0; - } - if ((ldtr_ar & AR_RESERVD_MASK)) { - vcpu_printf(vcpu, "%s: LDTR AR 0x%x," - " reserved bit are set\n", - __FUNCTION__, - ldtr_ar); - return 0; - } - SEG_G_TEST(LDTR); - } - } - - // GDTR and IDTR - - - #define IDT_GDT_TEST(reg)\ - if (!is_canonical(vmcs_readl(GUEST_##reg##_BASE))) {\ - vcpu_printf(vcpu, "%s: "#reg" BASE 
0x%lx, not canonical\n",\ - __FUNCTION__,\ - vmcs_readl(GUEST_##reg##_BASE));\ - return 0;\ - }\ - if (vmcs_read32(GUEST_##reg##_LIMIT) >> 16) {\ - vcpu_printf(vcpu, "%s: "#reg" LIMIT 0x%x, size err\n",\ - __FUNCTION__,\ - vmcs_read32(GUEST_##reg##_LIMIT));\ - return 0;\ - }\ - - IDT_GDT_TEST(GDTR); - IDT_GDT_TEST(IDTR); - - - // RIP - - if ((!long_mode || !(vmcs_read32(GUEST_CS_AR_BYTES) & AR_L_MASK)) && - vmcs_readl(GUEST_RIP) & ~((1ULL << 32) - 1) ){ - vcpu_printf(vcpu, "%s: RIP 0x%lx, size err\n", - __FUNCTION__, - vmcs_readl(GUEST_RIP)); - return 0; - } - - if (!is_canonical(vmcs_readl(GUEST_RIP))) { - vcpu_printf(vcpu, "%s: RIP 0x%lx, not canonical\n", - __FUNCTION__, - vmcs_readl(GUEST_RIP)); - return 0; - } - - // RFLAGS - #define RFLAGS_RESEVED_CLEAR_BITS\ - (~((1ULL << 22) - 1) | (1ULL << 15) | (1ULL << 5) | (1ULL << 3)) - #define RFLAGS_RESEVED_SET_BITS (1 << 1) - - if ((rflags & RFLAGS_RESEVED_CLEAR_BITS) || - !(rflags & RFLAGS_RESEVED_SET_BITS)) { - vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, reserved bits 0x%llx 0x%x\n", - __FUNCTION__, - rflags, - RFLAGS_RESEVED_CLEAR_BITS, - RFLAGS_RESEVED_SET_BITS); - return 0; - } - - if (long_mode && virtual8086) { - vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, vm and long mode\n", - __FUNCTION__, - rflags); - return 0; - } - - - if (!(rflags & RFLAGS_RF)) { - u32 vm_entry_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); - if ((vm_entry_info & INTR_INFO_VALID_MASK) && - (vm_entry_info & INTR_INFO_INTR_TYPE_MASK) == - INTR_TYPE_EXT_INTR) { - vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, external" - " interrupt and RF is clear\n", - __FUNCTION__, - rflags); - return 0; - } - - } - - // to be continued from Checks on Guest Non-Register State (22.3.1.5) - return 1; -} - -static int check_fixed_bits(struct kvm_vcpu *vcpu, const char *reg, - unsigned long cr, - u32 msr_fixed_0, u32 msr_fixed_1) -{ - u64 fixed_bits_0, fixed_bits_1; - - rdmsrl(msr_fixed_0, fixed_bits_0); - rdmsrl(msr_fixed_1, fixed_bits_1); - if ((cr & fixed_bits_0) != 
fixed_bits_0) { - vcpu_printf(vcpu, "%s: %s (%lx) has one of %llx unset\n", - __FUNCTION__, reg, cr, fixed_bits_0); - return 0; - } - if ((~cr & ~fixed_bits_1) != ~fixed_bits_1) { - vcpu_printf(vcpu, "%s: %s (%lx) has one of %llx set\n", - __FUNCTION__, reg, cr, ~fixed_bits_1); - return 0; - } - return 1; -} - -static int phys_addr_width(void) -{ - unsigned eax, ebx, ecx, edx; - - cpuid(0x80000008, &eax, &ebx, &ecx, &edx); - return eax & 0xff; -} - -static int check_canonical(struct kvm_vcpu *vcpu, const char *name, - unsigned long reg) -{ -#ifdef CONFIG_X86_64 - unsigned long x; - - if (sizeof(reg) == 4) - return 1; - x = (long)reg >> 48; - if (!(x == 0 || x == ~0UL)) { - vcpu_printf(vcpu, "%s: %s (%lx) not canonical\n", - __FUNCTION__, name, reg); - return 0; - } -#endif - return 1; -} - -static int check_selector(struct kvm_vcpu *vcpu, const char *name, - int rpl_ti, int null, - u16 sel) -{ - if (rpl_ti && (sel & 7)) { - vcpu_printf(vcpu, "%s: %s (%x) nonzero rpl or ti\n", - __FUNCTION__, name, sel); - return 0; - } - if (null && !sel) { - vcpu_printf(vcpu, "%s: %s (%x) zero\n", - __FUNCTION__, name, sel); - return 0; - } - return 1; -} - -#define MSR_IA32_VMX_CR0_FIXED0 0x486 -#define MSR_IA32_VMX_CR0_FIXED1 0x487 - -#define MSR_IA32_VMX_CR4_FIXED0 0x488 -#define MSR_IA32_VMX_CR4_FIXED1 0x489 - -int vm_entry_test_host(struct kvm_vcpu *vcpu) -{ - int r = 0; - unsigned long cr0 = vmcs_readl(HOST_CR0); - unsigned long cr4 = vmcs_readl(HOST_CR4); - unsigned long cr3 = vmcs_readl(HOST_CR3); - int host_64; - - host_64 = vmcs_read32(VM_EXIT_CONTROLS) & VM_EXIT_HOST_ADD_SPACE_SIZE; - - /* 22.2.2 */ - r &= check_fixed_bits(vcpu, "host cr0", cr0, MSR_IA32_VMX_CR0_FIXED0, - MSR_IA32_VMX_CR0_FIXED1); - - r &= check_fixed_bits(vcpu, "host cr0", cr4, MSR_IA32_VMX_CR4_FIXED0, - MSR_IA32_VMX_CR4_FIXED1); - if ((u64)cr3 >> phys_addr_width()) { - vcpu_printf(vcpu, "%s: cr3 (%lx) vs phys addr width\n", - __FUNCTION__, cr3); - r = 0; - } - - r &= check_canonical(vcpu, "host 
ia32_sysenter_eip", - vmcs_readl(HOST_IA32_SYSENTER_EIP)); - r &= check_canonical(vcpu, "host ia32_sysenter_esp", - vmcs_readl(HOST_IA32_SYSENTER_ESP)); - - /* 22.2.3 */ - r &= check_selector(vcpu, "host cs", 1, 1, - vmcs_read16(HOST_CS_SELECTOR)); - r &= check_selector(vcpu, "host ss", 1, !host_64, - vmcs_read16(HOST_SS_SELECTOR)); - r &= check_selector(vcpu, "host ds", 1, 0, - vmcs_read16(HOST_DS_SELECTOR)); - r &= check_selector(vcpu, "host es", 1, 0, - vmcs_read16(HOST_ES_SELECTOR)); - r &= check_selector(vcpu, "host fs", 1, 0, - vmcs_read16(HOST_FS_SELECTOR)); - r &= check_selector(vcpu, "host gs", 1, 0, - vmcs_read16(HOST_GS_SELECTOR)); - r &= check_selector(vcpu, "host tr", 1, 1, - vmcs_read16(HOST_TR_SELECTOR)); - -#ifdef CONFIG_X86_64 - r &= check_canonical(vcpu, "host fs base", - vmcs_readl(HOST_FS_BASE)); - r &= check_canonical(vcpu, "host gs base", - vmcs_readl(HOST_GS_BASE)); - r &= check_canonical(vcpu, "host gdtr base", - vmcs_readl(HOST_GDTR_BASE)); - r &= check_canonical(vcpu, "host idtr base", - vmcs_readl(HOST_IDTR_BASE)); -#endif - - /* 22.2.4 */ -#ifdef CONFIG_X86_64 - if (!host_64) { - vcpu_printf(vcpu, "%s: vm exit controls: !64 bit host\n", - __FUNCTION__); - r = 0; - } - if (!(cr4 & CR4_PAE_MASK)) { - vcpu_printf(vcpu, "%s: cr4 (%lx): !pae\n", - __FUNCTION__, cr4); - r = 0; - } - r &= check_canonical(vcpu, "host rip", vmcs_readl(HOST_RIP)); -#endif - - return r; -} - -int vm_entry_test(struct kvm_vcpu *vcpu) -{ - int rg, rh; - - rg = vm_entry_test_guest(vcpu); - rh = vm_entry_test_host(vcpu); - return rg && rh; -} - -void vmcs_dump(struct kvm_vcpu *vcpu) -{ - vcpu_printf(vcpu, "************************ vmcs_dump ************************\n"); - vcpu_printf(vcpu, "VM_ENTRY_CONTROLS 0x%x\n", vmcs_read32(VM_ENTRY_CONTROLS)); - - vcpu_printf(vcpu, "GUEST_CR0 0x%lx\n", vmcs_readl(GUEST_CR0)); - vcpu_printf(vcpu, "GUEST_CR3 0x%lx\n", vmcs_readl(GUEST_CR3)); - vcpu_printf(vcpu, "GUEST_CR4 0x%lx\n", vmcs_readl(GUEST_CR4)); - - vcpu_printf(vcpu, 
"GUEST_SYSENTER_ESP 0x%lx\n", vmcs_readl(GUEST_SYSENTER_ESP)); - vcpu_printf(vcpu, "GUEST_SYSENTER_EIP 0x%lx\n", vmcs_readl(GUEST_SYSENTER_EIP)); - - - vcpu_printf(vcpu, "GUEST_IA32_DEBUGCTL 0x%llx\n", vmcs_read64(GUEST_IA32_DEBUGCTL)); - vcpu_printf(vcpu, "GUEST_DR7 0x%lx\n", vmcs_readl(GUEST_DR7)); - - vcpu_printf(vcpu, "GUEST_RFLAGS 0x%lx\n", vmcs_readl(GUEST_RFLAGS)); - vcpu_printf(vcpu, "GUEST_RIP 0x%lx\n", vmcs_readl(GUEST_RIP)); - - vcpu_printf(vcpu, "GUEST_CS_SELECTOR 0x%x\n", vmcs_read16(GUEST_CS_SELECTOR)); - vcpu_printf(vcpu, "GUEST_DS_SELECTOR 0x%x\n", vmcs_read16(GUEST_DS_SELECTOR)); - vcpu_printf(vcpu, "GUEST_ES_SELECTOR 0x%x\n", vmcs_read16(GUEST_ES_SELECTOR)); - vcpu_printf(vcpu, "GUEST_FS_SELECTOR 0x%x\n", vmcs_read16(GUEST_FS_SELECTOR)); - vcpu_printf(vcpu, "GUEST_GS_SELECTOR 0x%x\n", vmcs_read16(GUEST_GS_SELECTOR)); - vcpu_printf(vcpu, "GUEST_SS_SELECTOR 0x%x\n", vmcs_read16(GUEST_SS_SELECTOR)); - - vcpu_printf(vcpu, "GUEST_TR_SELECTOR 0x%x\n", vmcs_read16(GUEST_TR_SELECTOR)); - vcpu_printf(vcpu, "GUEST_LDTR_SELECTOR 0x%x\n", vmcs_read16(GUEST_LDTR_SELECTOR)); - - vcpu_printf(vcpu, "GUEST_CS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_CS_AR_BYTES)); - vcpu_printf(vcpu, "GUEST_DS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_DS_AR_BYTES)); - vcpu_printf(vcpu, "GUEST_ES_AR_BYTES 0x%x\n", vmcs_read32(GUEST_ES_AR_BYTES)); - vcpu_printf(vcpu, "GUEST_FS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_FS_AR_BYTES)); - vcpu_printf(vcpu, "GUEST_GS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_GS_AR_BYTES)); - vcpu_printf(vcpu, "GUEST_SS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_SS_AR_BYTES)); - - vcpu_printf(vcpu, "GUEST_LDTR_AR_BYTES 0x%x\n", vmcs_read32(GUEST_LDTR_AR_BYTES)); - vcpu_printf(vcpu, "GUEST_TR_AR_BYTES 0x%x\n", vmcs_read32(GUEST_TR_AR_BYTES)); - - vcpu_printf(vcpu, "GUEST_CS_BASE 0x%lx\n", vmcs_readl(GUEST_CS_BASE)); - vcpu_printf(vcpu, "GUEST_DS_BASE 0x%lx\n", vmcs_readl(GUEST_DS_BASE)); - vcpu_printf(vcpu, "GUEST_ES_BASE 0x%lx\n", vmcs_readl(GUEST_ES_BASE)); - vcpu_printf(vcpu, 
"GUEST_FS_BASE 0x%lx\n", vmcs_readl(GUEST_FS_BASE)); - vcpu_printf(vcpu, "GUEST_GS_BASE 0x%lx\n", vmcs_readl(GUEST_GS_BASE)); - vcpu_printf(vcpu, "GUEST_SS_BASE 0x%lx\n", vmcs_readl(GUEST_SS_BASE)); - - - vcpu_printf(vcpu, "GUEST_LDTR_BASE 0x%lx\n", vmcs_readl(GUEST_LDTR_BASE)); - vcpu_printf(vcpu, "GUEST_TR_BASE 0x%lx\n", vmcs_readl(GUEST_TR_BASE)); - - vcpu_printf(vcpu, "GUEST_CS_LIMIT 0x%x\n", vmcs_read32(GUEST_CS_LIMIT)); - vcpu_printf(vcpu, "GUEST_DS_LIMIT 0x%x\n", vmcs_read32(GUEST_DS_LIMIT)); - vcpu_printf(vcpu, "GUEST_ES_LIMIT 0x%x\n", vmcs_read32(GUEST_ES_LIMIT)); - vcpu_printf(vcpu, "GUEST_FS_LIMIT 0x%x\n", vmcs_read32(GUEST_FS_LIMIT)); - vcpu_printf(vcpu, "GUEST_GS_LIMIT 0x%x\n", vmcs_read32(GUEST_GS_LIMIT)); - vcpu_printf(vcpu, "GUEST_SS_LIMIT 0x%x\n", vmcs_read32(GUEST_SS_LIMIT)); - - vcpu_printf(vcpu, "GUEST_LDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_LDTR_LIMIT)); - vcpu_printf(vcpu, "GUEST_TR_LIMIT 0x%x\n", vmcs_read32(GUEST_TR_LIMIT)); - - vcpu_printf(vcpu, "GUEST_GDTR_BASE 0x%lx\n", vmcs_readl(GUEST_GDTR_BASE)); - vcpu_printf(vcpu, "GUEST_IDTR_BASE 0x%lx\n", vmcs_readl(GUEST_IDTR_BASE)); - - vcpu_printf(vcpu, "GUEST_GDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_GDTR_LIMIT)); - vcpu_printf(vcpu, "GUEST_IDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_IDTR_LIMIT)); - - vcpu_printf(vcpu, "EXCEPTION_BITMAP 0x%x\n", vmcs_read32(EXCEPTION_BITMAP)); - vcpu_printf(vcpu, "***********************************************************\n"); -} - -void regs_dump(struct kvm_vcpu *vcpu) -{ - #define REG_DUMP(reg) \ - vcpu_printf(vcpu, #reg" = 0x%lx(VCPU)\n", vcpu->regs[VCPU_REGS_##reg]) - #define VMCS_REG_DUMP(reg) \ - vcpu_printf(vcpu, #reg" = 0x%lx(VMCS)\n", vmcs_readl(GUEST_##reg)) - - vcpu_printf(vcpu, "************************ regs_dump ************************\n"); - REG_DUMP(RAX); - REG_DUMP(RBX); - REG_DUMP(RCX); - REG_DUMP(RDX); - REG_DUMP(RSP); - REG_DUMP(RBP); - REG_DUMP(RSI); - REG_DUMP(RDI); - REG_DUMP(R8); - REG_DUMP(R9); - REG_DUMP(R10); - REG_DUMP(R11); - REG_DUMP(R12); - 
REG_DUMP(R13); - REG_DUMP(R14); - REG_DUMP(R15); - - VMCS_REG_DUMP(RSP); - VMCS_REG_DUMP(RIP); - VMCS_REG_DUMP(RFLAGS); - - vcpu_printf(vcpu, "***********************************************************\n"); -} - -void sregs_dump(struct kvm_vcpu *vcpu) -{ - vcpu_printf(vcpu, "************************ sregs_dump ************************\n"); - vcpu_printf(vcpu, "cr0 = 0x%lx\n", vcpu->cr0); - vcpu_printf(vcpu, "cr2 = 0x%lx\n", vcpu->cr2); - vcpu_printf(vcpu, "cr3 = 0x%lx\n", vcpu->cr3); - vcpu_printf(vcpu, "cr4 = 0x%lx\n", vcpu->cr4); - vcpu_printf(vcpu, "cr8 = 0x%lx\n", vcpu->cr8); - vcpu_printf(vcpu, "shadow_efer = 0x%llx\n", vcpu->shadow_efer); - vcpu_printf(vcpu, "***********************************************************\n"); -} - -void show_pending_interrupts(struct kvm_vcpu *vcpu) -{ - int i; - vcpu_printf(vcpu, "************************ pending interrupts ****************\n"); - vcpu_printf(vcpu, "sumamry = 0x%lx\n", vcpu->irq_summary); - for (i=0 ; i < NR_IRQ_WORDS ; i++) - vcpu_printf(vcpu, "%lx ", vcpu->irq_pending[i]); - vcpu_printf(vcpu, "\n"); - vcpu_printf(vcpu, "************************************************************\n"); -} - -void vcpu_dump(struct kvm_vcpu *vcpu) -{ - regs_dump(vcpu); - sregs_dump(vcpu); - vmcs_dump(vcpu); - show_msrs(vcpu); - show_pending_interrupts(vcpu); - /* more ... */ -} -#endif - |