diff options
author | Izik Eidus <ieidus@redhat.com> | 2009-10-26 17:06:32 +0200 |
---|---|---|
committer | Yaniv Kamay <ykamay@redhat.com> | 2009-10-26 22:53:15 +0200 |
commit | 54d132f06ea4cda5caf6fe47295f990631f80241 (patch) | |
tree | 92942ccf82ad98ee451815384fe635c9674f23ac | |
parent | 94bca281a7d09eb16acf184291ae1b5dd4497bb3 (diff) |
vdesktop: add new kernel dir
Signed-off-by: Izik Eidus <ieidus@redhat.com>
48 files changed, 6276 insertions, 0 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore new file mode 100644 index 00000000..5c46f862 --- /dev/null +++ b/kernel/.gitignore @@ -0,0 +1,72 @@ +*.o +*.d +*~ +*.flat +*.a +.*.cmd +*.ko +*.mod.c +config.mak +modules.order +Module.symvers +Modules.symvers +Module.markers +.tmp_versions +include-compat/asm +include +x86/modules.order +x86/i825[49].[ch] +x86/kvm_main.c +x86/kvm_svm.h +x86/vmx.[ch] +x86/svm.[ch] +x86/mmu.[ch] +x86/paging_tmpl.h +x86/ioapic.[ch] +x86/iodev.h +x86/irq.[ch] +x86/lapic.[ch] +x86/tss.h +x86/x86.[ch] +x86/coalesced_mmio.[ch] +x86/kvm_cache_regs.h +x86/irq_comm.c +x86/timer.c +x86/kvm_timer.h +x86/iommu.c +x86/svm-trace.h +x86/trace-arch.h +x86/trace.h +x86/vmx-trace.h +x86/assigned-dev.c +x86/emulate.c +x86/eventfd.c +x86/mmutrace.h +ia64/asm-offsets.c +ia64/coalesced_mmio.[ch] +ia64/ioapic.[ch] +ia64/iodev.h +ia64/iommu.c +ia64/irq.h +ia64/irq_comm.c +ia64/kvm-ia64.c +ia64/kvm_fw.c +ia64/kvm_lib.c +ia64/kvm_main.c +ia64/kvm_minstate.h +ia64/lapic.h +ia64/memcpy.S +ia64/memset.S +ia64/misc.h +ia64/mmio.c +ia64/optvfault.S +ia64/process.c +ia64/trampoline.S +ia64/vcpu.[ch] +ia64/vmm.c +ia64/vmm_ivt.S +ia64/vti.h +ia64/vtlb.c +ia64/assigned-dev.c +ia64/eventfd.c +.stgit-* diff --git a/kernel/.gitmodules b/kernel/.gitmodules new file mode 100644 index 00000000..9c639211 --- /dev/null +++ b/kernel/.gitmodules @@ -0,0 +1,3 @@ +[submodule "linux-2.6"] + path = linux-2.6 + url = ../kvm.git diff --git a/kernel/COPYING b/kernel/COPYING new file mode 100644 index 00000000..fb60aad2 --- /dev/null +++ b/kernel/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. 
By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/kernel/Kbuild b/kernel/Kbuild new file mode 100644 index 00000000..ec34c43e --- /dev/null +++ b/kernel/Kbuild @@ -0,0 +1,2 @@ +obj-$(CONFIG_X86) += x86/ +obj-$(CONFIG_IA64) += ia64/ diff --git a/kernel/Makefile b/kernel/Makefile new file mode 100644 index 00000000..f406e3d8 --- /dev/null +++ b/kernel/Makefile @@ -0,0 +1,74 @@ +include config.mak + +ARCH_DIR = $(if $(filter $(ARCH),x86_64 i386),x86,$(ARCH)) +ARCH_CONFIG := $(shell echo $(ARCH_DIR) | tr '[:lower:]' '[:upper:]') +# NONARCH_CONFIG used for unifdef, and only cover X86 and IA64 now +NONARCH_CONFIG = $(filter-out $(ARCH_CONFIG),X86 IA64) + +KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) + +DESTDIR= + +MAKEFILE_PRE = $(ARCH_DIR)/Makefile.pre + +INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR)) +ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR)) + +rpmrelease = devel + +LINUX = ./linux-2.6 + +all:: prerequisite +# include header priority 1) $LINUX 2) $KERNELDIR 3) include-compat + $(MAKE) -C $(KERNELDIR) M=`pwd` \ + LINUXINCLUDE="-I`pwd`/include -Iinclude \ + $(if 
$(KERNELSOURCEDIR),\ + -Iinclude2 -I$(KERNELSOURCEDIR)/include -I$(KERNELSOURCEDIR)/arch/${ARCH_DIR}/include, \ + -Iarch/${ARCH_DIR}/include) -I`pwd`/include-compat -I`pwd`/${ARCH_DIR} \ + -include include/linux/autoconf.h \ + -include `pwd`/$(ARCH_DIR)/external-module-compat.h" \ + "$$@" + +include $(MAKEFILE_PRE) + +.PHONY: sync + +KVM_VERSION_GIT = $(if $(and $(filter kvm-devel,$(KVM_VERSION)), \ + $(wildcard $(LINUX)/.git)), \ + $(shell git --git-dir=$(LINUX)/.git describe), \ + $(KVM_VERSION)) + +sync: + ./sync -v $(KVM_VERSION_GIT) -l $(LINUX) + +install: + mkdir -p $(DESTDIR)/$(INSTALLDIR) + cp $(ARCH_DIR)/*.ko $(DESTDIR)/$(INSTALLDIR) + for i in $(DESTDIR)/$(ORIGMODDIR)/drivers/kvm/*.ko \ + $(DESTDIR)/$(ORIGMODDIR)/arch/$(ARCH_DIR)/kvm/*.ko; do \ + if [ -f "$$i" ]; then mv "$$i" "$$i.orig"; fi; \ + done + /sbin/depmod -a $(DEPMOD_VERSION) -b $(DESTDIR) + install -m 644 -D scripts/65-kvm.rules $(DESTDIR)/etc/udev/rules.d/65-kvm.rules + +tmpspec = .tmp.kvm-kmod.spec + +rpm-topdir := $$(pwd)/rpmtop + +RPMDIR = $(rpm-topdir)/RPMS + +rpm: all + mkdir -p $(rpm-topdir)/BUILD $(RPMDIR)/$$(uname -i) + sed 's/^Release:.*/Release: $(rpmrelease)/; s/^%define kverrel.*/%define kverrel $(KVERREL)/' \ + kvm-kmod.spec > $(tmpspec) + rpmbuild --define="kverrel $(KVERREL)" \ + --define="objdir $$(pwd)/$(ARCH_DIR)" \ + --define="_rpmdir $(RPMDIR)" \ + --define="_topdir $(rpm-topdir)" \ + -bb $(tmpspec) + +clean: + $(MAKE) -C $(KERNELDIR) M=`pwd` $@ + +distclean: clean + rm -f config.mak include/asm include-compat/asm diff --git a/kernel/anon_inodes.c b/kernel/anon_inodes.c new file mode 100644 index 00000000..135adaea --- /dev/null +++ b/kernel/anon_inodes.c @@ -0,0 +1,275 @@ +/* + * fs/anon_inodes.c + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * + * Thanks to Arnd Bergmann for code review and suggestions. + * More changes for Thomas Gleixner suggestions. 
+ * + */ + +#include <linux/file.h> +#include <linux/poll.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/magic.h> +#include <linux/anon_inodes.h> + +#include <asm/uaccess.h> + +/* anon_inodes on RHEL >= 5.2 is equivalent to 2.6.27 version */ +#ifdef RHEL_RELEASE_CODE +# if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2)) && defined(CONFIG_ANON_INODES) +# define RHEL_ANON_INODES +# endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) && !defined(RHEL_ANON_INODES) + +static struct vfsmount *anon_inode_mnt __read_mostly; +static struct inode *anon_inode_inode; +static struct file_operations anon_inode_fops; + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,17) + +static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + struct vfsmount *mnt) +{ + return get_sb_pseudo(fs_type, "kvm_anon_inode:", NULL, 0x99700426, mnt); +} + +#else + +static struct super_block *anon_inodefs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data) +{ + return get_sb_pseudo(fs_type, "kvm_anon_inode:", NULL, 0x99700426); +} + +#endif + +static int anon_inodefs_delete_dentry(struct dentry *dentry) +{ + /* + * We faked vfs to believe the dentry was hashed when we created it. + * Now we restore the flag so that dput() will work correctly. 
+ */ + dentry->d_flags |= DCACHE_UNHASHED; + return 1; +} + +static struct file_system_type anon_inode_fs_type = { + .name = "kvm_anon_inodefs", + .get_sb = anon_inodefs_get_sb, + .kill_sb = kill_anon_super, +}; +static struct dentry_operations anon_inodefs_dentry_operations = { + .d_delete = anon_inodefs_delete_dentry, +}; + +/** + * anon_inode_getfd - creates a new file instance by hooking it up to and + * anonymous inode, and a dentry that describe the "class" + * of the file + * + * @name: [in] name of the "class" of the new file + * @fops [in] file operations for the new file + * @priv [in] private data for the new file (will be file's private_data) + * + * Creates a new file by hooking it on a single inode. This is useful for files + * that do not need to have a full-fledged inode in order to operate correctly. + * All the files created with anon_inode_getfd() will share a single inode, by + * hence saving memory and avoiding code duplication for the file/inode/dentry + * setup. Returns new descriptor or -error. + */ +int anon_inode_getfd(const char *name, const struct file_operations *fops, + void *priv, int flags) +{ + struct qstr this; + struct dentry *dentry; + struct inode *inode; + struct file *file; + int error, fd; + + if (IS_ERR(anon_inode_inode)) + return -ENODEV; + file = get_empty_filp(); + if (!file) + return -ENFILE; + + inode = igrab(anon_inode_inode); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + goto err_put_filp; + } + + error = get_unused_fd(); + if (error < 0) + goto err_iput; + fd = error; + + /* + * Link the inode to a directory entry by creating a unique name + * using the inode sequence number. 
+ */ + error = -ENOMEM; + this.name = name; + this.len = strlen(name); + this.hash = 0; + dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this); + if (!dentry) + goto err_put_unused_fd; + dentry->d_op = &anon_inodefs_dentry_operations; + /* Do not publish this dentry inside the global dentry hash table */ + dentry->d_flags &= ~DCACHE_UNHASHED; + d_instantiate(dentry, inode); + + file->f_vfsmnt = mntget(anon_inode_mnt); + file->f_dentry = dentry; + file->f_mapping = inode->i_mapping; + + file->f_pos = 0; + file->f_flags = O_RDWR; + file->f_op = (struct file_operations *)fops; + file->f_mode = FMODE_READ | FMODE_WRITE; + file->f_version = 0; + file->private_data = priv; + + fd_install(fd, file); + + return fd; + +err_put_unused_fd: + put_unused_fd(fd); +err_iput: + iput(inode); +err_put_filp: + fput(file); + return error; +} + +/* + * A single inode exist for all anon_inode files. Contrary to pipes, + * anon_inode inodes has no per-instance data associated, so we can avoid + * the allocation of multiple of them. + */ +static struct inode *anon_inode_mkinode(void) +{ + struct inode *inode = new_inode(anon_inode_mnt->mnt_sb); + + if (!inode) + return ERR_PTR(-ENOMEM); + + inode->i_fop = &anon_inode_fops; + + /* + * Mark the inode dirty from the very beginning, + * that way it will never be moved to the dirty + * list because mark_inode_dirty() will think + * that it already _is_ on the dirty list. 
+ */ + inode->i_state = I_DIRTY; + inode->i_mode = S_IRUSR | S_IWUSR; + inode->i_uid = current->fsuid; + inode->i_gid = current->fsgid; + inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + return inode; +} + +static int anon_inode_init(void) +{ + int error; + + error = register_filesystem(&anon_inode_fs_type); + if (error) + goto err_exit; + anon_inode_mnt = kern_mount(&anon_inode_fs_type); + if (IS_ERR(anon_inode_mnt)) { + error = PTR_ERR(anon_inode_mnt); + goto err_unregister_filesystem; + } + anon_inode_inode = anon_inode_mkinode(); + if (IS_ERR(anon_inode_inode)) { + error = PTR_ERR(anon_inode_inode); + goto err_mntput; + } + + return 0; + +err_mntput: + mntput(anon_inode_mnt); +err_unregister_filesystem: + unregister_filesystem(&anon_inode_fs_type); +err_exit: + return -ENOMEM; +} + +int kvm_init_anon_inodes(void) +{ + return anon_inode_init(); +} + +void kvm_exit_anon_inodes(void) +{ + iput(anon_inode_inode); + mntput(anon_inode_mnt); + unregister_filesystem(&anon_inode_fs_type); +} + +#else + +int kvm_init_anon_inodes(void) +{ + return 0; +} + +void kvm_exit_anon_inodes(void) +{ +} + +#undef anon_inode_getfd + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) && !defined(RHEL_ANON_INODES) + +int kvm_anon_inode_getfd(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + int r; + int fd; + struct inode *inode; + struct file *file; + + r = anon_inode_getfd(&fd, &inode, &file, name, fops, priv); + if (r < 0) + return r; + return fd; +} + +#elif LINUX_VERSION_CODE == KERNEL_VERSION(2,6,26) && !defined(RHEL_ANON_INODES) + +int kvm_anon_inode_getfd(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + return anon_inode_getfd(name, fops, priv); +} + +#else + +int kvm_anon_inode_getfd(const char *name, + const struct file_operations *fops, + void *priv, int flags) +{ + return anon_inode_getfd(name, fops, priv, flags); +} + +#endif + +#endif diff --git a/kernel/configure b/kernel/configure 
new file mode 100755 index 00000000..7122d400 --- /dev/null +++ b/kernel/configure @@ -0,0 +1,131 @@ +#!/bin/bash + +kernelsourcedir= +kerneldir=/lib/modules/$(uname -r)/build +cc=gcc +ld=ld +objcopy=objcopy +ar=ar +want_module=1 +cross_prefix= +arch=`uname -m` +# don't use uname if kerneldir is set +no_uname= +# we only need depmod_version for kvm.ko install +depmod_version= +if [ -z "TMPDIR" ] ; then + TMPDIR=. +fi + +usage() { + cat <<-EOF + Usage: $0 [options] + + Options include: + --arch=ARCH architecture to compile for ($arch) + --cross-prefix=PREFIX prefix for cross compile + --kerneldir=DIR kernel build directory ($kerneldir) + --help this helpful text +EOF + exit 1 +} + +while [[ "$1" = -* ]]; do + opt="$1"; shift + arg= + hasarg= + if [[ "$opt" = *=* ]]; then + arg="${opt#*=}" + opt="${opt%%=*}" + hasarg=1 + fi + case "$opt" in + --kerneldir) + kerneldir="$arg" + no_uname=1 + ;; + --with-patched-kernel) + want_module= + ;; + --arch) + arch="$arg" + ;; + --cross-prefix) + cross_prefix="$arg" + ;; + --help) + usage + ;; + *) + usage + ;; + esac +done + +karch="$arch" + +case $arch in + i?86*|x86_64*) + arch=${arch/#i?86/i386} + karch="x86" + ;; +esac + +kvm_version() { + local fname="$(dirname "$0")/KVM_VERSION" + + if test -f "$fname"; then + cat "$fname" + else + echo "kvm-devel" + fi +} + +processor=${arch#*-} +arch=${arch%%-*} + +# see if we have split build and source directories +if [ -d "$kerneldir/include2" ]; then + kernelsourcedir=$kerneldir/source + if [ ! 
-L "$kernelsourcedir" ]; then + kernelsourcedir=${kerneldir%/build*}/source + fi +fi + +if [ -n "$no_uname" -a "$want_module" ]; then + if [ -e "$kerneldir/.kernelrelease" ]; then + depmod_version=`cat "$kerneldir/.kernelrelease"` + + elif [ -e "$kerneldir/include/config/kernel.release" ]; then + depmod_version=`cat "$kerneldir/include/config/kernel.release"` + elif [ -e "$kerneldir/.config" ]; then + depmod_version=$(awk '/Linux kernel version:/ { print $NF }' \ + "$kerneldir/.config") + else + echo + echo "Error: kernelversion not found" + echo "Please make sure your kernel is configured" + echo + exit 1 + fi +fi + +rm -f include/asm include-compat/asm +mkdir -p include +ln -sf asm-"$karch" include/asm +ln -sf asm-"$karch" include-compat/asm + +cat <<EOF > config.mak +ARCH=$arch +PROCESSOR=$processor +PREFIX=$prefix +KERNELDIR=$kerneldir +KERNELSOURCEDIR=$kernelsourcedir +CROSS_COMPILE=$cross_prefix +CC=$cross_prefix$cc +LD=$cross_prefix$ld +OBJCOPY=$cross_prefix$objcopy +AR=$cross_prefix$ar +DEPMOD_VERSION=$depmod_version +KVM_VERSION=$(kvm_version) +EOF diff --git a/kernel/external-module-compat-comm.h b/kernel/external-module-compat-comm.h new file mode 100644 index 00000000..cec117ba --- /dev/null +++ b/kernel/external-module-compat-comm.h @@ -0,0 +1,1015 @@ + +/* + * Compatibility header for building as an external module. + */ + +/* + * Avoid picking up the kernel's kvm.h in case we have a newer one. 
+ */ + +#include <linux/compiler.h> +#include <linux/version.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_para.h> +#include <linux/cpu.h> +#include <linux/time.h> +#include <asm/processor.h> +#include <linux/hrtimer.h> +#include <asm/bitops.h> + +/* + * 2.6.16 does not have GFP_NOWAIT + */ + +#include <linux/gfp.h> + +#ifndef GFP_NOWAIT +#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) +#endif + + +/* + * kvm profiling support needs 2.6.20 + */ +#include <linux/profile.h> + +#ifndef KVM_PROFILING +#define KVM_PROFILING 1234 +#define prof_on 4321 +#endif + +/* + * smp_call_function_single() is not exported below 2.6.20, and has different + * semantics below 2.6.23. The 'nonatomic' argument was removed in 2.6.27. + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + +int kvm_smp_call_function_single(int cpu, void (*func)(void *info), + void *info, int wait); +#undef smp_call_function_single +#define smp_call_function_single kvm_smp_call_function_single + +#endif + +/* on_each_cpu() lost an argument in 2.6.27. 
*/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + +#define kvm_on_each_cpu(func, info, wait) on_each_cpu(func, info, 0, wait) + +#else + +#define kvm_on_each_cpu(func, info, wait) on_each_cpu(func, info, wait) + +#endif + +/* + * The cpu hotplug stubs are broken if !CONFIG_CPU_HOTPLUG + */ + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15) +#define DEFINE_MUTEX(a) DECLARE_MUTEX(a) +#define mutex_lock_interruptible(a) down_interruptible(a) +#define mutex_unlock(a) up(a) +#define mutex_lock(a) down(a) +#define mutex_init(a) init_MUTEX(a) +#define mutex_trylock(a) down_trylock(a) +#define mutex semaphore +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) +#ifndef kzalloc +#define kzalloc(size,flags) \ +({ \ + void *__ret = kmalloc(size, flags); \ + if (__ret) \ + memset(__ret, 0, size); \ + __ret; \ +}) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +#ifndef kmem_cache_zalloc +#define kmem_cache_zalloc(cache,flags) \ +({ \ + void *__ret = kmem_cache_alloc(cache, flags); \ + if (__ret) \ + memset(__ret, 0, kmem_cache_size(cache)); \ + __ret; \ +}) +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) + +#ifndef CONFIG_HOTPLUG_CPU +#define register_cpu_notifier(nb) (0) +#endif + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) +#define nr_cpu_ids NR_CPUS +#endif + +#include <linux/miscdevice.h> +#ifndef KVM_MINOR +#define KVM_MINOR 232 +#endif + +#include <linux/notifier.h> +#ifndef CPU_TASKS_FROZEN + +#define CPU_TASKS_FROZEN 0x0010 +#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) +#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) +#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) +#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) +#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) +#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) + +#endif + +#ifndef CPU_DYING +#define CPU_DYING 0x000A +#define CPU_DYING_FROZEN (CPU_DYING | 
CPU_TASKS_FROZEN) +#endif + +#include <asm/system.h> + +struct inode; +#include <linux/anon_inodes.h> +#define anon_inode_getfd kvm_anon_inode_getfd +int kvm_init_anon_inodes(void); +void kvm_exit_anon_inodes(void); +int anon_inode_getfd(const char *name, + const struct file_operations *fops, + void *priv , int flags); + +/* + * 2.6.23 removed the cache destructor + */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +# define kmem_cache_create(name, size, align, flags, ctor) \ + kmem_cache_create(name, size, align, flags, ctor, NULL) +#endif + +/* HRTIMER_MODE_ABS started life with a different name */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) +#define HRTIMER_MODE_ABS HRTIMER_ABS +#endif + +/* div64_u64 is fairly new */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) + +#define div64_u64 kvm_div64_u64 + +#ifdef CONFIG_64BIT + +static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor) +{ + return dividend / divisor; +} + +#else + +uint64_t div64_u64(uint64_t dividend, uint64_t divisor); + +#endif + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#ifdef RHEL_RELEASE_CODE +#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2) +#define RHEL_BOOL 1 +#endif +#endif + +#ifndef RHEL_BOOL + +typedef _Bool bool; + +#define false 0 +#define true 1 + +#endif + +#endif + +/* + * PF_VCPU is a Linux 2.6.24 addition + */ + +#include <linux/sched.h> + +#ifndef PF_VCPU +#define PF_VCPU 0 +#endif + +/* + * smp_call_function_mask() is not defined/exported below 2.6.24 on all + * targets and below 2.6.26 on x86-64 + */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) || \ + (defined CONFIG_X86_64 && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) + +int kvm_smp_call_function_mask(cpumask_t mask, void (*func) (void *info), + void *info, int wait); + +#define smp_call_function_mask kvm_smp_call_function_mask + +void kvm_smp_send_reschedule(int cpu); + +#else + +#define kvm_smp_send_reschedule smp_send_reschedule + +#endif + +/* empty_zero_page isn't 
exported in all kernels */ +#include <asm/pgtable.h> + +#define empty_zero_page kvm_empty_zero_page + +static char empty_zero_page[PAGE_SIZE]; + +static inline void blahblah(void) +{ + (void)empty_zero_page[0]; +} + +/* __mmdrop() is not exported before 2.6.25 */ +#include <linux/sched.h> + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +#define mmdrop(x) do { (void)(x); } while (0) +#define mmget(x) do { (void)(x); } while (0) + +#else + +#define mmget(x) do { atomic_inc(x); } while (0) + +#endif + +/* pagefault_enable(), page_fault_disable() - 2.6.20 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) +# define KVM_NEED_PAGEFAULT_DISABLE 1 +# ifdef RHEL_RELEASE_CODE +# if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,3) +# undef KVM_NEED_PAGEFAULT_DISABLE +# endif +# endif +#endif + +#ifdef KVM_NEED_PAGEFAULT_DISABLE + +static inline void pagefault_disable(void) +{ + inc_preempt_count(); + /* + * make sure to have issued the store before a pagefault + * can hit. + */ + barrier(); +} + +static inline void pagefault_enable(void) +{ + /* + * make sure to issue those last loads/stores before enabling + * the pagefault handler again. + */ + barrier(); + dec_preempt_count(); + /* + * make sure we do.. + */ + barrier(); + preempt_check_resched(); +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) +#include <asm/uaccess.h> +#else +#include <linux/uaccess.h> +#endif + +/* vm ops ->fault() was introduced in 2.6.23. 
*/ +#include <linux/mm.h> + +#ifdef KVM_MAIN +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +struct vm_fault { + unsigned int flags; + pgoff_t pgoff; + void __user *virtual_address; + struct page *page; +}; + +static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf); +static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf); + +static inline struct page *kvm_nopage_to_fault( + int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf), + struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + struct vm_fault vmf; + int ret; + + vmf.pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; + vmf.virtual_address = (void __user *)address; + ret = fault(vma, &vmf); + if (ret) + return NOPAGE_SIGBUS; + *type = VM_FAULT_MINOR; + return vmf.page; +} + +static inline struct page *__kvm_vcpu_fault(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + return kvm_nopage_to_fault(kvm_vcpu_fault, vma, address, type); +} + +static inline struct page *__kvm_vm_fault(struct vm_area_struct *vma, + unsigned long address, + int *type) +{ + return kvm_nopage_to_fault(kvm_vm_fault, vma, address, type); +} + +#define VMA_OPS_FAULT(x) nopage +#define VMA_OPS_FAULT_FUNC(x) __##x + +#else + +#define VMA_OPS_FAULT(x) x +#define VMA_OPS_FAULT_FUNC(x) x + +#endif +#endif + +/* simple vfs attribute getter signature has changed to add a return code */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +#define MAKE_SIMPLE_ATTRIBUTE_GETTER(x) \ + static u64 x(void *v) \ + { \ + u64 ret = 0; \ + \ + __##x(v, &ret); \ + return ret; \ + } + +#else + +#define MAKE_SIMPLE_ATTRIBUTE_GETTER(x) \ + static int x(void *v, u64 *val) \ + { \ + return __##x(v, val); \ + } + +#endif + +/* set_kset_name() is gone in 2.6.25 */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) + +#define set_kset_name(x) .name = x + +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25) +#ifndef FASTCALL +#define FASTCALL(x) x +#define 
fastcall +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +unsigned kvm_get_tsc_khz(void); +#define kvm_tsc_khz (kvm_get_tsc_khz()) + +#else + +#define kvm_tsc_khz tsc_khz + +#endif + +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21) + +#include <linux/ktime.h> +#include <linux/hrtimer.h> + +#define ktime_get kvm_ktime_get + +static inline ktime_t ktime_get(void) +{ + struct timespec now; + + ktime_get_ts(&now); + + return timespec_to_ktime(now); +} + +#endif + +/* __aligned arrived in 2.6.21 */ +#ifndef __aligned +#define __aligned(x) __attribute__((__aligned__(x))) +#endif + +#include <linux/mm.h> + +/* The shrinker API changed in 2.6.23 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +struct kvm_shrinker { + int (*shrink)(int nr_to_scan, gfp_t gfp_mask); + int seeks; + struct shrinker *kshrinker; +}; + +static inline void register_shrinker(struct kvm_shrinker *shrinker) +{ + shrinker->kshrinker = set_shrinker(shrinker->seeks, shrinker->shrink); +} + +static inline void unregister_shrinker(struct kvm_shrinker *shrinker) +{ + if (shrinker->kshrinker) + remove_shrinker(shrinker->kshrinker); +} + +#define shrinker kvm_shrinker + +#endif + +/* clocksource */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) +static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant) +{ + /* khz = cyc/(Million ns) + * mult/2^shift = ns/cyc + * mult = ns/cyc * 2^shift + * mult = 1Million/khz * 2^shift + * mult = 1000000 * 2^shift / khz + * mult = (1000000<<shift) / khz + */ + u64 tmp = ((u64)1000000) << shift_constant; + + tmp += khz/2; /* round for do_div */ + do_div(tmp, khz); + + return (u32)tmp; +} +#else +#include <linux/clocksource.h> +#endif + +/* manually export hrtimer_init/start/cancel */ +#include <linux/kallsyms.h> +extern void (*hrtimer_init_p)(struct hrtimer *timer, clockid_t which_clock, + enum hrtimer_mode mode); +extern int (*hrtimer_start_p)(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode); +extern int 
(*hrtimer_cancel_p)(struct hrtimer *timer); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) && defined(CONFIG_KALLSYMS) +static inline void hrtimer_kallsyms_resolve(void) +{ + hrtimer_init_p = (void *) kallsyms_lookup_name("hrtimer_init"); + BUG_ON(!hrtimer_init_p); + hrtimer_start_p = (void *) kallsyms_lookup_name("hrtimer_start"); + BUG_ON(!hrtimer_start_p); + hrtimer_cancel_p = (void *) kallsyms_lookup_name("hrtimer_cancel"); + BUG_ON(!hrtimer_cancel_p); +} +#else +static inline void hrtimer_kallsyms_resolve(void) +{ + hrtimer_init_p = hrtimer_init; + hrtimer_start_p = hrtimer_start; + hrtimer_cancel_p = hrtimer_cancel; +} +#endif + +/* handle old hrtimer API with data pointer */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +static inline void hrtimer_data_pointer(struct hrtimer *timer) +{ + timer->data = (void *)timer; +} +#else +static inline void hrtimer_data_pointer(struct hrtimer *timer) {} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#define ns_to_timespec kvm_ns_to_timespec + +struct timespec kvm_ns_to_timespec(const s64 nsec); + +#endif + +/* work_struct lost the 'data' field in 2.6.20 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +#define kvm_INIT_WORK(work, handler) \ + INIT_WORK(work, (void (*)(void *))handler, work) + +#else + +#define kvm_INIT_WORK(work, handler) INIT_WORK(work, handler) + +#endif + +/* cancel_work_sync() was flush_work() in 2.6.21 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +static inline int cancel_work_sync(struct work_struct *work) +{ + /* + * FIXME: actually cancel. How? Add own implementation of workqueues? + */ + return 0; +} + +/* ... 
and it returned void before 2.6.23 */ +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +#define cancel_work_sync(work) ({ cancel_work_sync(work); 0; }) + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + +static inline void flush_work(struct work_struct *work) +{ + cancel_work_sync(work); +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +struct pci_dev; + +struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn); + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) +#include <linux/relayfs_fs.h> +#else +#include <linux/relay.h> +#endif + +/* relay_open() interface has changed on 2.6.21 */ + +struct rchan *kvm_relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb, + void *private_data); + +#else + +#define kvm_relay_open relay_open + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + +static inline int get_user_pages_fast(unsigned long start, int nr_pages, + int write, struct page **pages) +{ + int npages; + + down_read(&current->mm->mmap_sem); + npages = get_user_pages(current, current->mm, start, nr_pages, write, + 0, pages, NULL); + up_read(&current->mm->mmap_sem); + + return npages; +} + +#endif + +/* spin_needbreak() was called something else in 2.6.24 */ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24) + +#define spin_needbreak need_lockbreak + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +static inline void kvm_hrtimer_add_expires_ns(struct hrtimer *timer, u64 delta) +{ + timer->expires = ktime_add_ns(timer->expires, delta); +} + +static inline ktime_t kvm_hrtimer_get_expires(struct hrtimer *timer) +{ + return timer->expires; +} + +static inline u64 kvm_hrtimer_get_expires_ns(struct hrtimer *timer) +{ + return ktime_to_ns(timer->expires); +} + +static inline void kvm_hrtimer_start_expires(struct hrtimer *timer, int mode) +{ + hrtimer_start_p(timer, 
timer->expires, mode); +} + +static inline ktime_t kvm_hrtimer_expires_remaining(const struct hrtimer *timer) +{ + return ktime_sub(timer->expires, timer->base->get_time()); +} + +#else + +#define kvm_hrtimer_add_expires_ns hrtimer_add_expires_ns +#define kvm_hrtimer_get_expires hrtimer_get_expires +#define kvm_hrtimer_get_expires_ns hrtimer_get_expires_ns +#define kvm_hrtimer_start_expires hrtimer_start_expires +#define kvm_hrtimer_expires_remaining hrtimer_expires_remaining + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +static inline int pci_reset_function(struct pci_dev *dev) +{ + return 0; +} + +#endif + +#include <linux/interrupt.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +typedef irqreturn_t (*kvm_irq_handler_t)(int, void *); +int kvm_request_irq(unsigned int a, kvm_irq_handler_t handler, unsigned long c, + const char *d, void *e); +void kvm_free_irq(unsigned int irq, void *dev_id); + +#else + +#define kvm_request_irq request_irq +#define kvm_free_irq free_irq + +#endif + +/* dynamically allocated cpu masks introduced in 2.6.28 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +typedef cpumask_t cpumask_var_t[1]; + +static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + return 1; +} + +static inline void free_cpumask_var(cpumask_var_t mask) +{ +} + +static inline void cpumask_clear(cpumask_var_t mask) +{ + cpus_clear(*mask); +} + +static inline void cpumask_set_cpu(int cpu, cpumask_var_t mask) +{ + cpu_set(cpu, *mask); +} + +static inline int smp_call_function_many(cpumask_var_t cpus, + void (*func)(void *data), void *data, + int sync) +{ + return smp_call_function_mask(*cpus, func, data, sync); +} + +static inline int cpumask_empty(cpumask_var_t mask) +{ + return cpus_empty(*mask); +} + +static inline int cpumask_test_cpu(int cpu, cpumask_var_t mask) +{ + return cpu_isset(cpu, *mask); +} + +static inline void cpumask_clear_cpu(int cpu, cpumask_var_t mask) +{ + cpu_clear(cpu, *mask); +} + +#define 
cpu_online_mask (&cpu_online_map) + +#endif + +/* A zeroing constructor was added late 2.6.30 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) + +static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) +{ + bool ret; + + ret = alloc_cpumask_var(mask, flags); + if (ret) + cpumask_clear(*mask); + return ret; +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) + +#define IF_ANON_INODES_DOES_REFCOUNTS(x) + +#else + +#define IF_ANON_INODES_DOES_REFCOUNTS(x) x + +#endif + + +/* Macro introduced only on newer kernels: */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +#define marker_synchronize_unregister() synchronize_sched() +#endif + +/* pci_dev.msi_enable was introduced in 2.6.18 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) + +struct pci_dev; + +int kvm_pcidev_msi_enabled(struct pci_dev *dev); + +#else + +#define kvm_pcidev_msi_enabled(dev) (dev)->msi_enabled + +#endif + +/* compound_head() was introduced in 2.6.22 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) +# define NEED_COMPOUND_HEAD 1 +# ifdef RHEL_RELEASE_CODE +# if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2) +# undef NEED_COMPOUND_HEAD +# endif +# endif +#endif + +#ifdef NEED_COMPOUND_HEAD + +static inline struct page *compound_head(struct page *page) +{ + if (PageCompound(page)) + page = (struct page *)page_private(page); + return page; +} + +#endif + +#include <linux/iommu.h> +#ifndef IOMMU_CACHE + +#define IOMMU_CACHE (4) +#define IOMMU_CAP_CACHE_COHERENCY 0x1 +static inline int iommu_domain_has_cap(struct iommu_domain *domain, + unsigned long cap) +{ + return 0; +} + +#endif + +#include <linux/file.h> + +/* eventfd_fget() will be introduced in 2.6.32 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) + +static inline struct file *eventfd_fget(int fd) +{ + return fget(fd); +} + +#endif + +/* srcu was born in 2.6.19 */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19) + +#define kvm_init_srcu_struct init_srcu_struct +#define kvm_cleanup_srcu_struct 
cleanup_srcu_struct +#define kvm_srcu_read_lock srcu_read_lock +#define kvm_srcu_read_unlock srcu_read_unlock +#define kvm_synchronize_srcu synchronize_srcu +#define kvm_srcu_batches_completed srcu_batches_completed + +#endif + +/* tracepoints were introduced in 2.6.28, but changed in 2.6.30 */ + +#include <linux/tracepoint.h> + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) + +struct tracepoint; + +#undef DECLARE_TRACE +#undef DEFINE_TRACE +#undef PARAMS +#undef TP_PROTO +#undef TP_ARGS +#undef EXPORT_TRACEPOINT_SYMBOL +#undef EXPORT_TRACEPOINT_SYMBOL_GPL + +#define DECLARE_TRACE(name, proto, args) \ + static inline void _do_trace_##name(struct tracepoint *tp, proto) \ + { } \ + static inline void trace_##name(proto) \ + { } \ + static inline int register_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } \ + static inline int unregister_trace_##name(void (*probe)(proto)) \ + { \ + return -ENOSYS; \ + } + +#define tracepoint_update_probe_range(begin, end) do {} while (0) + +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + +#define PARAMS(args...) args +#define TP_PROTO(args...) args +#define TP_ARGS(args...) 
args + +#define TRACE_EVENT(name, proto, args, struct, assign, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + +#undef tracepoint_synchronize_unregister +#define tracepoint_synchronize_unregister() do {} while (0) + +#endif + +#include <linux/ftrace_event.h> + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) + +struct trace_print_flags { + unsigned long mask; + const char *name; +}; + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) + +#define alloc_pages_exact_node alloc_pages_node + +#endif + +/* eventfd accessors, new in 2.6.31 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) + +#include <linux/eventfd.h> +#include <linux/fs.h> + +struct eventfd_ctx; + +static inline struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx) +{ + struct file *filp = (struct file *)ctx; + + get_file(filp); + return ctx; +} + +static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) +{ + struct file *filp = (struct file *)ctx; + + fput(filp); +} + +static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) +{ + struct file *filp = eventfd_fget(fd); + + return (struct eventfd_ctx *)filp; +} + +static inline struct eventfd_ctx *eventfd_ctx_fileget(struct file *file) +{ + return (struct eventfd_ctx *)file; +} + +static inline int kvm_eventfd_signal(struct eventfd_ctx *ctx, int n) +{ + return -ENOSYS; +} + +#else + +#define kvm_eventfd_signal eventfd_signal + +#endif + +#include <linux/hugetlb.h> + +/* vma_kernel_pagesize, exported since 2.6.32 */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) + +#if defined(CONFIG_HUGETLB_PAGE) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26) +static inline +unsigned long kvm_vma_kernel_pagesize(struct vm_area_struct *vma) +{ + struct hstate *hstate; + + if (!is_vm_hugetlb_page(vma)) + return PAGE_SIZE; + + hstate = hstate_vma(vma); + + return 1UL << (hstate->order + PAGE_SHIFT); +} +#else /* !CONFIG_HUGETLB_SIZE || <= 2.6.26 */ +#define kvm_vma_kernel_pagesize(v) PAGE_SIZE +#endif + +#else /* >= 2.6.32 */ + +#define 
kvm_vma_kernel_pagesize vma_kernel_pagesize + +#endif + +#ifndef printk_once +/* + * Print a one-time message (analogous to WARN_ONCE() et al): + */ +#define printk_once(x...) ({ \ + static int __print_once = 1; \ + \ + if (__print_once) { \ + __print_once = 0; \ + printk(x); \ + } \ +}) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && !defined(CONFIG_CPU_FREQ) +static inline unsigned int cpufreq_get(unsigned int cpu) +{ + return 0; +} +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) +int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode); +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27) +#ifndef CONFIG_MMU_NOTIFIER +struct mmu_notifier {}; +#endif +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) +static inline void hlist_del_init_rcu(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + n->pprev = NULL; + } +} +#endif diff --git a/kernel/external-module-compat.c b/kernel/external-module-compat.c new file mode 100644 index 00000000..327fa6b7 --- /dev/null +++ b/kernel/external-module-compat.c @@ -0,0 +1,470 @@ + +/* + * smp_call_function_single() is not exported below 2.6.20. 
+ */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +#undef smp_call_function_single + +#include <linux/spinlock.h> +#include <linux/smp.h> + +struct scfs_thunk_info { + int cpu; + void (*func)(void *info); + void *info; +}; + +static void scfs_thunk(void *_thunk) +{ + struct scfs_thunk_info *thunk = _thunk; + + if (raw_smp_processor_id() == thunk->cpu) + thunk->func(thunk->info); +} + +int kvm_smp_call_function_single(int cpu, void (*func)(void *info), + void *info, int wait) +{ + int r, this_cpu; + struct scfs_thunk_info thunk; + + this_cpu = get_cpu(); + WARN_ON(irqs_disabled()); + if (cpu == this_cpu) { + r = 0; + local_irq_disable(); + func(info); + local_irq_enable(); + } else { + thunk.cpu = cpu; + thunk.func = func; + thunk.info = info; + r = smp_call_function(scfs_thunk, &thunk, 0, 1); + } + put_cpu(); + return r; +} +EXPORT_SYMBOL_GPL(kvm_smp_call_function_single); + +#define smp_call_function_single kvm_smp_call_function_single + +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) +/* + * pre 2.6.23 doesn't handle smp_call_function_single on current cpu + */ + +#undef smp_call_function_single + +#include <linux/smp.h> + +int kvm_smp_call_function_single(int cpu, void (*func)(void *info), + void *info, int wait) +{ + int this_cpu, r; + + this_cpu = get_cpu(); + WARN_ON(irqs_disabled()); + if (cpu == this_cpu) { + r = 0; + local_irq_disable(); + func(info); + local_irq_enable(); + } else + r = smp_call_function_single(cpu, func, info, 0, wait); + put_cpu(); + return r; +} +EXPORT_SYMBOL_GPL(kvm_smp_call_function_single); + +#define smp_call_function_single kvm_smp_call_function_single + +#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + +/* The 'nonatomic' argument was removed in 2.6.27. 
*/ + +#undef smp_call_function_single + +#include <linux/smp.h> + +#ifdef CONFIG_SMP +int kvm_smp_call_function_single(int cpu, void (*func)(void *info), + void *info, int wait) +{ + return smp_call_function_single(cpu, func, info, 0, wait); +} +#else /* !CONFIG_SMP */ +int kvm_smp_call_function_single(int cpu, void (*func)(void *info), + void *info, int wait) +{ + WARN_ON(cpu != 0); + local_irq_disable(); + func(info); + local_irq_enable(); + return 0; + +} +#endif /* !CONFIG_SMP */ +EXPORT_SYMBOL_GPL(kvm_smp_call_function_single); + +#define smp_call_function_single kvm_smp_call_function_single + +#endif + +/* div64_u64 is fairly new */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) + +#ifndef CONFIG_64BIT + +/* 64bit divisor, dividend and result. dynamic precision */ +uint64_t div64_u64(uint64_t dividend, uint64_t divisor) +{ + uint32_t high, d; + + high = divisor >> 32; + if (high) { + unsigned int shift = fls(high); + + d = divisor >> shift; + dividend >>= shift; + } else + d = divisor; + + do_div(dividend, d); + + return dividend; +} + +#endif + +#endif + +/* + * smp_call_function_mask() is not defined/exported below 2.6.24 on all + * targets and below 2.6.26 on x86-64 + */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) || \ + (defined CONFIG_X86_64 && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) + +#include <linux/smp.h> + +struct kvm_call_data_struct { + void (*func) (void *info); + void *info; + atomic_t started; + atomic_t finished; + int wait; +}; + +static void kvm_ack_smp_call(void *_data) +{ + struct kvm_call_data_struct *data = _data; + /* if wait == 0, data can be out of scope + * after atomic_inc(info->started) + */ + void (*func) (void *info) = data->func; + void *info = data->info; + int wait = data->wait; + + smp_mb(); + atomic_inc(&data->started); + (*func)(info); + if (wait) { + smp_mb(); + atomic_inc(&data->finished); + } +} + +int kvm_smp_call_function_mask(cpumask_t mask, + void (*func) (void *info), void *info, int wait) +{ +#ifdef 
CONFIG_SMP + struct kvm_call_data_struct data; + cpumask_t allbutself; + int cpus; + int cpu; + int me; + + me = get_cpu(); + WARN_ON(irqs_disabled()); + allbutself = cpu_online_map; + cpu_clear(me, allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + goto out; + + data.func = func; + data.info = info; + atomic_set(&data.started, 0); + data.wait = wait; + if (wait) + atomic_set(&data.finished, 0); + + for (cpu = first_cpu(mask); cpu != NR_CPUS; cpu = next_cpu(cpu, mask)) + smp_call_function_single(cpu, kvm_ack_smp_call, &data, 0); + + while (atomic_read(&data.started) != cpus) { + cpu_relax(); + barrier(); + } + + if (!wait) + goto out; + + while (atomic_read(&data.finished) != cpus) { + cpu_relax(); + barrier(); + } +out: + put_cpu(); +#endif /* CONFIG_SMP */ + return 0; +} + +#include <linux/workqueue.h> + +static void vcpu_kick_intr(void *info) +{ +} + +struct kvm_kick { + int cpu; + struct work_struct work; +}; + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) +static void kvm_do_smp_call_function(void *data) +{ + int me; + struct kvm_kick *kvm_kick = data; +#else +static void kvm_do_smp_call_function(struct work_struct *work) +{ + int me; + struct kvm_kick *kvm_kick = container_of(work, struct kvm_kick, work); +#endif + me = get_cpu(); + + if (kvm_kick->cpu != me) + smp_call_function_single(kvm_kick->cpu, vcpu_kick_intr, + NULL, 0); + kfree(kvm_kick); + put_cpu(); +} + +void kvm_queue_smp_call_function(int cpu) +{ + struct kvm_kick *kvm_kick = kmalloc(sizeof(struct kvm_kick), GFP_ATOMIC); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function, kvm_kick); +#else + INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function); +#endif + + schedule_work(&kvm_kick->work); +} + +void kvm_smp_send_reschedule(int cpu) +{ + if (irqs_disabled()) { + kvm_queue_smp_call_function(cpu); + return; + } + smp_call_function_single(cpu, vcpu_kick_intr, NULL, 0); +} +#endif + +/* manually export 
hrtimer_init/start/cancel */ +void (*hrtimer_init_p)(struct hrtimer *timer, clockid_t which_clock, + enum hrtimer_mode mode); +int (*hrtimer_start_p)(struct hrtimer *timer, ktime_t tim, + const enum hrtimer_mode mode); +int (*hrtimer_cancel_p)(struct hrtimer *timer); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +static void kvm_set_normalized_timespec(struct timespec *ts, time_t sec, + long nsec) +{ + while (nsec >= NSEC_PER_SEC) { + nsec -= NSEC_PER_SEC; + ++sec; + } + while (nsec < 0) { + nsec += NSEC_PER_SEC; + --sec; + } + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +struct timespec kvm_ns_to_timespec(const s64 nsec) +{ + struct timespec ts; + + if (!nsec) + return (struct timespec) {0, 0}; + + ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec); + if (unlikely(nsec < 0)) + kvm_set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec); + + return ts; +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) + +#include <linux/pci.h> + +struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + if (pci_domain_nr(dev->bus) == 0 && + (dev->bus->number == bus && dev->devfn == devfn)) + return dev; + } + return NULL; +} + +#endif + +#include <linux/intel-iommu.h> + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +int intel_iommu_found() +{ + return 0; +} + +#endif + + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) + +/* relay_open() interface has changed on 2.6.21 */ + +struct rchan *kvm_relay_open(const char *base_filename, + struct dentry *parent, + size_t subbuf_size, + size_t n_subbufs, + struct rchan_callbacks *cb, + void *private_data) +{ + struct rchan *chan = relay_open(base_filename, parent, + subbuf_size, n_subbufs, + cb); + if (chan) + chan->private_data = private_data; + return chan; +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) + +#include <linux/pci.h> + +int 
kvm_pcidev_msi_enabled(struct pci_dev *dev) +{ + int pos; + u16 control; + + if (!(pos = pci_find_capability(dev, PCI_CAP_ID_MSI))) + return 0; + + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control); + if (control & PCI_MSI_FLAGS_ENABLE) + return 1; + + return 0; +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) + +extern unsigned tsc_khz; +static unsigned tsc_khz_dummy = 2000000; +static unsigned *tsc_khz_p; + +unsigned kvm_get_tsc_khz(void) +{ + if (!tsc_khz_p) { + tsc_khz_p = symbol_get(tsc_khz); + if (!tsc_khz_p) + tsc_khz_p = &tsc_khz_dummy; + } + return *tsc_khz_p; +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +static enum hrtimer_restart kvm_hrtimer_wakeup(struct hrtimer *timer) +{ + struct hrtimer_sleeper *t = + container_of(timer, struct hrtimer_sleeper, timer); + struct task_struct *task = t->task; + + t->task = NULL; + if (task) + wake_up_process(task); + + return HRTIMER_NORESTART; +} + +int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode) +{ + struct hrtimer_sleeper t; + + /* + * Optimize when a zero timeout value is given. It does not + * matter whether this is an absolute or a relative time. + */ + if (expires && !expires->tv64) { + __set_current_state(TASK_RUNNING); + return 0; + } + + /* + * A NULL parameter means "inifinte" + */ + if (!expires) { + schedule(); + __set_current_state(TASK_RUNNING); + return -EINTR; + } + + hrtimer_init(&t.timer, CLOCK_MONOTONIC, mode); + t.timer.expires = *expires; + + t.timer.function = kvm_hrtimer_wakeup; + t.task = current; + + hrtimer_start(&t.timer, t.timer.expires, mode); + if (!hrtimer_active(&t.timer)) + t.task = NULL; + + if (likely(t.task)) + schedule(); + + hrtimer_cancel(&t.timer); + + __set_current_state(TASK_RUNNING); + + return !t.task ? 
0 : -EINTR; +} + +#endif diff --git a/kernel/ia64/Kbuild b/kernel/ia64/Kbuild new file mode 100644 index 00000000..e62f2b94 --- /dev/null +++ b/kernel/ia64/Kbuild @@ -0,0 +1,13 @@ +obj-m := kvm.o kvm-intel.o + +kvm-objs := kvm_main.o ioapic.o coalesced_mmio.o kvm-ia64.o kvm_fw.o \ + irq_comm.o ../anon_inodes.o ../external-module-compat.o \ + ../request-irq-compat.o assigned-dev.o + +ifeq ($(CONFIG_IOMMU_API),y) +kvm-objs += iommu.o +endif + +EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 +kvm-intel-objs := vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ + vtlb.o process.o memset.o memcpy.o kvm_lib.o diff --git a/kernel/ia64/Makefile.pre b/kernel/ia64/Makefile.pre new file mode 100644 index 00000000..4d3410f6 --- /dev/null +++ b/kernel/ia64/Makefile.pre @@ -0,0 +1,27 @@ +prerequisite: asm-offsets.h ia64/memset.S ia64/memcpy.S + cp -f $(KERNELDIR)/arch/ia64/lib/memcpy.S ia64/memcpy.S + cp -f $(KERNELDIR)/arch/ia64/lib/memset.S ia64/memset.S + cmp -s asm-offset.h ia64/asm-offset.h || mv -f asm-offsets.* ia64/ + cp -f $(KERNELDIR)/lib/vsprintf.c ia64/vsprintf.c + cp -f $(KERNELDIR)/lib/ctype.c ia64/ctype.c + sed -i /^EXPORT_SYMBOL/d ia64/vsprintf.c + sed -i /^EXPORT_SYMBOL/d ia64/ctype.c + +asm-offsets.h: asm-offsets.s + @(set -e; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was auto-generated from $<"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + echo "#ifndef __KVM_ASM_OFFSETS_H__"; \ + echo "#define __KVM_ASM_OFFSETS_H__"; \ + echo ""; \ + sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \ + echo ""; \ + echo "#endif") <$< >$@ + +asm-offsets.s: ia64/asm-offsets.c + gcc -S -D__KERNEL__ -I./include -I$(KERNELDIR)/include -I$(KERNELDIR)/arch/ia64/include ia64/asm-offsets.c diff --git a/kernel/ia64/external-module-compat.h b/kernel/ia64/external-module-compat.h new file mode 100644 index 00000000..60a83a1b --- /dev/null +++ b/kernel/ia64/external-module-compat.h @@ -0,0 
+1,60 @@ +/* + * Compatibility header for building as an external module. + */ + +#ifndef __ASSEMBLY__ +#include <linux/version.h> + +#include <linux/types.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) + +typedef u64 phys_addr_t; + +#endif + +#include "../external-module-compat-comm.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) +#error "KVM/IA-64 Can't be compiled if kernel version < 2.6.26" +#endif + +#ifndef CONFIG_PREEMPT_NOTIFIERS +/*Now, Just print an error message if no preempt notifiers configured!! + TODO: Implement it later! */ +#error "KVM/IA-64 depends on preempt notifiers in kernel." +#endif + +#ifndef CONFIG_KVM_APIC_ARCHITECTURE +#define CONFIG_KVM_APIC_ARCHITECTURE +#endif + +/* smp_call_function() lost an argument in 2.6.27. */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) + +#define kvm_smp_call_function(func, info, wait) smp_call_function(func, info, 0, wait) + +#else + +#define kvm_smp_call_function(func, info, wait) smp_call_function(func, info, wait) + +#endif + +/*There is no struct fdesc definition <2.6.27*/ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) +struct fdesc { + uint64_t ip; + uint64_t gp; +}; +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) + +#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \ + _PAGE_MA_UC) +#endif + +#endif + +#ifndef CONFIG_HAVE_KVM_IRQCHIP +#define CONFIG_HAVE_KVM_IRQCHIP 1 +#endif diff --git a/kernel/include-compat/asm-ia64/msidef.h b/kernel/include-compat/asm-ia64/msidef.h new file mode 100644 index 00000000..592c1047 --- /dev/null +++ b/kernel/include-compat/asm-ia64/msidef.h @@ -0,0 +1,42 @@ +#ifndef _IA64_MSI_DEF_H +#define _IA64_MSI_DEF_H + +/* + * Shifts for APIC-based data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) +#define MSI_DATA_VECTOR_MASK 0xffffff00 + +#define MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) +#define 
MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +/* + * Shift/mask fields for APIC-based bus address + */ + +#define MSI_ADDR_DEST_ID_SHIFT 4 +#define MSI_ADDR_HEADER 0xfee00000 + +#define MSI_ADDR_DEST_ID_MASK 0xfff0000f +#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT) + +#define MSI_ADDR_DEST_MODE_SHIFT 2 +#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT) +#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + +#endif/* _IA64_MSI_DEF_H */ diff --git a/kernel/include-compat/asm-x86/asm.h b/kernel/include-compat/asm-x86/asm.h new file mode 100644 index 00000000..3ad6aab9 --- /dev/null +++ b/kernel/include-compat/asm-x86/asm.h @@ -0,0 +1,3 @@ +/* + * Empty file to satisfy #include <linux/asm.h> for older kernels. + */ diff --git a/kernel/include-compat/asm-x86/cmpxchg.h b/kernel/include-compat/asm-x86/cmpxchg.h new file mode 100644 index 00000000..68daeebc --- /dev/null +++ b/kernel/include-compat/asm-x86/cmpxchg.h @@ -0,0 +1,3 @@ +/* + * Empty file to satisfy #include <linux/cmpxchg.h> for older kernels. 
+ */ diff --git a/kernel/include-compat/asm-x86/mce.h b/kernel/include-compat/asm-x86/mce.h new file mode 100644 index 00000000..1eb03c6f --- /dev/null +++ b/kernel/include-compat/asm-x86/mce.h @@ -0,0 +1 @@ +/* empty file to keep #include happy */ diff --git a/kernel/include-compat/asm-x86/msidef.h b/kernel/include-compat/asm-x86/msidef.h new file mode 100644 index 00000000..6706b300 --- /dev/null +++ b/kernel/include-compat/asm-x86/msidef.h @@ -0,0 +1,55 @@ +#ifndef _ASM_X86_MSIDEF_H +#define _ASM_X86_MSIDEF_H + +/* + * Constants for Intel APIC based MSI messages. + */ + +/* + * Shifts for MSI data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR_MASK 0x000000ff +#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \ + MSI_DATA_VECTOR_MASK) + +#define MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) +#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +/* + * Shift/mask fields for msi address + */ + +#define MSI_ADDR_BASE_HI 0 +#define MSI_ADDR_BASE_LO 0xfee00000 + +#define MSI_ADDR_DEST_MODE_SHIFT 2 +#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT) +#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) + /* dedicated cpu */ +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + /* lowest priority */ + +#define MSI_ADDR_DEST_ID_SHIFT 12 +#define MSI_ADDR_DEST_ID_MASK 0x00ffff0 +#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ + MSI_ADDR_DEST_ID_MASK) + +#define MSI_ADDR_IR_EXT_INT (1 
<< 4) +#define MSI_ADDR_IR_SHV (1 << 3) +#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13) +#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5) +#endif /* _ASM_X86_MSIDEF_H */ diff --git a/kernel/include-compat/asm-x86/msr-index.h b/kernel/include-compat/asm-x86/msr-index.h new file mode 100644 index 00000000..1eb03c6f --- /dev/null +++ b/kernel/include-compat/asm-x86/msr-index.h @@ -0,0 +1 @@ +/* empty file to keep #include happy */ diff --git a/kernel/include-compat/asm-x86/pvclock-abi.h b/kernel/include-compat/asm-x86/pvclock-abi.h new file mode 100644 index 00000000..6857f840 --- /dev/null +++ b/kernel/include-compat/asm-x86/pvclock-abi.h @@ -0,0 +1,42 @@ +#ifndef _ASM_X86_PVCLOCK_ABI_H_ +#define _ASM_X86_PVCLOCK_ABI_H_ +#ifndef __ASSEMBLY__ + +/* + * These structs MUST NOT be changed. + * They are the ABI between hypervisor and guest OS. + * Both Xen and KVM are using this. + * + * pvclock_vcpu_time_info holds the system time and the tsc timestamp + * of the last update. So the guest can use the tsc delta to get a + * more precise system time. There is one per virtual cpu. + * + * pvclock_wall_clock references the point in time when the system + * time was zero (usually boot time), thus the guest calculates the + * current wall clock by adding the system time. + * + * Protocol for the "version" fields is: hypervisor raises it (making + * it uneven) before it starts updating the fields and raises it again + * (making it even) when it is done. Thus the guest can make sure the + * time values it got are consistent by checking the version before + * and after reading them. 
+ */ + +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 pad[3]; +} __attribute__((__packed__)); /* 32 bytes */ + +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; +} __attribute__((__packed__)); + +#endif /* __ASSEMBLY__ */ +#endif /* _ASM_X86_PVCLOCK_ABI_H_ */ diff --git a/kernel/include-compat/linux/anon_inodes.h b/kernel/include-compat/linux/anon_inodes.h new file mode 100644 index 00000000..7b6862f2 --- /dev/null +++ b/kernel/include-compat/linux/anon_inodes.h @@ -0,0 +1,16 @@ +/* + * include/linux/anon_inodes.h + * + * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org> + * + */ + +#ifndef _LINUX_ANON_INODES_H +#define _LINUX_ANON_INODES_H + +struct file_operations; + +int anon_inode_getfd(const char *name, const struct file_operations *fops, + void *priv); + +#endif /* _LINUX_ANON_INODES_H */ diff --git a/kernel/include-compat/linux/eventfd.h b/kernel/include-compat/linux/eventfd.h new file mode 100644 index 00000000..c3580fb7 --- /dev/null +++ b/kernel/include-compat/linux/eventfd.h @@ -0,0 +1 @@ +/* Dummy file */ diff --git a/kernel/include-compat/linux/ftrace_event.h b/kernel/include-compat/linux/ftrace_event.h new file mode 100644 index 00000000..c89c4c9a --- /dev/null +++ b/kernel/include-compat/linux/ftrace_event.h @@ -0,0 +1 @@ +/* dummy file for #include compatibility */ diff --git a/kernel/include-compat/linux/intel-iommu.h b/kernel/include-compat/linux/intel-iommu.h new file mode 100644 index 00000000..1490fc07 --- /dev/null +++ b/kernel/include-compat/linux/intel-iommu.h @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Ashok Raj <ashok.raj@intel.com> + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + */ + +#ifndef _INTEL_IOMMU_H_ +#define _INTEL_IOMMU_H_ + +#include <linux/types.h> +#include <linux/msi.h> +#include <linux/sysdev.h> +#include "iova.h" +#include <linux/io.h> + +/* + * We need a fixed PAGE_SIZE of 4K irrespective of + * arch PAGE_SIZE for IOMMU page tables. + */ +#define PAGE_SHIFT_4K (12) +#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K) +#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K) +#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K) + +#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K) +#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) +#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) + +/* + * Intel IOMMU register specification per version 1.0 public spec. 
+ */ + +#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */ +#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */ +#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */ +#define DMAR_GCMD_REG 0x18 /* Global command register */ +#define DMAR_GSTS_REG 0x1c /* Global status register */ +#define DMAR_RTADDR_REG 0x20 /* Root entry table */ +#define DMAR_CCMD_REG 0x28 /* Context command reg */ +#define DMAR_FSTS_REG 0x34 /* Fault Status register */ +#define DMAR_FECTL_REG 0x38 /* Fault control register */ +#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */ +#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */ +#define DMAR_FEUADDR_REG 0x44 /* Upper address register */ +#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */ +#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */ +#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */ +#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ +#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ +#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ + +#define OFFSET_STRIDE (9) +/* +#define dmar_readl(dmar, reg) readl(dmar + reg) +#define dmar_readq(dmar, reg) ({ \ + u32 lo, hi; \ + lo = readl(dmar + reg); \ + hi = readl(dmar + reg + 4); \ + (((u64) hi) << 32) + lo; }) +*/ +static inline u64 dmar_readq(void __iomem *addr) +{ + u32 lo, hi; + lo = readl(addr); + hi = readl(addr + 4); + return (((u64) hi) << 32) + lo; +} + +static inline void dmar_writeq(void __iomem *addr, u64 val) +{ + writel((u32)val, addr); + writel((u32)(val >> 32), addr + 4); +} + +#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) +#define DMAR_VER_MINOR(v) ((v) & 0x0f) + +/* + * Decoding Capability Register + */ +#define cap_read_drain(c) (((c) >> 55) & 1) +#define cap_write_drain(c) (((c) >> 54) & 1) +#define cap_max_amask_val(c) (((c) >> 48) & 0x3f) +#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1) +#define cap_pgsel_inv(c) (((c) >> 39) & 1) + +#define 
cap_super_page_val(c) (((c) >> 34) & 0xf) +#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \ + * OFFSET_STRIDE) + 21) + +#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16) +#define cap_max_fault_reg_offset(c) \ + (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16) + +#define cap_zlr(c) (((c) >> 22) & 1) +#define cap_isoch(c) (((c) >> 23) & 1) +#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1) +#define cap_sagaw(c) (((c) >> 8) & 0x1f) +#define cap_caching_mode(c) (((c) >> 7) & 1) +#define cap_phmr(c) (((c) >> 6) & 1) +#define cap_plmr(c) (((c) >> 5) & 1) +#define cap_rwbf(c) (((c) >> 4) & 1) +#define cap_afl(c) (((c) >> 3) & 1) +#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7))) +/* + * Extended Capability Register + */ + +#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1) +#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16) +#define ecap_max_iotlb_offset(e) \ + (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) +#define ecap_coherent(e) ((e) & 0x1) + + +/* IOTLB_REG */ +#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60) +#define DMA_TLB_DSI_FLUSH (((u64)2) << 60) +#define DMA_TLB_PSI_FLUSH (((u64)3) << 60) +#define DMA_TLB_IIRG(type) ((type >> 60) & 7) +#define DMA_TLB_IAIG(val) (((val) >> 57) & 7) +#define DMA_TLB_READ_DRAIN (((u64)1) << 49) +#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48) +#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32) +#define DMA_TLB_IVT (((u64)1) << 63) +#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) +#define DMA_TLB_MAX_SIZE (0x3f) + +/* PMEN_REG */ +#define DMA_PMEN_EPM (((u32)1)<<31) +#define DMA_PMEN_PRS (((u32)1)<<0) + +/* GCMD_REG */ +#define DMA_GCMD_TE (((u32)1) << 31) +#define DMA_GCMD_SRTP (((u32)1) << 30) +#define DMA_GCMD_SFL (((u32)1) << 29) +#define DMA_GCMD_EAFL (((u32)1) << 28) +#define DMA_GCMD_WBF (((u32)1) << 27) + +/* GSTS_REG */ +#define DMA_GSTS_TES (((u32)1) << 31) +#define DMA_GSTS_RTPS (((u32)1) << 30) +#define DMA_GSTS_FLS (((u32)1) << 29) +#define 
DMA_GSTS_AFLS (((u32)1) << 28) +#define DMA_GSTS_WBFS (((u32)1) << 27) + +/* CCMD_REG */ +#define DMA_CCMD_ICC (((u64)1) << 63) +#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61) +#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61) +#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61) +#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32) +#define DMA_CCMD_MASK_NOBIT 0 +#define DMA_CCMD_MASK_1BIT 1 +#define DMA_CCMD_MASK_2BIT 2 +#define DMA_CCMD_MASK_3BIT 3 +#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16) +#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff)) + +/* FECTL_REG */ +#define DMA_FECTL_IM (((u32)1) << 31) + +/* FSTS_REG */ +#define DMA_FSTS_PPF ((u32)2) +#define DMA_FSTS_PFO ((u32)1) +#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) + +/* FRCD_REG, 32 bits access */ +#define DMA_FRCD_F (((u32)1) << 31) +#define dma_frcd_type(d) ((d >> 30) & 1) +#define dma_frcd_fault_reason(c) (c & 0xff) +#define dma_frcd_source_id(c) (c & 0xffff) +#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ + +/* + * 0: Present + * 1-11: Reserved + * 12-63: Context Ptr (12 - (haw-1)) + * 64-127: Reserved + */ +struct root_entry { + u64 val; + u64 rsvd1; +}; +#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) +static inline bool root_present(struct root_entry *root) +{ + return (root->val & 1); +} +static inline void set_root_present(struct root_entry *root) +{ + root->val |= 1; +} +static inline void set_root_value(struct root_entry *root, unsigned long value) +{ + root->val |= value & PAGE_MASK_4K; +} + +struct context_entry; +static inline struct context_entry * +get_context_addr_from_root(struct root_entry *root) +{ + return (struct context_entry *) + (root_present(root)?phys_to_virt( + root->val & PAGE_MASK_4K): + NULL); +} + +/* + * low 64 bits: + * 0: present + * 1: fault processing disable + * 2-3: translation type + * 12-63: address space root + * high 64 bits: + * 0-2: address width + * 3-6: aval + * 8-23: domain id + */ +struct context_entry { + u64 
lo; + u64 hi; +}; +#define context_present(c) ((c).lo & 1) +#define context_fault_disable(c) (((c).lo >> 1) & 1) +#define context_translation_type(c) (((c).lo >> 2) & 3) +#define context_address_root(c) ((c).lo & PAGE_MASK_4K) +#define context_address_width(c) ((c).hi & 7) +#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1)) + +#define context_set_present(c) do {(c).lo |= 1;} while (0) +#define context_set_fault_enable(c) \ + do {(c).lo &= (((u64)-1) << 2) | 1;} while (0) +#define context_set_translation_type(c, val) \ + do { \ + (c).lo &= (((u64)-1) << 4) | 3; \ + (c).lo |= ((val) & 3) << 2; \ + } while (0) +#define CONTEXT_TT_MULTI_LEVEL 0 +#define context_set_address_root(c, val) \ + do {(c).lo |= (val) & PAGE_MASK_4K;} while (0) +#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0) +#define context_set_domain_id(c, val) \ + do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0) +#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0) + +/* + * 0: readable + * 1: writable + * 2-6: reserved + * 7: super page + * 8-11: available + * 12-63: Host physcial address + */ +struct dma_pte { + u64 val; +}; +#define dma_clear_pte(p) do {(p).val = 0;} while (0) + +#define DMA_PTE_READ (1) +#define DMA_PTE_WRITE (2) + +#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) +#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) +#define dma_set_pte_prot(p, prot) \ + do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) +#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) +#define dma_set_pte_addr(p, addr) do {\ + (p).val |= ((addr) & PAGE_MASK_4K); } while (0) +#define dma_pte_present(p) (((p).val & 3) != 0) + +struct intel_iommu; + +struct dmar_domain { + int id; /* domain id */ + struct intel_iommu *iommu; /* back pointer to owning iommu */ + + struct list_head devices; /* all devices' list */ + struct iova_domain iovad; /* iova's that belong to this domain */ + + struct dma_pte *pgd; /* 
virtual address */ + spinlock_t mapping_lock; /* page table lock */ + int gaw; /* max guest address width */ + + /* adjusted guest address width, 0 is level 2 30-bit */ + int agaw; + +#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 + int flags; +}; + +/* PCI domain-device relationship */ +struct device_domain_info { + struct list_head link; /* link to domain siblings */ + struct list_head global; /* link to global list */ + u8 bus; /* PCI bus numer */ + u8 devfn; /* PCI devfn number */ + struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ + struct dmar_domain *domain; /* pointer to domain */ +}; + +extern int init_dmars(void); + +struct intel_iommu { + void __iomem *reg; /* Pointer to hardware regs, virtual addr */ + u64 cap; + u64 ecap; + unsigned long *domain_ids; /* bitmap of domains */ + struct dmar_domain **domains; /* ptr to domains */ + int seg; + u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ + spinlock_t lock; /* protect context, domain ids */ + spinlock_t register_lock; /* protect register handling */ + struct root_entry *root_entry; /* virtual address */ + + unsigned int irq; + unsigned char name[7]; /* Device Name */ + struct msi_msg saved_msg; + struct sys_device sysdev; +}; + +#ifndef CONFIG_DMAR_GFX_WA +static inline void iommu_prepare_gfx_mapping(void) +{ + return; +} +#endif /* !CONFIG_DMAR_GFX_WA */ + +void intel_iommu_domain_exit(struct dmar_domain *domain); +struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev); +int intel_iommu_context_mapping(struct dmar_domain *domain, + struct pci_dev *pdev); +int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot); +void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn); +struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev); +int intel_iommu_found(void); +u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); + +#endif diff --git a/kernel/include-compat/linux/iommu.h 
b/kernel/include-compat/linux/iommu.h new file mode 100644 index 00000000..8a7bfb1b --- /dev/null +++ b/kernel/include-compat/linux/iommu.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel <joerg.roedel@amd.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __LINUX_IOMMU_H +#define __LINUX_IOMMU_H + +#define IOMMU_READ (1) +#define IOMMU_WRITE (2) + +struct device; + +struct iommu_domain { + void *priv; +}; + +struct iommu_ops { + int (*domain_init)(struct iommu_domain *domain); + void (*domain_destroy)(struct iommu_domain *domain); + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + void (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size); + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + unsigned long iova); +}; + +#ifdef CONFIG_IOMMU_API + +extern void register_iommu(struct iommu_ops *ops); +extern bool iommu_found(void); +extern struct iommu_domain *iommu_domain_alloc(void); +extern void iommu_domain_free(struct iommu_domain *domain); +extern int iommu_attach_device(struct iommu_domain *domain, + struct device *dev); +extern void iommu_detach_device(struct 
iommu_domain *domain, + struct device *dev); +extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); +extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, + size_t size); +extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova); + +#else /* CONFIG_IOMMU_API */ + +static inline void register_iommu(struct iommu_ops *ops) +{ +} + +static inline bool iommu_found(void) +{ + return false; +} + +static inline struct iommu_domain *iommu_domain_alloc(void) +{ + return NULL; +} + +static inline void iommu_domain_free(struct iommu_domain *domain) +{ +} + +static inline int iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + return -ENODEV; +} + +static inline void iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ +} + +static inline int iommu_map_range(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + return -ENODEV; +} + +static inline void iommu_unmap_range(struct iommu_domain *domain, + unsigned long iova, size_t size) +{ +} + +static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova) +{ + return 0; +} + +#endif /* CONFIG_IOMMU_API */ + +#endif /* __LINUX_IOMMU_H */ diff --git a/kernel/include-compat/linux/iova.h b/kernel/include-compat/linux/iova.h new file mode 100644 index 00000000..228f6c94 --- /dev/null +++ b/kernel/include-compat/linux/iova.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2006, Intel Corporation. + * + * This file is released under the GPLv2. 
+ * + * Copyright (C) 2006-2008 Intel Corporation + * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> + * + */ + +#ifndef _IOVA_H_ +#define _IOVA_H_ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/rbtree.h> +#include <linux/dma-mapping.h> + +/* IO virtual address start page frame number */ +#define IOVA_START_PFN (1) + +/* iova structure */ +struct iova { + struct rb_node node; + unsigned long pfn_hi; /* IOMMU dish out addr hi */ + unsigned long pfn_lo; /* IOMMU dish out addr lo */ +}; + +/* holds all the iova translations for a domain */ +struct iova_domain { + spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */ + spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ + struct rb_root rbroot; /* iova domain rbtree root */ + struct rb_node *cached32_node; /* Save last alloced node */ + unsigned long dma_32bit_pfn; +}; + +struct iova *alloc_iova_mem(void); +void free_iova_mem(struct iova *iova); +void free_iova(struct iova_domain *iovad, unsigned long pfn); +void __free_iova(struct iova_domain *iovad, struct iova *iova); +struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size, + unsigned long limit_pfn, + bool size_aligned); +struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, + unsigned long pfn_hi); +void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); +void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); +struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); +void put_iova_domain(struct iova_domain *iovad); + +#endif diff --git a/kernel/include-compat/linux/magic.h b/kernel/include-compat/linux/magic.h new file mode 100644 index 00000000..a9c6567f --- /dev/null +++ b/kernel/include-compat/linux/magic.h @@ -0,0 +1,41 @@ +#ifndef __LINUX_MAGIC_H__ +#define __LINUX_MAGIC_H__ + +#define ADFS_SUPER_MAGIC 0xadf5 +#define AFFS_SUPER_MAGIC 0xadff +#define AFS_SUPER_MAGIC 0x5346414F +#define AUTOFS_SUPER_MAGIC 
0x0187 +#define CODA_SUPER_MAGIC 0x73757245 +#define EFS_SUPER_MAGIC 0x414A53 +#define EXT2_SUPER_MAGIC 0xEF53 +#define EXT3_SUPER_MAGIC 0xEF53 +#define EXT4_SUPER_MAGIC 0xEF53 +#define HPFS_SUPER_MAGIC 0xf995e849 +#define ISOFS_SUPER_MAGIC 0x9660 +#define JFFS2_SUPER_MAGIC 0x72b6 +#define KVMFS_SUPER_MAGIC 0x19700426 + +#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */ +#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */ +#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */ +#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */ +#define MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 fs */ + +#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */ +#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */ +#define NFS_SUPER_MAGIC 0x6969 +#define OPENPROM_SUPER_MAGIC 0x9fa1 +#define PROC_SUPER_MAGIC 0x9fa0 +#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */ + +#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */ + /* used by file system utilities that + look at the superblock, etc. */ +#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs" +#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs" +#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs" + +#define SMB_SUPER_MAGIC 0x517B +#define USBDEVICE_SUPER_MAGIC 0x9fa2 + +#endif /* __LINUX_MAGIC_H__ */ diff --git a/kernel/include-compat/linux/marker.h b/kernel/include-compat/linux/marker.h new file mode 100644 index 00000000..ceef04f2 --- /dev/null +++ b/kernel/include-compat/linux/marker.h @@ -0,0 +1,119 @@ +/* + * Alternative file to satisfy #include <linux/marker.h> for older kernels. + */ +#ifndef _LINUX_MARKER_H +#define _LINUX_MARKER_H + +/* + * Code markup for dynamic and static tracing. + * + * See Documentation/marker.txt. + * + * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca> + * + * This file is released under the GPLv2. + * See the file COPYING for more details. 
+ */ + +#include <linux/types.h> + +struct module; +struct marker; + +/** + * marker_probe_func - Type of a marker probe function + * @probe_private: probe private data + * @call_private: call site private data + * @fmt: format string + * @args: variable argument list pointer. Use a pointer to overcome C's + * inability to pass this around as a pointer in a portable manner in + * the callee otherwise. + * + * Type of marker probe functions. They receive the mdata and need to parse the + * format string to recover the variable argument list. + */ +typedef void marker_probe_func(void *probe_private, void *call_private, + const char *fmt, va_list *args); + +struct marker_probe_closure { + marker_probe_func *func; /* Callback */ + void *probe_private; /* Private probe data */ +}; + +struct marker { + const char *name; /* Marker name */ + const char *format; /* Marker format string, describing the + * variable argument list. + */ + char state; /* Marker state. */ + char ptype; /* probe type : 0 : single, 1 : multi */ + void (*call)(const struct marker *mdata, /* Probe wrapper */ + void *call_private, const char *fmt, ...); + struct marker_probe_closure single; + struct marker_probe_closure *multi; +} __attribute__((aligned(8))); + +#define __trace_mark(name, call_private, format, args...) \ + __mark_check_format(format, ## args) +static inline void marker_update_probe_range(struct marker *begin, + struct marker *end) +{ } + +/** + * trace_mark - Marker + * @name: marker name, not quoted. + * @format: format string + * @args...: variable argument list + * + * Places a marker. + */ +#define trace_mark(name, format, args...) \ + __trace_mark(name, NULL, format, ## args) + +/** + * MARK_NOARGS - Format string for a marker with no argument. + */ +#define MARK_NOARGS " " + +/* To be used for string format validity checking with gcc */ +static inline void __attribute__((format(printf,1,2))) +___mark_check_format(const char *fmt, ...) 
+{ +} + +#define __mark_check_format(format, args...) \ + do { \ + if (0) \ + ___mark_check_format(format, ## args); \ + } while (0) + +extern marker_probe_func __mark_empty_function; + +extern void marker_probe_cb(const struct marker *mdata, + void *call_private, const char *fmt, ...); +extern void marker_probe_cb_noarg(const struct marker *mdata, + void *call_private, const char *fmt, ...); + +/* + * Connect a probe to a marker. + * private data pointer must be a valid allocated memory address, or NULL. + */ +extern int marker_probe_register(const char *name, const char *format, + marker_probe_func *probe, void *probe_private); + +/* + * Returns the private data given to marker_probe_register. + */ +extern int marker_probe_unregister(const char *name, + marker_probe_func *probe, void *probe_private); +/* + * Unregister a marker by providing the registered private data. + */ +extern int marker_probe_unregister_private_data(marker_probe_func *probe, + void *probe_private); + +extern void *marker_get_private_data(const char *name, marker_probe_func *probe, + int num); + +#endif + diff --git a/kernel/include-compat/linux/math64.h b/kernel/include-compat/linux/math64.h new file mode 100644 index 00000000..dc7c5812 --- /dev/null +++ b/kernel/include-compat/linux/math64.h @@ -0,0 +1,3 @@ +/* + * Empty file to satisfy #include <linux/math64.h> for older kernels. 
+ */ diff --git a/kernel/include-compat/linux/mmu_notifier.h b/kernel/include-compat/linux/mmu_notifier.h new file mode 100644 index 00000000..a6db4bab --- /dev/null +++ b/kernel/include-compat/linux/mmu_notifier.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_MMU_NOTIFIER_H +#define _LINUX_MMU_NOTIFIER_H + +struct mmu_notifier {}; + +#endif diff --git a/kernel/include-compat/linux/msi.h b/kernel/include-compat/linux/msi.h new file mode 100644 index 00000000..8f293922 --- /dev/null +++ b/kernel/include-compat/linux/msi.h @@ -0,0 +1,50 @@ +#ifndef LINUX_MSI_H +#define LINUX_MSI_H + +#include <linux/list.h> + +struct msi_msg { + u32 address_lo; /* low 32 bits of msi message address */ + u32 address_hi; /* high 32 bits of msi message address */ + u32 data; /* 16 bits of msi message data */ +}; + +/* Helper functions */ +extern void mask_msi_irq(unsigned int irq); +extern void unmask_msi_irq(unsigned int irq); +extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); +extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); + +struct msi_desc { + struct { + __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */ + __u8 maskbit : 1; /* mask-pending bit supported ? 
*/ + __u8 masked : 1; + __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */ + __u8 pos; /* Location of the msi capability */ + __u32 maskbits_mask; /* mask bits mask */ + __u16 entry_nr; /* specific enabled entry */ + unsigned default_irq; /* default pre-assigned irq */ + }msi_attrib; + + unsigned int irq; + struct list_head list; + + void __iomem *mask_base; + struct pci_dev *dev; + + /* Last set MSI message */ + struct msi_msg msg; +}; + +/* + * The arch hook for setup up msi irqs + */ +int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); +void arch_teardown_msi_irq(unsigned int irq); +extern int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); +extern void arch_teardown_msi_irqs(struct pci_dev *dev); +extern int arch_msi_check_device(struct pci_dev* dev, int nvec, int type); + + +#endif /* LINUX_MSI_H */ diff --git a/kernel/include-compat/linux/mutex.h b/kernel/include-compat/linux/mutex.h new file mode 100644 index 00000000..449905c0 --- /dev/null +++ b/kernel/include-compat/linux/mutex.h @@ -0,0 +1,3 @@ +/* + * Empty file to satisfy #include <linux/mutex.h> for older kernels. + */ diff --git a/kernel/include-compat/linux/srcu.h b/kernel/include-compat/linux/srcu.h new file mode 100644 index 00000000..0d476be9 --- /dev/null +++ b/kernel/include-compat/linux/srcu.h @@ -0,0 +1,53 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU/ *.txt + * + */ + +#ifndef _LINUX_SRCU_H +#define _LINUX_SRCU_H + +struct srcu_struct_array { + int c[2]; +}; + +struct srcu_struct { + int completed; + struct srcu_struct_array *per_cpu_ref; + struct mutex mutex; +}; + +#ifndef CONFIG_PREEMPT +#define srcu_barrier() barrier() +#else /* #ifndef CONFIG_PREEMPT */ +#define srcu_barrier() +#endif /* #else #ifndef CONFIG_PREEMPT */ + +int kvm_init_srcu_struct(struct srcu_struct *sp); +void kvm_cleanup_srcu_struct(struct srcu_struct *sp); +int kvm_srcu_read_lock(struct srcu_struct *sp) __acquires(sp); +void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); +void kvm_synchronize_srcu(struct srcu_struct *sp); +long kvm_srcu_batches_completed(struct srcu_struct *sp); + +#endif diff --git a/kernel/include-compat/linux/tracepoint.h b/kernel/include-compat/linux/tracepoint.h new file mode 100644 index 00000000..f2e9a589 --- /dev/null +++ b/kernel/include-compat/linux/tracepoint.h @@ -0,0 +1 @@ +/* Dummy file to satisfy #include */ diff --git a/kernel/include-compat/trace/define_trace.h b/kernel/include-compat/trace/define_trace.h new file mode 100644 index 00000000..222c9784 --- /dev/null +++ b/kernel/include-compat/trace/define_trace.h @@ -0,0 +1,2 @@ +/* Empty file to satisfy include */ + diff --git a/kernel/kvm-kmod.spec b/kernel/kvm-kmod.spec new file mode 100644 index 00000000..89b3d882 --- /dev/null +++ b/kernel/kvm-kmod.spec @@ -0,0 +1,52 @@ +%define kmod_name kvm + +Name: kvm-kmod +Version: 0.0 +Release: 0 +Summary: %{kmod_name} kernel module + +Group: System Environment/Kernel +License: GPL +URL: 
http://www.qumranet.com +BuildRoot: %{_tmppath}/%{name}-%{version}-%{release} + +ExclusiveArch: i386 x86_64 ia64 + +%description +This kernel module provides support for virtual machines using hardware support +(Intel VT-x&VT-i or AMD SVM). + +%prep + +%build + +rm -rf %{buildroot} + +%install + +%define kverrel unknown +%define moddir /lib/modules/%{kverrel}/extra +mkdir -p %{buildroot}/%{moddir} +cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-*.ko %{buildroot}/%{moddir} +chmod u+x %{buildroot}/%{moddir}/%{kmod_name}*.ko + +%post + +depmod %{kverrel} + +%postun + +depmod %{kverrel} + +%clean +%{__rm} -rf %{buildroot} + +%files +%{moddir}/%{kmod_name}.ko +%ifarch i386 x86_64 +%{moddir}/%{kmod_name}-amd.ko +%endif +%{moddir}/%{kmod_name}-intel.ko + + +%changelog diff --git a/kernel/powerpc/Makefile.pre b/kernel/powerpc/Makefile.pre new file mode 100644 index 00000000..e38baf13 --- /dev/null +++ b/kernel/powerpc/Makefile.pre @@ -0,0 +1 @@ +prerequisite: diff --git a/kernel/request-irq-compat.c b/kernel/request-irq-compat.c new file mode 100644 index 00000000..51193cb3 --- /dev/null +++ b/kernel/request-irq-compat.c @@ -0,0 +1,44 @@ +/* + * compat for request_irq + */ + +#include <linux/interrupt.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +static kvm_irq_handler_t kvm_irq_handlers[NR_IRQS]; +static DEFINE_MUTEX(kvm_irq_handlers_mutex); + +static irqreturn_t kvm_irq_thunk(int irq, void *dev_id, struct pt_regs *regs) +{ + kvm_irq_handler_t handler = kvm_irq_handlers[irq]; + return handler(irq, dev_id); +} + +int kvm_request_irq(unsigned int a, kvm_irq_handler_t handler, + unsigned long c, const char *d, void *e) +{ + int rc = -EBUSY; + kvm_irq_handler_t old; + + mutex_lock(&kvm_irq_handlers_mutex); + old = kvm_irq_handlers[a]; + if (old) + goto out; + kvm_irq_handlers[a] = handler; + rc = request_irq(a, kvm_irq_thunk, c, d, e); + if (rc) + kvm_irq_handlers[a] = NULL; +out: + mutex_unlock(&kvm_irq_handlers_mutex); + return rc; +} + +void 
kvm_free_irq(unsigned int irq, void *dev_id) +{ + mutex_lock(&kvm_irq_handlers_mutex); + free_irq(irq, dev_id); + kvm_irq_handlers[irq] = NULL; + mutex_unlock(&kvm_irq_handlers_mutex); +} + +#endif diff --git a/kernel/scripts/65-kvm.rules b/kernel/scripts/65-kvm.rules new file mode 100644 index 00000000..857b08cf --- /dev/null +++ b/kernel/scripts/65-kvm.rules @@ -0,0 +1,2 @@ +KERNEL=="kvm", MODE="0660", GROUP="kvm" +ACTION=="add|change", SUBSYSTEM=="dmi", KERNEL=="id", RUN+="/bin/sh -c 'grep -q vmx /proc/cpuinfo && /sbin/modprobe kvm-intel; grep -q svm /proc/cpuinfo && /sbin/modprobe kvm-amd'" diff --git a/kernel/scripts/make-release b/kernel/scripts/make-release new file mode 100755 index 00000000..f9205e33 --- /dev/null +++ b/kernel/scripts/make-release @@ -0,0 +1,95 @@ +#!/bin/bash -e + +usage() { + echo "usage: $0 [--upload] [--formal] commit [name]" + exit 1 +} + +[[ -f ~/.kvmreleaserc ]] && . ~/.kvmreleaserc + +upload= +formal= + +releasedir=~/sf-release +[[ -z "$TMP" ]] && TMP="/tmp" +tmpdir="$TMP/kvm-kmod-make-release.$$" +while [[ "$1" = -* ]]; do + opt="$1" + shift + case "$opt" in + --upload) + upload="yes" + ;; + --formal) + formal="yes" + ;; + *) + usage + ;; + esac +done + +commit="$1" +name="$2" + +if [[ -z "$commit" ]]; then + usage +fi + +if [[ -z "$name" ]]; then + name="$commit" +fi + +tarball="$releasedir/$name.tar.bz2" + +cd "$(dirname "$0")"/.. +LINUX="$(readlink -f "linux-2.6")" + +kvm_git="$(readlink -f .git)" +linux_git="$(readlink -f "$LINUX/.git")" + +mkdir -p "$tmpdir/$name" +mkdir -p "$tmpdir/$name/linux-2.6" + +files=("arch/*/kvm/*" "virt/kvm" "include/linux/kvm*" + "arch/*/include/asm/kvm*" "arch/x86/include/asm/virtext.h" + "arch/x86/include/asm/vmx.h" "arch/x86/include/asm/svm.h" + "include/trace/events/kvm*" + ) + +index="$tmpdir/index" + +rm -f "$index" +GIT_INDEX_FILE="$index" git --git-dir="$kvm_git" read-tree "$commit" +GIT_INDEX_FILE="$index" git --git-dir="$kvm_git" --work-tree="$tmpdir/$name" checkout "$commit" . 
+lcommit=($(git --git-dir="$kvm_git" ls-tree "$commit" linux-2.6)) +lcommit="${lcommit[2]}" +rm -f "$index" +GIT_INDEX_FILE="$index" git --git-dir="$linux_git" read-tree "$lcommit" +GIT_INDEX_FILE="$index" git --git-dir="$linux_git" \ + --work-tree="$tmpdir/$name/linux-2.6" \ + checkout "$lcommit" "${files[@]}" + +cd "$tmpdir/$name" + +if [[ -z "$formal" ]]; then + version="kvm-devel" +else + version="$name" +fi + +./sync "$name" -v "$version" + +rm -rf "$tmpdir/$name/linux-2.6" + +if [[ -n "$formal" ]]; then + echo "$name" > "$tmpdir/$name/KVM_VERSION" +fi + +tar cjf "$tarball" -C "$tmpdir" "$name" + +rm -rf "$tmpdir" + +if [[ -n "$upload" ]]; then + rsync --progress -h "$tarball" kiszka@frs.sourceforge.net:uploads/ +fi diff --git a/kernel/srcu.c b/kernel/srcu.c new file mode 100644 index 00000000..e9734bc6 --- /dev/null +++ b/kernel/srcu.c @@ -0,0 +1,267 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+ * + * Copyright (C) IBM Corporation, 2006 + * + * Author: Paul McKenney <paulmck@us.ibm.com> + * + * For detailed explanation of Read-Copy Update mechanism see - + * Documentation/RCU/ *.txt + * + */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/percpu.h> +#include <linux/preempt.h> +#include <linux/rcupdate.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/srcu.h> + +#undef kvm_init_srcu_struct +#undef kvm_cleanup_srcu_struct +#undef kvm_srcu_read_lock +#undef kvm_srcu_read_unlock +#undef kvm_synchronize_srcu +#undef kvm_srcu_batches_completed +/** + * init_srcu_struct - initialize a sleep-RCU structure + * @sp: structure to initialize. + * + * Must invoke this on a given srcu_struct before passing that srcu_struct + * to any other function. Each srcu_struct represents a separate domain + * of SRCU protection. + */ +int kvm_init_srcu_struct(struct srcu_struct *sp) +{ + sp->completed = 0; + mutex_init(&sp->mutex); + sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array); + return (sp->per_cpu_ref ? 0 : -ENOMEM); +} + +/* + * srcu_readers_active_idx -- returns approximate number of readers + * active on the specified rank of per-CPU counters. + */ + +static int srcu_readers_active_idx(struct srcu_struct *sp, int idx) +{ + int cpu; + int sum; + + sum = 0; + for_each_possible_cpu(cpu) + sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx]; + return sum; +} + +/** + * srcu_readers_active - returns approximate number of readers. + * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * + * Note that this is not an atomic primitive, and can therefore suffer + * severe errors when invoked on an active srcu_struct. That said, it + * can be useful as an error check at cleanup time. 
+ */ +static int srcu_readers_active(struct srcu_struct *sp) +{ + return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1); +} + +/** + * cleanup_srcu_struct - deconstruct a sleep-RCU structure + * @sp: structure to clean up. + * + * Must invoke this after you are finished using a given srcu_struct that + * was initialized via init_srcu_struct(), else you leak memory. + */ +void kvm_cleanup_srcu_struct(struct srcu_struct *sp) +{ + int sum; + + sum = srcu_readers_active(sp); + WARN_ON(sum); /* Leakage unless caller handles error. */ + if (sum != 0) + return; + free_percpu(sp->per_cpu_ref); + sp->per_cpu_ref = NULL; +} + +/** + * srcu_read_lock - register a new reader for an SRCU-protected structure. + * @sp: srcu_struct in which to register the new reader. + * + * Counts the new reader in the appropriate per-CPU element of the + * srcu_struct. Must be called from process context. + * Returns an index that must be passed to the matching srcu_read_unlock(). + */ +int kvm_srcu_read_lock(struct srcu_struct *sp) +{ + int idx; + + preempt_disable(); + idx = sp->completed & 0x1; + barrier(); /* ensure compiler looks -once- at sp->completed. */ + per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++; + srcu_barrier(); /* ensure compiler won't misorder critical section. */ + preempt_enable(); + return idx; +} + +/** + * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. + * @sp: srcu_struct in which to unregister the old reader. + * @idx: return value from corresponding srcu_read_lock(). + * + * Removes the count for the old reader from the appropriate per-CPU + * element of the srcu_struct. Note that this may well be a different + * CPU than that which was incremented by the corresponding srcu_read_lock(). + * Must be called from process context. + */ +void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx) +{ + preempt_disable(); + srcu_barrier(); /* ensure compiler won't misorder critical section. 
*/ + per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--; + preempt_enable(); +} + +/** + * synchronize_srcu - wait for prior SRCU read-side critical-section completion + * @sp: srcu_struct with which to synchronize. + * + * Flip the completed counter, and wait for the old count to drain to zero. + * As with classic RCU, the updater must use some separate means of + * synchronizing concurrent updates. Can block; must be called from + * process context. + * + * Note that it is illegal to call synchornize_srcu() from the corresponding + * SRCU read-side critical section; doing so will result in deadlock. + * However, it is perfectly legal to call synchronize_srcu() on one + * srcu_struct from some other srcu_struct's read-side critical section. + */ +void kvm_synchronize_srcu(struct srcu_struct *sp) +{ + int idx; + + idx = sp->completed; + mutex_lock(&sp->mutex); + + /* + * Check to see if someone else did the work for us while we were + * waiting to acquire the lock. We need -two- advances of + * the counter, not just one. If there was but one, we might have + * shown up -after- our helper's first synchronize_sched(), thus + * having failed to prevent CPU-reordering races with concurrent + * srcu_read_unlock()s on other CPUs (see comment below). So we + * either (1) wait for two or (2) supply the second ourselves. + */ + + if ((sp->completed - idx) >= 2) { + mutex_unlock(&sp->mutex); + return; + } + + synchronize_sched(); /* Force memory barrier on all CPUs. */ + + /* + * The preceding synchronize_sched() ensures that any CPU that + * sees the new value of sp->completed will also see any preceding + * changes to data structures made by this CPU. This prevents + * some other CPU from reordering the accesses in its SRCU + * read-side critical section to precede the corresponding + * srcu_read_lock() -- ensuring that such references will in + * fact be protected. + * + * So it is now safe to do the flip. 
+ */ + + idx = sp->completed & 0x1; + sp->completed++; + + synchronize_sched(); /* Force memory barrier on all CPUs. */ + + /* + * At this point, because of the preceding synchronize_sched(), + * all srcu_read_lock() calls using the old counters have completed. + * Their corresponding critical sections might well be still + * executing, but the srcu_read_lock() primitives themselves + * will have finished executing. + */ + + while (srcu_readers_active_idx(sp, idx)) + schedule_timeout_interruptible(1); + + synchronize_sched(); /* Force memory barrier on all CPUs. */ + + /* + * The preceding synchronize_sched() forces all srcu_read_unlock() + * primitives that were executing concurrently with the preceding + * for_each_possible_cpu() loop to have completed by this point. + * More importantly, it also forces the corresponding SRCU read-side + * critical sections to have also completed, and the corresponding + * references to SRCU-protected data items to be dropped. + * + * Note: + * + * Despite what you might think at first glance, the + * preceding synchronize_sched() -must- be within the + * critical section ended by the following mutex_unlock(). + * Otherwise, a task taking the early exit can race + * with a srcu_read_unlock(), which might have executed + * just before the preceding srcu_readers_active() check, + * and whose CPU might have reordered the srcu_read_unlock() + * with the preceding critical section. In this case, there + * is nothing preventing the synchronize_sched() task that is + * taking the early exit from freeing a data structure that + * is still being referenced (out of order) by the task + * doing the srcu_read_unlock(). + * + * Alternatively, the comparison with "2" on the early exit + * could be changed to "3", but this increases synchronize_srcu() + * latency for bulk loads. So the current code is preferred. + */ + + mutex_unlock(&sp->mutex); +} + +/** + * srcu_batches_completed - return batches completed. 
+ * @sp: srcu_struct on which to report batch completion. + * + * Report the number of batches, correlated with, but not necessarily + * precisely the same as, the number of grace periods that have elapsed. + */ + +long kvm_srcu_batches_completed(struct srcu_struct *sp) +{ + return sp->completed; +} + +EXPORT_SYMBOL_GPL(kvm_init_srcu_struct); +EXPORT_SYMBOL_GPL(kvm_cleanup_srcu_struct); +EXPORT_SYMBOL_GPL(kvm_srcu_read_lock); +EXPORT_SYMBOL_GPL(kvm_srcu_read_unlock); +EXPORT_SYMBOL_GPL(kvm_synchronize_srcu); +EXPORT_SYMBOL_GPL(kvm_srcu_batches_completed); + +#endif diff --git a/kernel/sync b/kernel/sync new file mode 100755 index 00000000..da5c218a --- /dev/null +++ b/kernel/sync @@ -0,0 +1,248 @@ +#!/usr/bin/python + +import sys, os, glob, os.path, shutil, re +from optparse import OptionParser + +glob = glob.glob + +def cmd(c): + if os.system(c) != 0: + raise Exception('command execution failed: ' + c) + +parser = OptionParser(usage = 'usage: %prog [-v VERSION][-l LINUX]') +parser.add_option('-v', action = 'store', type = 'string', dest = 'version', \ + help = 'kvm-kmod release version', default = 'kvm-devel') +parser.add_option('-l', action = 'store', type = 'string', dest = 'linux', \ + help = 'Linux kernel tree to sync from', \ + default = 'linux-2.6') +parser.set_defaults() +(options, args) = parser.parse_args() +version = options.version +linux = options.linux + +_re_cache = {} + +def re_cache(regexp): + global _re_cache + if regexp not in _re_cache: + _re_cache[regexp] = re.compile(regexp) + return _re_cache[regexp] + +def __hack(data): + compat_apis = str.split( + 'INIT_WORK desc_struct ldttss_desc64 desc_ptr ' + 'hrtimer_add_expires_ns hrtimer_get_expires ' + 'hrtimer_get_expires_ns hrtimer_start_expires ' + 'hrtimer_expires_remaining smp_send_reschedule ' + 'on_each_cpu relay_open request_irq free_irq ' + 'init_srcu_struct cleanup_srcu_struct srcu_read_lock ' + 'srcu_read_unlock synchronize_srcu srcu_batches_completed ' + 'do_machine_check eventfd_signal 
get_desc_base get_desc_limit ' + 'vma_kernel_pagesize ' + ) + anon_inodes = anon_inodes_exit = False + mce = False + result = [] + def sub(regexp, repl, str): + return re_cache(regexp).sub(repl, str) + for line in data.splitlines(): + orig = line + def match(regexp): + return re_cache(regexp).search(line) + def w(line, result = result): + result.append(line) + f = line.split() + if match(r'^int kvm_init\('): anon_inodes = 1 + if match(r'return 0;') and anon_inodes: + w('\tr = kvm_init_anon_inodes();') + w('\tif (r) {') + w('\t\t__free_page(bad_page);') + w('\t\tgoto out;') + w('\t}') + w('\tpreempt_notifier_sys_init();') + w('\tprintk("loaded kvm module (%s)\\n");\n' % (version,)) + anon_inodes = False + if match(r'^void kvm_exit'): anon_inodes_exit = True + if match(r'\}') and anon_inodes_exit: + w('\tkvm_exit_anon_inodes();') + w('\tpreempt_notifier_sys_exit();') + anon_inodes_exit = False + if match(r'^int kvm_arch_init'): kvm_arch_init = True + if match(r'\btsc_khz\b') and kvm_arch_init: + line = sub(r'\btsc_khz\b', 'kvm_tsc_khz', line) + if match(r'^}'): kvm_arch_init = False + if match(r'MODULE_AUTHOR'): + w('MODULE_INFO(version, "%s");' % (version,)) + line = sub(r'(\w+)->dev->msi_enabled', + r'kvm_pcidev_msi_enabled(\1->dev)', line) + if match(r'atomic_inc\(&kvm->mm->mm_count\);'): + line = 'mmget(&kvm->mm->mm_count);' + if match(r'^\t\.fault = '): + fcn = sub(r',', '', f[2]) + line = '\t.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(' + fcn + '),' + if match(r'^static int (.*_stat_get|lost_records_get)'): + line = line[0:11] + '__' + line[11:] + if match(r'DEFINE_SIMPLE_ATTRIBUTE.*(_stat_get|lost_records_get)'): + name = sub(r',', '', f[1]) + w('MAKE_SIMPLE_ATTRIBUTE_GETTER(' + name + ')') + line = sub(r'linux/mm_types\.h', 'linux/mm.h', line) + line = sub(r'\b__user\b', ' ', line) + if match(r'^\t\.name = "kvm"'): + line = '\tset_kset_name("kvm"),' + if match(r'#include <linux/compiler.h>'): + line = '' + if match(r'#include <linux/clocksource.h>'): + line = 
'' + if match(r'#include <linux\/types.h>'): + line = '#include <asm/types.h>' + if match(r'\t\.change_pte.*kvm_mmu_notifier_change_pte,'): + line = '#ifdef MMU_NOTIFIER_HAS_CHANGE_PTE\n' + line + '\n#endif' + if match(r'static void kvm_mmu_notifier_change_pte'): + line = sub(r'static ', '', line) + line = '#ifdef MMU_NOTIFIER_HAS_CHANGE_PTE\n' + 'static\n' + '#endif\n' + line + line = sub(r'\bhrtimer_init\b', 'hrtimer_init_p', line) + line = sub(r'\bhrtimer_start\b', 'hrtimer_start_p', line) + line = sub(r'\bhrtimer_cancel\b', 'hrtimer_cancel_p', line) + if match(r'case KVM_CAP_SYNC_MMU'): + line = '#ifdef CONFIG_MMU_NOTIFIER\n' + line + '\n#endif' + for ident in compat_apis: + line = sub(r'\b' + ident + r'\b', 'kvm_' + ident, line) + if match(r'kvm_.*_fops\.owner = module;'): + line = 'IF_ANON_INODES_DOES_REFCOUNTS(' + line + ')' + if not match(r'#include'): + line = sub(r'\blapic\n', 'l_apic', line) + if match(r'struct pt_regs regs'): + mce = True + if mce and match(r'\.cs'): + line = sub(r'cs', r'kvm_pt_regs_cs', line) + if mce and match(r'\.flags'): + line = sub(r'flags', r'kvm_pt_regs_flags', line) + mce = False + line = sub(r'boot_cpu_data.x86_phys_bits', 'kvm_x86_phys_bits', line) + if match(r'^static const struct vm_operations_struct kvm_'): + line = sub(r' const ', ' ', line) + if line == 'static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)': + line = sub(r'\)', ', struct page **mapped_page)', line) + if line == '\treturn kmap_atomic(page, idx);': + line = '\t*mapped_page = page;\n' + line + if line == 'static void nested_svm_unmap(void *addr, enum km_type idx)': + line = sub(r'\)', ', struct page *mapped_page)', line) + if line == '\tpage = kmap_atomic_to_page(addr);': + line = '\tpage = mapped_page;' + if match(r'= nested_svm_map(.*);'): + line = '\t{ struct page *mapped_page;\n' + sub(r'\);', ', &mapped_page);', line) + if match('nested_svm_unmap(.*);'): + line = sub(r'\);', ', mapped_page); }', line) + if 
match(r'->thread.debugreg[0-7]'): + line = sub(r'->thread.debugreg([0-7])', r'->thread.kvm_compat_debugreg(\1)', line) + w(line) + if match(r'\tkvm_init_debug'): + w('\thrtimer_kallsyms_resolve();') + if match(r'apic->timer.dev.function ='): + w('\thrtimer_data_pointer(&apic->timer.dev);') + if match(r'pt->timer.function ='): + w('\thrtimer_data_pointer(&pt->timer);') + data = str.join('', [line + '\n' for line in result]) + return data + +def _hack(fname, arch): + data = file(fname).read() + data = __hack(data) + file(fname, 'w').write(data) + +def unifdef(fname): + data = file('unifdef.h').read() + file(fname).read() + file(fname, 'w').write(data) + +def hack(T, arch, file): + _hack(T + '/' + file, arch) + +hack_files = { + 'x86': str.split('kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c' + ' i8254.c timer.c eventfd.c'), + 'ia64': str.split('kvm_main.c kvm_fw.c kvm_lib.c kvm-ia64.c'), +} + +def mkdir(dir): + if not os.path.exists(dir): + os.makedirs(dir) + +def cp(src, dst): + mkdir(os.path.dirname(dst)) + file(dst, 'w').write(file(src).read()) + +def copy_if_changed(src, dst): + for dir, subdirs, files in os.walk(src): + ndir = dst + '/' + dir[len(src)+1:] + mkdir(ndir) + for fname in files: + old = ndir + '/' + fname + new = dir + '/' + fname + try: + if file(old).read() != file(new).read(): + raise Exception('different.') + except: + cp(new, old) + +def rmtree(path): + if os.path.exists(path): + shutil.rmtree(path) + +def header_sync(arch): + T = 'header' + rmtree(T) + for file in glob('%(linux)s/include/linux/kvm*.h' % { 'linux': linux }): + out = ('%(T)s/include/linux/%(name)s' + % { 'T': T, 'name': os.path.basename(file) }) + cp(file, out) + unifdef(out) + for file in glob(('%(linux)s/include/trace/events/kvm*.h' + % { 'linux': linux })): + out = ('%(T)s/include/trace/events/%(name)s' + % { 'T': T, 'name': os.path.basename(file) }) + cp(file, out) + unifdef(out) + arch_headers = ( + [x + for dir in ['%(linux)s/arch/%(arch)s/include/asm/./kvm*.h', + 
'%(linux)s/arch/%(arch)s/include/asm/./vmx*.h', + '%(linux)s/arch/%(arch)s/include/asm/./svm*.h', + '%(linux)s/arch/%(arch)s/include/asm/./virtext*.h'] + for x in glob(dir % { 'arch': arch, 'linux': linux }) + ]) + for file in arch_headers: + out = ('%(T)s/include/asm-%(arch)s/%(name)s' + % { 'T': T, 'name': os.path.basename(file), 'arch': arch }) + cp(file, out) + unifdef(out) + hack(T, 'x86', 'include/linux/kvm.h') + hack(T, arch, 'include/asm-%(arch)s/kvm.h' % { 'arch': arch }) + copy_if_changed(T, '.') + rmtree(T) + +def source_sync(arch): + T = 'source' + rmtree(T) + sources = [file + for pattern in ['%(linux)s/arch/%(arch)s/kvm/*.[cSh]', + '%(linux)s/virt/kvm/*.[cSh]'] + for file in glob(pattern % { 'linux': linux, 'arch': arch }) + if not file.endswith('.mod.c') + ] + for file in sources: + out = ('%(T)s/%(name)s' + % { 'T': T, 'name': os.path.basename(file) }) + cp(file, out) + + for i in glob(T + '/*.c'): + unifdef(i) + + for i in hack_files[arch]: + hack(T, arch, i) + + copy_if_changed(T, arch) + rmtree(T) + +for arch in ['x86', 'ia64']: + header_sync(arch) + source_sync(arch) diff --git a/kernel/unifdef.h b/kernel/unifdef.h new file mode 100644 index 00000000..6fc7be08 --- /dev/null +++ b/kernel/unifdef.h @@ -0,0 +1,40 @@ +#ifndef KVM_UNIFDEF_H +#define KVM_UNIFDEF_H + +#ifdef __i386__ +#ifndef CONFIG_X86_32 +#define CONFIG_X86_32 1 +#endif +#endif + +#ifdef __x86_64__ +#ifndef CONFIG_X86_64 +#define CONFIG_X86_64 1 +#endif +#endif + +#if defined(__i386__) || defined (__x86_64__) +#ifndef CONFIG_X86 +#define CONFIG_X86 1 +#endif +#endif + +#ifdef __ia64__ +#ifndef CONFIG_IA64 +#define CONFIG_IA64 1 +#endif +#endif + +#ifdef __PPC__ +#ifndef CONFIG_PPC +#define CONFIG_PPC 1 +#endif +#endif + +#ifdef __s390__ +#ifndef CONFIG_S390 +#define CONFIG_S390 1 +#endif +#endif + +#endif diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild new file mode 100644 index 00000000..af0824ce --- /dev/null +++ b/kernel/x86/Kbuild @@ -0,0 +1,14 @@ +obj-m := kvm.o kvm-intel.o 
kvm-amd.o +kvm-objs := kvm_main.o x86.o mmu.o emulate.o ../anon_inodes.o irq.o i8259.o \ + lapic.o ioapic.o preempt.o i8254.o coalesced_mmio.o irq_comm.o \ + timer.o eventfd.o \ + ../external-module-compat.o ../request-irq-compat.o +ifeq ($(CONFIG_IOMMU_API),y) +kvm-objs += iommu.o +endif +kvm-intel-objs := vmx.o vmx-debug.o +kvm-amd-objs := svm.o + +kvm-objs += ../srcu.o + +CFLAGS_kvm_main.o = -DKVM_MAIN diff --git a/kernel/x86/Makefile.pre b/kernel/x86/Makefile.pre new file mode 100644 index 00000000..e38baf13 --- /dev/null +++ b/kernel/x86/Makefile.pre @@ -0,0 +1 @@ +prerequisite: diff --git a/kernel/x86/debug.h b/kernel/x86/debug.h new file mode 100644 index 00000000..35793652 --- /dev/null +++ b/kernel/x86/debug.h @@ -0,0 +1,23 @@ +#ifndef __KVM_DEBUG_H +#define __KVM_DEBUG_H + +#ifdef KVM_DEBUG + +void show_msrs(struct kvm_vcpu *vcpu); + + +void show_irq(struct kvm_vcpu *vcpu, int irq); +void show_page(struct kvm_vcpu *vcpu, gva_t addr); +void show_u64(struct kvm_vcpu *vcpu, gva_t addr); +void show_code(struct kvm_vcpu *vcpu); +int vm_entry_test(struct kvm_vcpu *vcpu); + +void vmcs_dump(struct kvm_vcpu *vcpu); +void regs_dump(struct kvm_vcpu *vcpu); +void sregs_dump(struct kvm_vcpu *vcpu); +void show_pending_interrupts(struct kvm_vcpu *vcpu); +void vcpu_dump(struct kvm_vcpu *vcpu); + +#endif + +#endif diff --git a/kernel/x86/external-module-compat.h b/kernel/x86/external-module-compat.h new file mode 100644 index 00000000..b32e68ed --- /dev/null +++ b/kernel/x86/external-module-compat.h @@ -0,0 +1,687 @@ + +/* + * Compatibility header for building as an external module. 
+ */ + +#include <linux/compiler.h> +#include <linux/version.h> + +#include <linux/types.h> +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +typedef u64 phys_addr_t; + +#endif + +#include "../external-module-compat-comm.h" + +#include <asm/msr.h> +#include <asm/asm.h> + +#ifndef CONFIG_HAVE_KVM_EVENTFD +#define CONFIG_HAVE_KVM_EVENTFD 1 +#endif + +#ifndef CONFIG_KVM_APIC_ARCHITECTURE +#define CONFIG_KVM_APIC_ARCHITECTURE +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +#ifdef CONFIG_X86_64 +#define DECLARE_ARGS(val, low, high) unsigned low, high +#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32)) +#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high) +#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high) +#else +#define DECLARE_ARGS(val, low, high) unsigned long long val +#define EAX_EDX_VAL(val, low, high) (val) +#define EAX_EDX_ARGS(val, low, high) "A" (val) +#define EAX_EDX_RET(val, low, high) "=A" (val) +#endif + +#ifndef __ASM_EX_SEC +# define __ASM_EX_SEC " .section __ex_table,\"a\"\n" +#endif + +#ifndef _ASM_EXTABLE +# define _ASM_EXTABLE(from,to) \ + __ASM_EX_SEC \ + _ASM_ALIGN "\n" \ + _ASM_PTR #from "," #to "\n" \ + " .previous\n" +#endif + +#ifndef __ASM_SEL +#ifdef CONFIG_X86_32 +# define __ASM_SEL(a,b) __ASM_FORM(a) +#else +# define __ASM_SEL(a,b) __ASM_FORM(b) +#endif +#endif + +#ifndef __ASM_FORM +# define __ASM_FORM(x) " " #x " " +#endif + +#ifndef _ASM_PTR +#define _ASM_PTR __ASM_SEL(.long, .quad) +#endif + +#ifndef _ASM_ALIGN +#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) || defined(CONFIG_X86_64) + +static inline unsigned long long native_read_msr_safe(unsigned int msr, + int *err) +{ + DECLARE_ARGS(val, low, high); + + asm volatile("2: rdmsr ; xor %[err],%[err]\n" + "1:\n\t" + ".section .fixup,\"ax\"\n\t" + "3: mov %[fault],%[err] ; jmp 1b\n\t" + ".previous\n\t" + _ASM_EXTABLE(2b, 3b) + : [err] "=r" (*err), EAX_EDX_RET(val, low, high) + : 
"c" (msr), [fault] "i" (-EFAULT)); + return EAX_EDX_VAL(val, low, high); +} + +static inline unsigned long long native_read_tsc(void) +{ + unsigned long long val; + asm volatile("rdtsc" : "=A" (val)); + return val; +} + +#endif + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) + +static inline int rdmsrl_safe(unsigned msr, unsigned long long *p) +{ + int err; + + *p = native_read_msr_safe(msr, &err); + return err; +} + +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) + +#ifndef _EFER_SCE +#define _EFER_SCE 0 /* SYSCALL/SYSRET */ +#endif + +#ifndef EFER_SCE +#define EFER_SCE (1<<_EFER_SCE) +#endif + +#endif + +#ifndef MSR_KERNEL_GS_BASE +#define MSR_KERNEL_GS_BASE 0xc0000102 +#endif + +#ifndef MSR_VM_CR +#define MSR_VM_CR 0xc0010114 +#endif + +#ifndef MSR_VM_HSAVE_PA +#define MSR_VM_HSAVE_PA 0xc0010117 +#endif + +#ifndef _EFER_SVME +#define _EFER_SVME 12 +#define EFER_SVME (1<<_EFER_SVME) +#endif + +#ifndef _EFER_FFXSR +#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */ +#define EFER_FFXSR (1<<_EFER_FFXSR) +#endif + +#ifndef MSR_STAR +#define MSR_STAR 0xc0000081 +#endif + +#ifndef MSR_K8_INT_PENDING_MSG +#define MSR_K8_INT_PENDING_MSG 0xc0010055 +#endif + +#include <asm/cpufeature.h> + +#ifndef X86_FEATURE_SVM +#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */ +#endif + +#ifndef X86_FEATURE_FXSR_OPT +#define X86_FEATURE_FXSR_OPT (1*32+25) +#endif + +#ifndef X86_FEATURE_GBPAGES +#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */ +#endif + +#ifndef X86_FEATURE_SSSE3 +#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */ +#endif + +#ifndef X86_FEATURE_XMM4_1 +#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ +#endif + +#ifndef X86_FEATURE_XMM4_2 +#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ +#endif + +#ifndef X86_FEATURE_MOVBE +#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ +#endif + +#ifndef X86_FEATURE_POPCNT +#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ +#endif + 
+#ifndef X86_FEATURE_CR8_LEGACY +#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */ +#endif + +#ifndef X86_FEATURE_ABM +#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */ +#endif + +#ifndef X86_FEATURE_SSE4A +#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */ +#endif + +#ifndef X86_FEATURE_MISALIGNSSE +#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */ +#endif + +#ifndef X86_FEATURE_3DNOWPREFETCH +#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */ +#endif + +#ifndef X86_FEATURE_SSE5 +#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */ +#endif + +#ifndef X86_FEATURE_X2APIC +#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ +#endif + +#ifndef MSR_AMD64_PATCH_LOADER +#define MSR_AMD64_PATCH_LOADER 0xc0010020 +#endif + +#include <linux/smp.h> + +#ifndef X86_CR0_PE +#define X86_CR0_PE 0x00000001 +#endif + +#ifndef X86_CR0_MP +#define X86_CR0_MP 0x00000002 +#endif + +#ifndef X86_CR0_EM +#define X86_CR0_EM 0x00000004 +#endif + +#ifndef X86_CR0_TS +#define X86_CR0_TS 0x00000008 +#endif + +#ifndef X86_CR0_ET +#define X86_CR0_ET 0x00000010 +#endif + +#ifndef X86_CR0_NE +#define X86_CR0_NE 0x00000020 +#endif + +#ifndef X86_CR0_WP +#define X86_CR0_WP 0x00010000 +#endif + +#ifndef X86_CR0_AM +#define X86_CR0_AM 0x00040000 +#endif + +#ifndef X86_CR0_NW +#define X86_CR0_NW 0x20000000 +#endif + +#ifndef X86_CR0_CD +#define X86_CR0_CD 0x40000000 +#endif + +#ifndef X86_CR0_PG +#define X86_CR0_PG 0x80000000 +#endif + +#ifndef X86_CR3_PWT +#define X86_CR3_PWT 0x00000008 +#endif + +#ifndef X86_CR3_PCD +#define X86_CR3_PCD 0x00000010 +#endif + +#ifndef X86_CR4_VMXE +#define X86_CR4_VMXE 0x00002000 +#endif + +#undef X86_CR8_TPR +#define X86_CR8_TPR 0x0f + +/* + * 2.6.22 does not define set_64bit() under nonpae + */ +#ifdef CONFIG_X86_32 + +#include <asm/cmpxchg.h> + +static inline void __kvm_set_64bit(u64 *ptr, u64 val) +{ + unsigned int low = val; + unsigned int high = val >> 32; + + __asm__ __volatile__ ( + "\n1:\t" + 
"movl (%0), %%eax\n\t" + "movl 4(%0), %%edx\n\t" + "lock cmpxchg8b (%0)\n\t" + "jnz 1b" + : /* no outputs */ + : "D"(ptr), + "b"(low), + "c"(high) + : "ax","dx","memory"); +} + +#undef set_64bit +#define set_64bit __kvm_set_64bit + +static inline unsigned long long __kvm_cmpxchg64(volatile void *ptr, + unsigned long long old, + unsigned long long new) +{ + unsigned long long prev; + __asm__ __volatile__("lock cmpxchg8b %3" + : "=A"(prev) + : "b"((unsigned long)new), + "c"((unsigned long)(new >> 32)), + "m"(*__xg(ptr)), + "0"(old) + : "memory"); + return prev; +} + +#define kvm_cmpxchg64(ptr,o,n)\ + ((__typeof__(*(ptr)))__kvm_cmpxchg64((ptr),(unsigned long long)(o),\ + (unsigned long long)(n))) + +#undef cmpxchg64 +#define cmpxchg64(ptr, o, n) kvm_cmpxchg64(ptr, o, n) + +#endif + +#ifndef CONFIG_PREEMPT_NOTIFIERS +/* + * Include sched|preempt.h before defining CONFIG_PREEMPT_NOTIFIERS to avoid + * a miscompile. + */ +#include <linux/sched.h> +#include <linux/preempt.h> +#define CONFIG_PREEMPT_NOTIFIERS +#define CONFIG_PREEMPT_NOTIFIERS_COMPAT + +struct preempt_notifier; + +struct preempt_ops { + void (*sched_in)(struct preempt_notifier *notifier, int cpu); + void (*sched_out)(struct preempt_notifier *notifier, + struct task_struct *next); +}; + +struct preempt_notifier { + struct list_head link; + struct task_struct *tsk; + struct preempt_ops *ops; +}; + +void preempt_notifier_register(struct preempt_notifier *notifier); +void preempt_notifier_unregister(struct preempt_notifier *notifier); + +static inline void preempt_notifier_init(struct preempt_notifier *notifier, + struct preempt_ops *ops) +{ + notifier->ops = ops; +} + +void start_special_insn(void); +void end_special_insn(void); +void in_special_section(void); + +void preempt_notifier_sys_init(void); +void preempt_notifier_sys_exit(void); + +#else + +static inline void start_special_insn(void) {} +static inline void end_special_insn(void) {} +static inline void in_special_section(void) {} + +static inline void 
preempt_notifier_sys_init(void) {} +static inline void preempt_notifier_sys_exit(void) {} + +#endif + +/* CONFIG_HAS_IOMEM is apparently fairly new too (2.6.21 for x86_64). */ +#ifndef CONFIG_HAS_IOMEM +#define CONFIG_HAS_IOMEM 1 +#endif + +/* X86_FEATURE_NX is missing in some x86_64 kernels */ + +#include <asm/cpufeature.h> + +#ifndef X86_FEATURE_NX +#define X86_FEATURE_NX (1*32+20) +#endif + +/* EFER_LMA and EFER_LME are missing in pre 2.6.24 i386 kernels */ +#ifndef EFER_LME +#define _EFER_LME 8 /* Long mode enable */ +#define _EFER_LMA 10 /* Long mode active (read-only) */ +#define EFER_LME (1<<_EFER_LME) +#define EFER_LMA (1<<_EFER_LMA) +#endif + +struct kvm_desc_struct { + union { + struct { unsigned int a, b; }; + struct { + u16 limit0; + u16 base0; + unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1; + unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8; + }; + + }; +} __attribute__((packed)); + +struct kvm_ldttss_desc64 { + u16 limit0; + u16 base0; + unsigned base1 : 8, type : 5, dpl : 2, p : 1; + unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8; + u32 base3; + u32 zero1; +} __attribute__((packed)); + +struct kvm_desc_ptr { + unsigned short size; + unsigned long address; +} __attribute__((packed)); + +static inline unsigned long kvm_get_desc_base(const struct kvm_desc_struct *desc) +{ + return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24); +} + +static inline unsigned long kvm_get_desc_limit(const struct kvm_desc_struct *desc) +{ + return desc->limit0 | (desc->limit << 16); +} + +#include <asm/msr.h> +#ifndef MSR_FS_BASE +#define MSR_FS_BASE 0xc0000100 +#endif +#ifndef MSR_GS_BASE +#define MSR_GS_BASE 0xc0000101 +#endif + +/* undefine lapic */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) + +#undef lapic + +#endif + +#include <asm/hw_irq.h> +#ifndef NMI_VECTOR +#define NMI_VECTOR 2 +#endif + +#ifndef MSR_MTRRcap +#define MSR_MTRRcap 0x0fe +#define MSR_MTRRfix64K_00000 0x250 +#define MSR_MTRRfix16K_80000 0x258 +#define MSR_MTRRfix16K_A0000 
0x259 +#define MSR_MTRRfix4K_C0000 0x268 +#define MSR_MTRRfix4K_C8000 0x269 +#define MSR_MTRRfix4K_D0000 0x26a +#define MSR_MTRRfix4K_D8000 0x26b +#define MSR_MTRRfix4K_E0000 0x26c +#define MSR_MTRRfix4K_E8000 0x26d +#define MSR_MTRRfix4K_F0000 0x26e +#define MSR_MTRRfix4K_F8000 0x26f +#define MSR_MTRRdefType 0x2ff +#endif + +#ifndef MSR_IA32_CR_PAT +#define MSR_IA32_CR_PAT 0x00000277 +#endif + +#ifndef MSR_VM_IGNNE +#define MSR_VM_IGNNE 0xc0010115 +#endif + +/* Define DEBUGCTLMSR bits */ +#ifndef DEBUGCTLMSR_LBR + +#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ +#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ + +#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) +#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) + +#endif + +#ifndef MSR_FAM10H_MMIO_CONF_BASE +#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058 +#endif + +#ifndef MSR_AMD64_NB_CFG +#define MSR_AMD64_NB_CFG 0xc001001f +#endif + +#include <asm/asm.h> + +#ifndef __ASM_SIZE +# define ____ASM_FORM(x) " " #x " " +# ifdef CONFIG_X86_64 +# define __ASM_SIZE(inst) ____ASM_FORM(inst##q) +# else +# define __ASM_SIZE(inst) ____ASM_FORM(inst##l) +# endif +#endif + +#ifndef _ASM_PTR +# ifdef CONFIG_X86_64 +# define _ASM_PTR ".quad" +# else +# define _ASM_PTR ".long" +# endif +#endif + +/* Intel VT MSRs */ +#ifndef MSR_IA32_VMX_BASIC +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 +#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#endif + +#ifndef MSR_IA32_FEATURE_CONTROL +#define MSR_IA32_FEATURE_CONTROL 0x0000003a + +#define 
FEATURE_CONTROL_LOCKED (1<<0) +#define FEATURE_CONTROL_VMXON_ENABLED (1<<2) +#endif + +#ifndef MSR_IA32_TSC +#define MSR_IA32_TSC 0x00000010 +#endif + +#ifndef MSR_K7_HWCR +#define MSR_K7_HWCR 0xc0010015 +#endif + +#ifndef MSR_K8_SYSCFG +#define MSR_K8_SYSCFG 0xc0010010 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) && defined(__x86_64__) + +#undef set_debugreg +#define set_debugreg(value, register) \ + __asm__("movq %0,%%db" #register \ + : /* no output */ \ + :"r" ((unsigned long)value)) + +#endif + +#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) +#define kvm_compat_debugreg(x) debugreg[x] +#else +#define kvm_compat_debugreg(x) debugreg##x +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) + +struct mtrr_var_range { + u32 base_lo; + u32 base_hi; + u32 mask_lo; + u32 mask_hi; +}; + +/* In the Intel processor's MTRR interface, the MTRR type is always held in + an 8 bit field: */ +typedef u8 mtrr_type; + +#define MTRR_NUM_FIXED_RANGES 88 +#define MTRR_MAX_VAR_RANGES 256 + +struct mtrr_state_type { + struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; + mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; + unsigned char enabled; + unsigned char have_fixed; + mtrr_type def_type; +}; + +#endif + +#ifndef CONFIG_HAVE_KVM_IRQCHIP +#define CONFIG_HAVE_KVM_IRQCHIP 1 +#endif + +#include <asm/mce.h> + +#ifndef MCG_CTL_P +#define MCG_CTL_P (1ULL<<8) +#define MCG_STATUS_MCIP (1ULL<<2) +#define MCI_STATUS_VAL (1ULL<<63) +#define MCI_STATUS_OVER (1ULL<<62) +#define MCI_STATUS_UC (1ULL<<61) +#endif + +/* do_machine_check() exported in 2.6.31 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) + +static inline void kvm_do_machine_check(struct pt_regs *regs, long error_code) +{ + panic("kvm machine check!\n"); +} + +#else + +#define kvm_do_machine_check do_machine_check + +#endif + +/* pt_regs.flags was once pt_regs.eflags */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) + +#define kvm_pt_regs_flags eflags + +# ifdef 
CONFIG_X86_64 +# define kvm_pt_regs_cs cs +# else +# define kvm_pt_regs_cs xcs +# endif + +#else + +#define kvm_pt_regs_flags flags +#define kvm_pt_regs_cs cs + +#endif + +/* boot_cpu_data.x86_phys_bits only appeared for i386 in 2.6.30 */ + +#if !defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)) + +#define kvm_x86_phys_bits 40 + +#else + +#define kvm_x86_phys_bits (boot_cpu_data.x86_phys_bits) + +#endif + +#include <asm/apicdef.h> + +#ifndef APIC_BASE_MSR +#define APIC_BASE_MSR 0x800 +#endif + +#ifndef APIC_SPIV_DIRECTED_EOI +#define APIC_SPIV_DIRECTED_EOI (1 << 12) +#endif + +#ifndef APIC_LVR_DIRECTED_EOI +#define APIC_LVR_DIRECTED_EOI (1 << 24) +#endif + +#ifndef APIC_SELF_IPI +#define APIC_SELF_IPI 0x3F0 +#endif + +#ifndef X2APIC_ENABLE +#define X2APIC_ENABLE (1UL << 10) +#endif + diff --git a/kernel/x86/preempt.c b/kernel/x86/preempt.c new file mode 100644 index 00000000..440060b2 --- /dev/null +++ b/kernel/x86/preempt.c @@ -0,0 +1,247 @@ + +#ifdef CONFIG_PREEMPT_NOTIFIERS_COMPAT + +#include <linux/sched.h> +#include <linux/percpu.h> + +static DEFINE_SPINLOCK(pn_lock); +static LIST_HEAD(pn_list); + +#define dprintk(fmt) do { \ + if (0) \ + printk("%s (%d/%d): " fmt, __FUNCTION__, \ + current->pid, raw_smp_processor_id()); \ + } while (0) + +static void preempt_enable_sched_out_notifiers(void) +{ + asm volatile ("mov %0, %%db0" : : "r"(schedule)); + asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); + current->thread.kvm_compat_debugreg(7) = 0ul; +#ifdef TIF_DEBUG + clear_tsk_thread_flag(current, TIF_DEBUG); +#endif +} + +static void preempt_enable_sched_in_notifiers(void * addr) +{ + asm volatile ("mov %0, %%db0" : : "r"(addr)); + asm volatile ("mov %0, %%db7" : : "r"(0x701ul)); + current->thread.kvm_compat_debugreg(0) = (unsigned long) addr; + current->thread.kvm_compat_debugreg(7) = 0x701ul; +#ifdef TIF_DEBUG + set_tsk_thread_flag(current, TIF_DEBUG); +#endif +} + +static void __preempt_disable_notifiers(void) +{ + asm volatile ("mov %0, 
%%db7" : : "r"(0ul)); +} + +static void preempt_disable_notifiers(void) +{ + __preempt_disable_notifiers(); + current->thread.kvm_compat_debugreg(7) = 0ul; +#ifdef TIF_DEBUG + clear_tsk_thread_flag(current, TIF_DEBUG); +#endif +} + +static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip) +{ + struct preempt_notifier *pn; + int cpu = raw_smp_processor_id(); + int found = 0; + + dprintk(" - in\n"); + //dump_stack(); + spin_lock(&pn_lock); + list_for_each_entry(pn, &pn_list, link) + if (pn->tsk == current) { + found = 1; + break; + } + spin_unlock(&pn_lock); + + if (found) { + if ((void *) *ip != schedule) { + dprintk("sched_in\n"); + preempt_enable_sched_out_notifiers(); + + preempt_disable(); + local_irq_enable(); + pn->ops->sched_in(pn, cpu); + local_irq_disable(); + preempt_enable_no_resched(); + } else { + void * sched_in_addr; + dprintk("sched_out\n"); +#ifdef CONFIG_X86_64 + sched_in_addr = **(ip+3); +#else + /* no special debug stack switch on x86 */ + sched_in_addr = (void *) *(ip+3); +#endif + preempt_enable_sched_in_notifiers(sched_in_addr); + + preempt_disable(); + local_irq_enable(); + pn->ops->sched_out(pn, NULL); + local_irq_disable(); + preempt_enable_no_resched(); + } + } else + __preempt_disable_notifiers(); + dprintk(" - out\n"); +} + +unsigned long orig_int1_handler; + +#ifdef CONFIG_X86_64 + +#define SAVE_REGS \ + "push %rax; push %rbx; push %rcx; push %rdx; " \ + "push %rsi; push %rdi; push %rbp; " \ + "push %r8; push %r9; push %r10; push %r11; " \ + "push %r12; push %r13; push %r14; push %r15" + +#define RESTORE_REGS \ + "pop %r15; pop %r14; pop %r13; pop %r12; " \ + "pop %r11; pop %r10; pop %r9; pop %r8; " \ + "pop %rbp; pop %rdi; pop %rsi; " \ + "pop %rdx; pop %rcx; pop %rbx; pop %rax " + +#define TMP "%rax" + +#else + +#define SAVE_REGS "pusha" +#define RESTORE_REGS "popa" +#define TMP "%eax" + +#endif + +asm ("pn_int1_handler: \n\t" + "push " TMP " \n\t" + "mov %db7, " TMP " \n\t" + "cmp $0x701, " TMP " \n\t" + "pop 
" TMP " \n\t" + "jnz .Lnotme \n\t" + "push " TMP " \n\t" + "mov %db6, " TMP " \n\t" + "test $0x1, " TMP " \n\t" + "pop " TMP " \n\t" + "jz .Lnotme \n\t" + SAVE_REGS "\n\t" +#ifdef CONFIG_X86_64 + "leaq 120(%rsp),%rdi\n\t" +#else + "leal 32(%esp),%eax\n\t" +#endif + "call preempt_notifier_trigger \n\t" + RESTORE_REGS "\n\t" +#ifdef CONFIG_X86_64 + "orq $0x10000, 16(%rsp) \n\t" + "iretq \n\t" +#else + "orl $0x10000, 8(%esp) \n\t" + "iret \n\t" +#endif + ".Lnotme: \n\t" +#ifdef CONFIG_X86_64 + "jmpq *orig_int1_handler\n\t" +#else + "jmpl *orig_int1_handler\n\t" +#endif + ); + +void preempt_notifier_register(struct preempt_notifier *notifier) +{ + unsigned long flags; + + dprintk(" - in\n"); + spin_lock_irqsave(&pn_lock, flags); + preempt_enable_sched_out_notifiers(); + notifier->tsk = current; + list_add(¬ifier->link, &pn_list); + spin_unlock_irqrestore(&pn_lock, flags); + dprintk(" - out\n"); +} + +void preempt_notifier_unregister(struct preempt_notifier *notifier) +{ + unsigned long flags; + + dprintk(" - in\n"); + spin_lock_irqsave(&pn_lock, flags); + list_del(¬ifier->link); + spin_unlock_irqrestore(&pn_lock, flags); + preempt_disable_notifiers(); + dprintk(" - out\n"); +} + +struct intr_gate { + u16 offset0; + u16 segment; + u16 junk; + u16 offset1; +#ifdef CONFIG_X86_64 + u32 offset2; + u32 blah; +#endif +} __attribute__((packed)); + +struct idt_desc { + u16 limit; + struct intr_gate *gates; +} __attribute__((packed)); + +static struct intr_gate orig_int1_gate; + +void pn_int1_handler(void); + +void preempt_notifier_sys_init(void) +{ + struct idt_desc idt_desc; + struct intr_gate *int1_gate; + + printk("kvm: emulating preempt notifiers;" + " do not benchmark on this machine\n"); + dprintk("\n"); + asm ("sidt %0" : "=m"(idt_desc)); + int1_gate = &idt_desc.gates[1]; + orig_int1_gate = *int1_gate; + orig_int1_handler = int1_gate->offset0 + | ((u32)int1_gate->offset1 << 16); +#ifdef CONFIG_X86_64 + orig_int1_handler |= (u64)int1_gate->offset2 << 32; +#endif + 
int1_gate->offset0 = (unsigned long)pn_int1_handler; + int1_gate->offset1 = (unsigned long)pn_int1_handler >> 16; +#ifdef CONFIG_X86_64 + int1_gate->offset2 = (unsigned long)pn_int1_handler >> 32; +#endif +} + +static void do_disable(void *blah) +{ +#ifdef TIF_DEBUG + if (!test_tsk_thread_flag(current, TIF_DEBUG)) +#else + if (!current->thread.kvm_compat_debugreg(7)) +#endif + __preempt_disable_notifiers(); +} + +void preempt_notifier_sys_exit(void) +{ + struct idt_desc idt_desc; + + dprintk("\n"); + kvm_on_each_cpu(do_disable, NULL, 1); + asm ("sidt %0" : "=m"(idt_desc)); + idt_desc.gates[1] = orig_int1_gate; +} + +#endif diff --git a/kernel/x86/vmx-debug.c b/kernel/x86/vmx-debug.c new file mode 100644 index 00000000..d466f03f --- /dev/null +++ b/kernel/x86/vmx-debug.c @@ -0,0 +1,1112 @@ +/* + * Kernel-based Virtual Machine driver for Linux + * + * This module enables machines with Intel VT-x extensions to run virtual + * machines without emulation or binary translation. + * + * Debug support + * + * Copyright (C) 2006 Qumranet, Inc. 
+ * + * Authors: + * Yaniv Kamay <yaniv@qumranet.com> + * Avi Kivity <avi@qumranet.com> + * + */ + +#include <linux/highmem.h> + +#include <linux/kvm_host.h> +#include <asm/vmx.h> +#include <asm/kvm_host.h> +#include "mmu.h" +#include "lapic.h" +#include "debug.h" + +#ifdef KVM_DEBUG + +static unsigned long vmcs_readl(unsigned long field) +{ + unsigned long value; + + asm volatile (ASM_VMX_VMREAD_RDX_RAX + : "=a"(value) : "d"(field) : "cc"); + return value; +} + +static u16 vmcs_read16(unsigned long field) +{ + return vmcs_readl(field); +} + +static u32 vmcs_read32(unsigned long field) +{ + return vmcs_readl(field); +} + +static u64 vmcs_read64(unsigned long field) +{ +#ifdef CONFIG_X86_64 + return vmcs_readl(field); +#else + return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32); +#endif +} + +void show_code(struct kvm_vcpu *vcpu) +{ + gva_t rip = vmcs_readl(GUEST_RIP); + u8 code[50]; + char buf[30 + 3 * sizeof code]; + int i; + gpa_t gpa; + + if (!is_long_mode(vcpu)) + rip += vmcs_readl(GUEST_CS_BASE); + + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, rip); + if (gpa == UNMAPPED_GVA) + return; + kvm_read_guest(vcpu->kvm, gpa, code, sizeof code); + for (i = 0; i < sizeof code; ++i) + sprintf(buf + i * 3, " %02x", code[i]); + vcpu_printf(vcpu, "code: %lx%s\n", rip, buf); +} + +struct gate_struct { + u16 offset_low; + u16 segment; + unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1; + u16 offset_middle; + u32 offset_high; + u32 zero1; +} __attribute__((packed)); + +void show_irq(struct kvm_vcpu *vcpu, int irq) +{ + unsigned long idt_base = vmcs_readl(GUEST_IDTR_BASE); + unsigned long idt_limit = vmcs_readl(GUEST_IDTR_LIMIT); + struct gate_struct gate; + gpa_t gpa; + + if (!is_long_mode(vcpu)) + vcpu_printf(vcpu, "%s: not in long mode\n", __FUNCTION__); + + if (!is_long_mode(vcpu) || idt_limit < irq * sizeof(gate)) { + vcpu_printf(vcpu, "%s: 0x%x read_guest err\n", + __FUNCTION__, + irq); + return; + } + + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, idt_base + irq * 
sizeof(gate)); + if (gpa == UNMAPPED_GVA) + return; + + if (kvm_read_guest(vcpu->kvm, gpa, &gate, sizeof(gate)) != sizeof(gate)) { + vcpu_printf(vcpu, "%s: 0x%x read_guest err\n", + __FUNCTION__, + irq); + return; + } + vcpu_printf(vcpu, "%s: 0x%x handler 0x%llx\n", + __FUNCTION__, + irq, + ((u64)gate.offset_high << 32) | + ((u64)gate.offset_middle << 16) | + gate.offset_low); +} + +void show_page(struct kvm_vcpu *vcpu, + gva_t addr) +{ + u64 *buf = kmalloc(PAGE_SIZE, GFP_KERNEL); + gpa_t gpa; + + if (!buf) + return; + + addr &= PAGE_MASK; + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); + if (gpa == UNMAPPED_GVA) + return; + if (kvm_read_guest(vcpu->kvm, gpa, buf, PAGE_SIZE)) { + int i; + for (i = 0; i < PAGE_SIZE / sizeof(u64) ; i++) { + u8 *ptr = (u8*)&buf[i]; + int j; + vcpu_printf(vcpu, " 0x%16.16lx:", + addr + i * sizeof(u64)); + for (j = 0; j < sizeof(u64) ; j++) + vcpu_printf(vcpu, " 0x%2.2x", ptr[j]); + vcpu_printf(vcpu, "\n"); + } + } + kfree(buf); +} + +void show_u64(struct kvm_vcpu *vcpu, gva_t addr) +{ + u64 buf; + gpa_t gpa; + + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); + if (gpa == UNMAPPED_GVA) + return; + if (kvm_read_guest(vcpu->kvm, gpa, &buf, sizeof(u64)) == sizeof(u64)) { + u8 *ptr = (u8*)&buf; + int j; + vcpu_printf(vcpu, " 0x%16.16lx:", addr); + for (j = 0; j < sizeof(u64) ; j++) + vcpu_printf(vcpu, " 0x%2.2x", ptr[j]); + vcpu_printf(vcpu, "\n"); + } +} + +#define IA32_DEBUGCTL_RESERVED_BITS 0xfffffffffffffe3cULL + +static int is_canonical(unsigned long addr) +{ + return addr == ((long)addr << 16) >> 16; +} + +int vm_entry_test_guest(struct kvm_vcpu *vcpu) +{ + unsigned long cr0; + unsigned long cr4; + unsigned long cr3; + unsigned long dr7; + u64 ia32_debugctl; + unsigned long sysenter_esp; + unsigned long sysenter_eip; + unsigned long rflags; + unsigned long cpu_exec_ctrl, cpu_secondary_exec_ctrl; + unsigned long tpr_threshold; + + int long_mode; + int virtual8086; + + #define RFLAGS_VM (1 << 17) + #define RFLAGS_RF (1 << 9) + + + #define 
VIR8086_SEG_BASE_TEST(seg)\ + if (vmcs_readl(GUEST_##seg##_BASE) != \ + (unsigned long)vmcs_read16(GUEST_##seg##_SELECTOR) << 4) {\ + vcpu_printf(vcpu, "%s: "#seg" base 0x%lx in "\ + "virtual8086 is not "#seg" selector 0x%x"\ + " shifted right 4 bits\n",\ + __FUNCTION__,\ + vmcs_readl(GUEST_##seg##_BASE),\ + vmcs_read16(GUEST_##seg##_SELECTOR));\ + return 0;\ + } + + #define VIR8086_SEG_LIMIT_TEST(seg)\ + if (vmcs_readl(GUEST_##seg##_LIMIT) != 0x0ffff) { \ + vcpu_printf(vcpu, "%s: "#seg" limit 0x%lx in "\ + "virtual8086 is not 0xffff\n",\ + __FUNCTION__,\ + vmcs_readl(GUEST_##seg##_LIMIT));\ + return 0;\ + } + + #define VIR8086_SEG_AR_TEST(seg)\ + if (vmcs_read32(GUEST_##seg##_AR_BYTES) != 0x0f3) { \ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x in "\ + "virtual8086 is not 0xf3\n",\ + __FUNCTION__,\ + vmcs_read32(GUEST_##seg##_AR_BYTES));\ + return 0;\ + } + + + cr0 = vmcs_readl(GUEST_CR0); + + if (!(cr0 & X86_CR0_PG)) { + vcpu_printf(vcpu, "%s: cr0 0x%lx, PG is not set\n", + __FUNCTION__, cr0); + return 0; + } + + if (!(cr0 & X86_CR0_PE)) { + vcpu_printf(vcpu, "%s: cr0 0x%lx, PE is not set\n", + __FUNCTION__, cr0); + return 0; + } + + if (!(cr0 & X86_CR0_NE)) { + vcpu_printf(vcpu, "%s: cr0 0x%lx, NE is not set\n", + __FUNCTION__, cr0); + return 0; + } + + if (!(cr0 & X86_CR0_WP)) { + vcpu_printf(vcpu, "%s: cr0 0x%lx, WP is not set\n", + __FUNCTION__, cr0); + } + + cr4 = vmcs_readl(GUEST_CR4); + + if (!(cr4 & X86_CR4_VMXE)) { + vcpu_printf(vcpu, "%s: cr4 0x%lx, VMXE is not set\n", + __FUNCTION__, cr4); + return 0; + } + + if (!(cr4 & X86_CR4_PAE)) { + vcpu_printf(vcpu, "%s: cr4 0x%lx, PAE is not set\n", + __FUNCTION__, cr4); + } + + ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + + if (ia32_debugctl & IA32_DEBUGCTL_RESERVED_BITS ) { + vcpu_printf(vcpu, "%s: ia32_debugctl 0x%llx, reserve bits\n", + __FUNCTION__, ia32_debugctl); + return 0; + } + + long_mode = is_long_mode(vcpu); + + if (long_mode) { + } + + if ( long_mode && !(cr4 & X86_CR4_PAE)) { + vcpu_printf(vcpu, 
"%s: long mode and not PAE\n", + __FUNCTION__); + return 0; + } + + cr3 = vmcs_readl(GUEST_CR3); + + if (cr3 & CR3_L_MODE_RESERVED_BITS) { + vcpu_printf(vcpu, "%s: cr3 0x%lx, reserved bits\n", + __FUNCTION__, cr3); + return 0; + } + + if ( !long_mode && (cr4 & X86_CR4_PAE)) { + /* check the 4 PDPTEs for reserved bits */ + unsigned long pdpt_pfn = cr3 >> PAGE_SHIFT; + int i; + u64 pdpte; + unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5; + u64 *pdpt = kmap_atomic(pfn_to_page(pdpt_pfn), KM_USER0); + + for (i = 0; i < 4; ++i) { + pdpte = pdpt[offset + i]; + if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull)) + break; + } + + kunmap_atomic(pdpt, KM_USER0); + + if (i != 4) { + vcpu_printf(vcpu, "%s: pae cr3[%d] 0x%llx, reserved bits\n", + __FUNCTION__, i, pdpte); + return 0; + } + } + + dr7 = vmcs_readl(GUEST_DR7); + + if (dr7 & ~((1ULL << 32) - 1)) { + vcpu_printf(vcpu, "%s: dr7 0x%lx, reserved bits\n", + __FUNCTION__, dr7); + return 0; + } + + sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); + + if (!is_canonical(sysenter_esp)) { + vcpu_printf(vcpu, "%s: sysenter_esp 0x%lx, not canonical\n", + __FUNCTION__, sysenter_esp); + return 0; + } + + sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); + + if (!is_canonical(sysenter_eip)) { + vcpu_printf(vcpu, "%s: sysenter_eip 0x%lx, not canonical\n", + __FUNCTION__, sysenter_eip); + return 0; + } + + rflags = vmcs_readl(GUEST_RFLAGS); + virtual8086 = rflags & RFLAGS_VM; + + + if (vmcs_read16(GUEST_TR_SELECTOR) & SELECTOR_TI_MASK) { + vcpu_printf(vcpu, "%s: tr selctor 0x%x, TI is set\n", + __FUNCTION__, vmcs_read16(GUEST_TR_SELECTOR)); + return 0; + } + + if (!(vmcs_read32(GUEST_LDTR_AR_BYTES) & AR_UNUSABLE_MASK) && + vmcs_read16(GUEST_LDTR_SELECTOR) & SELECTOR_TI_MASK) { + vcpu_printf(vcpu, "%s: ldtr selctor 0x%x," + " is usable and TI is set\n", + __FUNCTION__, vmcs_read16(GUEST_LDTR_SELECTOR)); + return 0; + } + + if (!virtual8086 && + (vmcs_read16(GUEST_SS_SELECTOR) & SELECTOR_RPL_MASK) != + (vmcs_read16(GUEST_CS_SELECTOR) & 
SELECTOR_RPL_MASK)) { + vcpu_printf(vcpu, "%s: ss selctor 0x%x cs selctor 0x%x," + " not same RPL\n", + __FUNCTION__, + vmcs_read16(GUEST_SS_SELECTOR), + vmcs_read16(GUEST_CS_SELECTOR)); + return 0; + } + + if (virtual8086) { + VIR8086_SEG_BASE_TEST(CS); + VIR8086_SEG_BASE_TEST(SS); + VIR8086_SEG_BASE_TEST(DS); + VIR8086_SEG_BASE_TEST(ES); + VIR8086_SEG_BASE_TEST(FS); + VIR8086_SEG_BASE_TEST(GS); + } + + if (!is_canonical(vmcs_readl(GUEST_TR_BASE)) || + !is_canonical(vmcs_readl(GUEST_FS_BASE)) || + !is_canonical(vmcs_readl(GUEST_GS_BASE)) ) { + vcpu_printf(vcpu, "%s: TR 0x%lx FS 0x%lx or GS 0x%lx base" + " is not canonical\n", + __FUNCTION__, + vmcs_readl(GUEST_TR_BASE), + vmcs_readl(GUEST_FS_BASE), + vmcs_readl(GUEST_GS_BASE)); + return 0; + + } + + if (!(vmcs_read32(GUEST_LDTR_AR_BYTES) & AR_UNUSABLE_MASK) && + !is_canonical(vmcs_readl(GUEST_LDTR_BASE))) { + vcpu_printf(vcpu, "%s: LDTR base 0x%lx, usable and is not" + " canonical\n", + __FUNCTION__, + vmcs_readl(GUEST_LDTR_BASE)); + return 0; + } + + if ((vmcs_readl(GUEST_CS_BASE) & ~((1ULL << 32) - 1))) { + vcpu_printf(vcpu, "%s: CS base 0x%lx, not all bits 63-32" + " are zero\n", + __FUNCTION__, + vmcs_readl(GUEST_CS_BASE)); + return 0; + } + + #define SEG_BASE_TEST(seg)\ + if ( !(vmcs_read32(GUEST_##seg##_AR_BYTES) & AR_UNUSABLE_MASK) &&\ + (vmcs_readl(GUEST_##seg##_BASE) & ~((1ULL << 32) - 1))) {\ + vcpu_printf(vcpu, "%s: "#seg" base 0x%lx, is usable and not"\ + " all bits 63-32 are zero\n",\ + __FUNCTION__,\ + vmcs_readl(GUEST_##seg##_BASE));\ + return 0;\ + } + SEG_BASE_TEST(SS); + SEG_BASE_TEST(DS); + SEG_BASE_TEST(ES); + + if (virtual8086) { + VIR8086_SEG_LIMIT_TEST(CS); + VIR8086_SEG_LIMIT_TEST(SS); + VIR8086_SEG_LIMIT_TEST(DS); + VIR8086_SEG_LIMIT_TEST(ES); + VIR8086_SEG_LIMIT_TEST(FS); + VIR8086_SEG_LIMIT_TEST(GS); + } + + if (virtual8086) { + VIR8086_SEG_AR_TEST(CS); + VIR8086_SEG_AR_TEST(SS); + VIR8086_SEG_AR_TEST(DS); + VIR8086_SEG_AR_TEST(ES); + VIR8086_SEG_AR_TEST(FS); + VIR8086_SEG_AR_TEST(GS); + 
} else { + + u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES); + u32 ss_ar = vmcs_read32(GUEST_SS_AR_BYTES); + u32 tr_ar = vmcs_read32(GUEST_TR_AR_BYTES); + u32 ldtr_ar = vmcs_read32(GUEST_LDTR_AR_BYTES); + + #define SEG_G_TEST(seg) { \ + u32 lim = vmcs_read32(GUEST_##seg##_LIMIT); \ + u32 ar = vmcs_read32(GUEST_##seg##_AR_BYTES); \ + int err = 0; \ + if (((lim & ~PAGE_MASK) != ~PAGE_MASK) && (ar & AR_G_MASK)) \ + err = 1; \ + if ((lim & ~((1u << 20) - 1)) && !(ar & AR_G_MASK)) \ + err = 1; \ + if (err) { \ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, G err. lim" \ + " is 0x%x\n", \ + __FUNCTION__, \ + ar, lim); \ + return 0; \ + } \ + } + + + if (!(cs_ar & AR_TYPE_ACCESSES_MASK)) { + vcpu_printf(vcpu, "%s: cs AR 0x%x, accesses is clear\n", + __FUNCTION__, + cs_ar); + return 0; + } + + if (!(cs_ar & AR_TYPE_CODE_MASK)) { + vcpu_printf(vcpu, "%s: cs AR 0x%x, code is clear\n", + __FUNCTION__, + cs_ar); + return 0; + } + + if (!(cs_ar & AR_S_MASK)) { + vcpu_printf(vcpu, "%s: cs AR 0x%x, type is sys\n", + __FUNCTION__, + cs_ar); + return 0; + } + + if ((cs_ar & AR_TYPE_MASK) >= 8 && (cs_ar & AR_TYPE_MASK) < 12 && + AR_DPL(cs_ar) != + (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK) ) { + vcpu_printf(vcpu, "%s: cs AR 0x%x, " + "DPL(0x%x) not as RPL(0x%x)\n", + __FUNCTION__, + cs_ar, AR_DPL(cs_ar), vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK); + return 0; + } + + if ((cs_ar & AR_TYPE_MASK) >= 13 && (cs_ar & AR_TYPE_MASK) < 16 && + AR_DPL(cs_ar) > + (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK) ) { + vcpu_printf(vcpu, "%s: cs AR 0x%x, " + "DPL greater than RPL\n", + __FUNCTION__, + cs_ar); + return 0; + } + + if (!(cs_ar & AR_P_MASK)) { + vcpu_printf(vcpu, "%s: CS AR 0x%x, not " + "present\n", + __FUNCTION__, + cs_ar); + return 0; + } + + if ((cs_ar & AR_RESERVD_MASK)) { + vcpu_printf(vcpu, "%s: CS AR 0x%x, reseved" + " bits are set\n", + __FUNCTION__, + cs_ar); + return 0; + } + + if (long_mode & (cs_ar & AR_L_MASK) && (cs_ar & AR_DB_MASK)) { + vcpu_printf(vcpu, 
"%s: CS AR 0x%x, DB and L are set" + " in long mode\n", + __FUNCTION__, + cs_ar); + return 0; + + } + + SEG_G_TEST(CS); + + if (!(ss_ar & AR_UNUSABLE_MASK)) { + if ((ss_ar & AR_TYPE_MASK) != 3 && + (ss_ar & AR_TYPE_MASK) != 7 ) { + vcpu_printf(vcpu, "%s: ss AR 0x%x, usable and type" + " is not 3 or 7\n", + __FUNCTION__, + ss_ar); + return 0; + } + + if (!(ss_ar & AR_S_MASK)) { + vcpu_printf(vcpu, "%s: ss AR 0x%x, usable and" + " is sys\n", + __FUNCTION__, + ss_ar); + return 0; + } + if (!(ss_ar & AR_P_MASK)) { + vcpu_printf(vcpu, "%s: SS AR 0x%x, usable" + " and not present\n", + __FUNCTION__, + ss_ar); + return 0; + } + + if ((ss_ar & AR_RESERVD_MASK)) { + vcpu_printf(vcpu, "%s: SS AR 0x%x, reseved" + " bits are set\n", + __FUNCTION__, + ss_ar); + return 0; + } + + SEG_G_TEST(SS); + + } + + if (AR_DPL(ss_ar) != + (vmcs_read16(GUEST_SS_SELECTOR) & SELECTOR_RPL_MASK) ) { + vcpu_printf(vcpu, "%s: SS AR 0x%x, " + "DPL not as RPL\n", + __FUNCTION__, + ss_ar); + return 0; + } + + #define SEG_AR_TEST(seg) {\ + u32 ar = vmcs_read32(GUEST_##seg##_AR_BYTES);\ + if (!(ar & AR_UNUSABLE_MASK)) {\ + if (!(ar & AR_TYPE_ACCESSES_MASK)) {\ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\ + "usable and not accesses\n",\ + __FUNCTION__,\ + ar);\ + return 0;\ + }\ + if ((ar & AR_TYPE_CODE_MASK) &&\ + !(ar & AR_TYPE_READABLE_MASK)) {\ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\ + "code and not readable\n",\ + __FUNCTION__,\ + ar);\ + return 0;\ + }\ + if (!(ar & AR_S_MASK)) {\ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, usable and"\ + " is sys\n",\ + __FUNCTION__,\ + ar);\ + return 0;\ + }\ + if ((ar & AR_TYPE_MASK) >= 0 && \ + (ar & AR_TYPE_MASK) < 12 && \ + AR_DPL(ar) < (vmcs_read16(GUEST_##seg##_SELECTOR) & \ + SELECTOR_RPL_MASK) ) {\ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\ + "DPL less than RPL\n",\ + __FUNCTION__,\ + ar);\ + return 0;\ + }\ + if (!(ar & AR_P_MASK)) {\ + vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, usable and"\ + " not present\n",\ + __FUNCTION__,\ + ar);\ + return 0;\ + }\ 
+ if ((ar & AR_RESERVD_MASK)) {\ + vcpu_printf(vcpu, "%s: "#seg" AR"\ + " 0x%x, reseved"\ + " bits are set\n",\ + __FUNCTION__,\ + ar);\ + return 0;\ + }\ + SEG_G_TEST(seg)\ + }\ + } + +#undef DS +#undef ES +#undef FS +#undef GS + + SEG_AR_TEST(DS); + SEG_AR_TEST(ES); + SEG_AR_TEST(FS); + SEG_AR_TEST(GS); + + // TR test + if (long_mode) { + if ((tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) { + vcpu_printf(vcpu, "%s: TR AR 0x%x, long" + " mode and not 64bit busy" + " tss\n", + __FUNCTION__, + tr_ar); + return 0; + } + } else { + if ((tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_32_TSS && + (tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_16_TSS) { + vcpu_printf(vcpu, "%s: TR AR 0x%x, legacy" + " mode and not 16/32bit " + "busy tss\n", + __FUNCTION__, + tr_ar); + return 0; + } + + } + if ((tr_ar & AR_S_MASK)) { + vcpu_printf(vcpu, "%s: TR AR 0x%x, S is set\n", + __FUNCTION__, + tr_ar); + return 0; + } + if (!(tr_ar & AR_P_MASK)) { + vcpu_printf(vcpu, "%s: TR AR 0x%x, P is not set\n", + __FUNCTION__, + tr_ar); + return 0; + } + + if ((tr_ar & (AR_RESERVD_MASK| AR_UNUSABLE_MASK))) { + vcpu_printf(vcpu, "%s: TR AR 0x%x, reserved bit are" + " set\n", + __FUNCTION__, + tr_ar); + return 0; + } + SEG_G_TEST(TR); + + // TR test + if (!(ldtr_ar & AR_UNUSABLE_MASK)) { + + if ((ldtr_ar & AR_TYPE_MASK) != AR_TYPE_LDT) { + vcpu_printf(vcpu, "%s: LDTR AR 0x%x," + " bad type\n", + __FUNCTION__, + ldtr_ar); + return 0; + } + + if ((ldtr_ar & AR_S_MASK)) { + vcpu_printf(vcpu, "%s: LDTR AR 0x%x," + " S is set\n", + __FUNCTION__, + ldtr_ar); + return 0; + } + + if (!(ldtr_ar & AR_P_MASK)) { + vcpu_printf(vcpu, "%s: LDTR AR 0x%x," + " P is not set\n", + __FUNCTION__, + ldtr_ar); + return 0; + } + if ((ldtr_ar & AR_RESERVD_MASK)) { + vcpu_printf(vcpu, "%s: LDTR AR 0x%x," + " reserved bit are set\n", + __FUNCTION__, + ldtr_ar); + return 0; + } + SEG_G_TEST(LDTR); + } + } + + // GDTR and IDTR + + + #define IDT_GDT_TEST(reg)\ + if (!is_canonical(vmcs_readl(GUEST_##reg##_BASE))) {\ + vcpu_printf(vcpu, "%s: 
"#reg" BASE 0x%lx, not canonical\n",\
+			__FUNCTION__,\
+			vmcs_readl(GUEST_##reg##_BASE));\
+		return 0;\
+	}\
+	if (vmcs_read32(GUEST_##reg##_LIMIT) >> 16) {\
+		vcpu_printf(vcpu, "%s: "#reg" LIMIT 0x%x, size err\n",\
+			__FUNCTION__,\
+			vmcs_read32(GUEST_##reg##_LIMIT));\
+		return 0;\
+	}\
+
+	IDT_GDT_TEST(GDTR);
+	IDT_GDT_TEST(IDTR);
+
+
+	// RIP
+
+	// Outside 64-bit code (not long mode, or CS.L clear) RIP must fit
+	// in the low 32 bits.
+	if ((!long_mode || !(vmcs_read32(GUEST_CS_AR_BYTES) & AR_L_MASK)) &&
+	    vmcs_readl(GUEST_RIP) & ~((1ULL << 32) - 1) ){
+		vcpu_printf(vcpu, "%s: RIP 0x%lx, size err\n",
+			__FUNCTION__,
+			vmcs_readl(GUEST_RIP));
+		return 0;
+	}
+
+	if (!is_canonical(vmcs_readl(GUEST_RIP))) {
+		vcpu_printf(vcpu, "%s: RIP 0x%lx, not canonical\n",
+			__FUNCTION__,
+			vmcs_readl(GUEST_RIP));
+		return 0;
+	}
+
+	// RFLAGS
+	// NOTE(review): "RESEVED" in the macro names below is a typo for
+	// "RESERVED"; names are code, so they are kept in this comment-only
+	// pass.  Bits 63:22, 15, 5 and 3 must be clear; bit 1 must be set.
+	#define RFLAGS_RESEVED_CLEAR_BITS\
+	(~((1ULL << 22) - 1) | (1ULL << 15) | (1ULL << 5) | (1ULL << 3))
+	#define RFLAGS_RESEVED_SET_BITS (1 << 1)
+
+	if ((rflags & RFLAGS_RESEVED_CLEAR_BITS) ||
+	    !(rflags & RFLAGS_RESEVED_SET_BITS)) {
+		vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, reserved bits 0x%llx 0x%x\n",
+			__FUNCTION__,
+			rflags,
+			RFLAGS_RESEVED_CLEAR_BITS,
+			RFLAGS_RESEVED_SET_BITS);
+		return 0;
+	}
+
+	// RFLAGS.VM (virtual-8086) is incompatible with long mode.
+	if (long_mode && virtual8086) {
+		vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, vm and long mode\n",
+			__FUNCTION__,
+			rflags);
+		return 0;
+	}
+
+
+	// Reject the entry when an external interrupt is being injected
+	// (VM-entry interruption-info valid, type = external) while
+	// RFLAGS.RF is clear.
+	if (!(rflags & RFLAGS_RF)) {
+		u32 vm_entry_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+		if ((vm_entry_info & INTR_INFO_VALID_MASK) &&
+		    (vm_entry_info & INTR_INFO_INTR_TYPE_MASK) ==
+		    INTR_TYPE_EXT_INTR) {
+			vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, external"
+				" interrupt and RF is clear\n",
+				__FUNCTION__,
+				rflags);
+			return 0;
+		}
+
+	}
+
+	cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+	cpu_secondary_exec_ctrl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+	tpr_threshold = vmcs_read32(TPR_THRESHOLD);
+
+	// TPR-shadow consistency: when the TPR-shadow execution control is
+	// set, bits 31:4 of the TPR threshold must be zero.
+	if ((cpu_exec_ctrl & CPU_BASED_TPR_SHADOW)) {
+		if (tpr_threshold & ~0xf) {
+			vcpu_printf(vcpu, "%s: if TPR shadow execution control"
+				" is 1 bits 31:4 of TPR threshold must"
+				" 
be 0", __FUNCTION__);
+			return 0;
+		}
+		// Without "virtualize APIC accesses", the threshold may not
+		// exceed the TPR value in the virtual-APIC page (byte 0x80,
+		// bits 7:4).
+		if (!(cpu_secondary_exec_ctrl &
+		    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+			u32 apic_tpr = *((u32 *)(vcpu->arch.apic->regs + 0x80));
+			apic_tpr >>= 4;
+			if (tpr_threshold > apic_tpr) {
+				vcpu_printf(vcpu, "%s: if TPR shadow execution control"
+					" is 1 and virtual apic accesses is 0"
+					" the value of bits 3:0 of the TPR "
+					"threshold VM-execution control field"
+					" should not be greater than the value"
+					" of bits 7:4 in byte 80H on the "
+					"virtual-APIC page", __FUNCTION__);
+				return 0;
+			}
+
+		}
+	}
+
+	// to be continued from Checks on Guest Non-Register State (22.3.1.5)
+	return 1;
+}
+
+// Verify that @cr has every bit of the FIXED0 MSR set and no bit outside
+// the FIXED1 MSR set (the VMX "fixed bits" rule for CR0/CR4).  Reads the
+// two MSRs on the current CPU.  Returns 1 if @cr is acceptable, 0 (after
+// logging the offending value) otherwise.
+static int check_fixed_bits(struct kvm_vcpu *vcpu, const char *reg,
+			    unsigned long cr,
+			    u32 msr_fixed_0, u32 msr_fixed_1)
+{
+	u64 fixed_bits_0, fixed_bits_1;
+
+	rdmsrl(msr_fixed_0, fixed_bits_0);
+	rdmsrl(msr_fixed_1, fixed_bits_1);
+	if ((cr & fixed_bits_0) != fixed_bits_0) {
+		vcpu_printf(vcpu, "%s: %s (%lx) has one of %llx unset\n",
+			    __FUNCTION__, reg, cr, fixed_bits_0);
+		return 0;
+	}
+	if ((~cr & ~fixed_bits_1) != ~fixed_bits_1) {
+		vcpu_printf(vcpu, "%s: %s (%lx) has one of %llx set\n",
+			    __FUNCTION__, reg, cr, ~fixed_bits_1);
+		return 0;
+	}
+	return 1;
+}
+
+// Physical-address width of the host CPU, from CPUID 0x80000008 EAX[7:0].
+static int phys_addr_width(void)
+{
+	unsigned eax, ebx, ecx, edx;
+
+	cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+	return eax & 0xff;
+}
+
+// Check that @reg is a canonical 64-bit address (bits 63:48 equal to bit
+// 47, via an arithmetic shift).  Trivially true on 32-bit builds.
+// Returns 1 if canonical, 0 (after logging) otherwise.
+static int check_canonical(struct kvm_vcpu *vcpu, const char *name,
+			   unsigned long reg)
+{
+#ifdef CONFIG_X86_64
+	unsigned long x;
+
+	if (sizeof(reg) == 4)
+		return 1;
+	x = (long)reg >> 48;
+	if (!(x == 0 || x == ~0UL)) {
+		vcpu_printf(vcpu, "%s: %s (%lx) not canonical\n",
+			    __FUNCTION__, name, reg);
+		return 0;
+	}
+#endif
+	return 1;
+}
+
+// Validate a host selector: when @rpl_ti is set, the RPL and TI bits
+// (low three bits) must be zero; when @null is set, the selector itself
+// must not be null.  Returns 1 on pass, 0 (after logging) on failure.
+static int check_selector(struct kvm_vcpu *vcpu, const char *name,
+			  int rpl_ti, int null,
+			  u16 sel)
+{
+	if (rpl_ti && (sel & 7)) {
+		vcpu_printf(vcpu, "%s: %s (%x) nonzero rpl or ti\n",
+			    __FUNCTION__, name, sel);
+		return 0;
+	}
+	if (null && !sel) {
+		vcpu_printf(vcpu, "%s: %s (%x) 
zero\n",
+			    __FUNCTION__, name, sel);
+		return 0;
+	}
+	return 1;
+}
+
+//#define MSR_IA32_VMX_CR0_FIXED0 0x486
+//#define MSR_IA32_VMX_CR0_FIXED1 0x487
+
+//#define MSR_IA32_VMX_CR4_FIXED0 0x488
+//#define MSR_IA32_VMX_CR4_FIXED1 0x489
+// VM-exit control bit: "host address-space size" (64-bit host).
+#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200
+
+// Validate the host-state area of the current VMCS against the VM-entry
+// checks (SDM sections cited inline).  Runs every check rather than
+// stopping at the first failure; returns 1 if all pass, 0 otherwise.
+int vm_entry_test_host(struct kvm_vcpu *vcpu)
+{
+	int r = 1;
+	unsigned long cr0 = vmcs_readl(HOST_CR0);
+	unsigned long cr4 = vmcs_readl(HOST_CR4);
+	unsigned long cr3 = vmcs_readl(HOST_CR3);
+	int host_64;
+
+	host_64 = vmcs_read32(VM_EXIT_CONTROLS) & VM_EXIT_HOST_ADD_SPACE_SIZE;
+
+	/* 22.2.2 */
+	r &= check_fixed_bits(vcpu, "host cr0", cr0, MSR_IA32_VMX_CR0_FIXED0,
+			      MSR_IA32_VMX_CR0_FIXED1);
+
+	// NOTE(review): the label below says "host cr0" but the value being
+	// checked is cr4 against the CR4 fixed-bit MSRs — the message should
+	// read "host cr4" (runtime string, not changed in this pass).
+	r &= check_fixed_bits(vcpu, "host cr0", cr4, MSR_IA32_VMX_CR4_FIXED0,
+			      MSR_IA32_VMX_CR4_FIXED1);
+	// Host CR3 must not exceed the CPU's physical-address width.
+	if ((u64)cr3 >> phys_addr_width()) {
+		vcpu_printf(vcpu, "%s: cr3 (%lx) vs phys addr width\n",
+			    __FUNCTION__, cr3);
+		r = 0;
+	}
+
+	r &= check_canonical(vcpu, "host ia32_sysenter_eip",
+			     vmcs_readl(HOST_IA32_SYSENTER_EIP));
+	r &= check_canonical(vcpu, "host ia32_sysenter_esp",
+			     vmcs_readl(HOST_IA32_SYSENTER_ESP));
+
+	/* 22.2.3 */
+	// CS and TR may never be null; SS may be null only on a 64-bit host.
+	r &= check_selector(vcpu, "host cs", 1, 1,
+			    vmcs_read16(HOST_CS_SELECTOR));
+	r &= check_selector(vcpu, "host ss", 1, !host_64,
+			    vmcs_read16(HOST_SS_SELECTOR));
+	r &= check_selector(vcpu, "host ds", 1, 0,
+			    vmcs_read16(HOST_DS_SELECTOR));
+	r &= check_selector(vcpu, "host es", 1, 0,
+			    vmcs_read16(HOST_ES_SELECTOR));
+	r &= check_selector(vcpu, "host fs", 1, 0,
+			    vmcs_read16(HOST_FS_SELECTOR));
+	r &= check_selector(vcpu, "host gs", 1, 0,
+			    vmcs_read16(HOST_GS_SELECTOR));
+	r &= check_selector(vcpu, "host tr", 1, 1,
+			    vmcs_read16(HOST_TR_SELECTOR));
+
+#ifdef CONFIG_X86_64
+	r &= check_canonical(vcpu, "host fs base",
+			     vmcs_readl(HOST_FS_BASE));
+	r &= check_canonical(vcpu, "host gs base",
+			     vmcs_readl(HOST_GS_BASE));
+	r &= check_canonical(vcpu, "host gdtr base",
+			     vmcs_readl(HOST_GDTR_BASE));
+	r &= check_canonical(vcpu, "host idtr base",
+			     
vmcs_readl(HOST_IDTR_BASE));
+#endif
+
+	/* 22.2.4 */
+#ifdef CONFIG_X86_64
+	// A 64-bit kernel requires the "host address-space size" exit
+	// control, PAE enabled in host CR4, and a canonical host RIP.
+	if (!host_64) {
+		vcpu_printf(vcpu, "%s: vm exit controls: !64 bit host\n",
+			    __FUNCTION__);
+		r = 0;
+	}
+	if (!(cr4 & X86_CR4_PAE)) {
+		vcpu_printf(vcpu, "%s: cr4 (%lx): !pae\n",
+			    __FUNCTION__, cr4);
+		r = 0;
+	}
+	r &= check_canonical(vcpu, "host rip", vmcs_readl(HOST_RIP));
+#endif
+
+	return r;
+}
+
+// Run both guest-state and host-state VM-entry checks (both always run,
+// so all diagnostics are printed).  Returns 1 only if both pass.
+int vm_entry_test(struct kvm_vcpu *vcpu)
+{
+	int rg, rh;
+
+	rg = vm_entry_test_guest(vcpu);
+	rh = vm_entry_test_host(vcpu);
+	return rg && rh;
+}
+
+// Dump the guest-state fields of the current VMCS through vcpu_printf,
+// for debugging failed VM entries.  Read-only; no VMCS fields are written.
+void vmcs_dump(struct kvm_vcpu *vcpu)
+{
+	vcpu_printf(vcpu, "************************ vmcs_dump ************************\n");
+	vcpu_printf(vcpu, "VM_ENTRY_CONTROLS 0x%x\n", vmcs_read32(VM_ENTRY_CONTROLS));
+
+	// Control registers.
+	vcpu_printf(vcpu, "GUEST_CR0 0x%lx\n", vmcs_readl(GUEST_CR0));
+	vcpu_printf(vcpu, "GUEST_CR3 0x%lx\n", vmcs_readl(GUEST_CR3));
+	vcpu_printf(vcpu, "GUEST_CR4 0x%lx\n", vmcs_readl(GUEST_CR4));
+
+	vcpu_printf(vcpu, "GUEST_SYSENTER_ESP 0x%lx\n", vmcs_readl(GUEST_SYSENTER_ESP));
+	vcpu_printf(vcpu, "GUEST_SYSENTER_EIP 0x%lx\n", vmcs_readl(GUEST_SYSENTER_EIP));
+
+
+	vcpu_printf(vcpu, "GUEST_IA32_DEBUGCTL 0x%llx\n", vmcs_read64(GUEST_IA32_DEBUGCTL));
+	vcpu_printf(vcpu, "GUEST_DR7 0x%lx\n", vmcs_readl(GUEST_DR7));
+
+	vcpu_printf(vcpu, "GUEST_RFLAGS 0x%lx\n", vmcs_readl(GUEST_RFLAGS));
+	vcpu_printf(vcpu, "GUEST_RIP 0x%lx\n", vmcs_readl(GUEST_RIP));
+
+	// Segment selectors.
+	vcpu_printf(vcpu, "GUEST_CS_SELECTOR 0x%x\n", vmcs_read16(GUEST_CS_SELECTOR));
+	vcpu_printf(vcpu, "GUEST_DS_SELECTOR 0x%x\n", vmcs_read16(GUEST_DS_SELECTOR));
+	vcpu_printf(vcpu, "GUEST_ES_SELECTOR 0x%x\n", vmcs_read16(GUEST_ES_SELECTOR));
+	vcpu_printf(vcpu, "GUEST_FS_SELECTOR 0x%x\n", vmcs_read16(GUEST_FS_SELECTOR));
+	vcpu_printf(vcpu, "GUEST_GS_SELECTOR 0x%x\n", vmcs_read16(GUEST_GS_SELECTOR));
+	vcpu_printf(vcpu, "GUEST_SS_SELECTOR 0x%x\n", vmcs_read16(GUEST_SS_SELECTOR));
+
+	vcpu_printf(vcpu, "GUEST_TR_SELECTOR 0x%x\n", vmcs_read16(GUEST_TR_SELECTOR));
+	vcpu_printf(vcpu, 
"GUEST_LDTR_SELECTOR 0x%x\n", vmcs_read16(GUEST_LDTR_SELECTOR));
+
+	// Segment access-rights bytes.
+	vcpu_printf(vcpu, "GUEST_CS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_CS_AR_BYTES));
+	vcpu_printf(vcpu, "GUEST_DS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_DS_AR_BYTES));
+	vcpu_printf(vcpu, "GUEST_ES_AR_BYTES 0x%x\n", vmcs_read32(GUEST_ES_AR_BYTES));
+	vcpu_printf(vcpu, "GUEST_FS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_FS_AR_BYTES));
+	vcpu_printf(vcpu, "GUEST_GS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_GS_AR_BYTES));
+	vcpu_printf(vcpu, "GUEST_SS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_SS_AR_BYTES));
+
+	vcpu_printf(vcpu, "GUEST_LDTR_AR_BYTES 0x%x\n", vmcs_read32(GUEST_LDTR_AR_BYTES));
+	vcpu_printf(vcpu, "GUEST_TR_AR_BYTES 0x%x\n", vmcs_read32(GUEST_TR_AR_BYTES));
+
+	// Segment bases.
+	vcpu_printf(vcpu, "GUEST_CS_BASE 0x%lx\n", vmcs_readl(GUEST_CS_BASE));
+	vcpu_printf(vcpu, "GUEST_DS_BASE 0x%lx\n", vmcs_readl(GUEST_DS_BASE));
+	vcpu_printf(vcpu, "GUEST_ES_BASE 0x%lx\n", vmcs_readl(GUEST_ES_BASE));
+	vcpu_printf(vcpu, "GUEST_FS_BASE 0x%lx\n", vmcs_readl(GUEST_FS_BASE));
+	vcpu_printf(vcpu, "GUEST_GS_BASE 0x%lx\n", vmcs_readl(GUEST_GS_BASE));
+	vcpu_printf(vcpu, "GUEST_SS_BASE 0x%lx\n", vmcs_readl(GUEST_SS_BASE));
+
+
+	vcpu_printf(vcpu, "GUEST_LDTR_BASE 0x%lx\n", vmcs_readl(GUEST_LDTR_BASE));
+	vcpu_printf(vcpu, "GUEST_TR_BASE 0x%lx\n", vmcs_readl(GUEST_TR_BASE));
+
+	// Segment limits.
+	vcpu_printf(vcpu, "GUEST_CS_LIMIT 0x%x\n", vmcs_read32(GUEST_CS_LIMIT));
+	vcpu_printf(vcpu, "GUEST_DS_LIMIT 0x%x\n", vmcs_read32(GUEST_DS_LIMIT));
+	vcpu_printf(vcpu, "GUEST_ES_LIMIT 0x%x\n", vmcs_read32(GUEST_ES_LIMIT));
+	vcpu_printf(vcpu, "GUEST_FS_LIMIT 0x%x\n", vmcs_read32(GUEST_FS_LIMIT));
+	vcpu_printf(vcpu, "GUEST_GS_LIMIT 0x%x\n", vmcs_read32(GUEST_GS_LIMIT));
+	vcpu_printf(vcpu, "GUEST_SS_LIMIT 0x%x\n", vmcs_read32(GUEST_SS_LIMIT));
+
+	vcpu_printf(vcpu, "GUEST_LDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_LDTR_LIMIT));
+	vcpu_printf(vcpu, "GUEST_TR_LIMIT 0x%x\n", vmcs_read32(GUEST_TR_LIMIT));
+
+	// Descriptor tables.
+	vcpu_printf(vcpu, "GUEST_GDTR_BASE 0x%lx\n", 
vmcs_readl(GUEST_GDTR_BASE));
+	vcpu_printf(vcpu, "GUEST_IDTR_BASE 0x%lx\n", vmcs_readl(GUEST_IDTR_BASE));
+
+	vcpu_printf(vcpu, "GUEST_GDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_GDTR_LIMIT));
+	vcpu_printf(vcpu, "GUEST_IDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_IDTR_LIMIT));
+
+	// Execution controls.
+	vcpu_printf(vcpu, "EXCEPTION_BITMAP 0x%x\n", vmcs_read32(EXCEPTION_BITMAP));
+	vcpu_printf(vcpu, "CPU_BASED_VM_EXEC_CONTROL 0x%x\n", vmcs_read32(CPU_BASED_VM_EXEC_CONTROL));
+	vcpu_printf(vcpu, "SECONDARY_VM_EXEC_CONTROL 0x%x\n", vmcs_read32(SECONDARY_VM_EXEC_CONTROL));
+	// NOTE(review): "TPR_THREASHOLD" below is a typo for "TPR_THRESHOLD"
+	// (output string only; left untouched in this comment-only pass).
+	vcpu_printf(vcpu, "TPR_THREASHOLD 0x%x\n", vmcs_read32(TPR_THRESHOLD));
+	// TPR lives at offset 0x80 of the virtual-APIC page.
+	vcpu_printf(vcpu, "TPR 0x%x\n", *((u32 *) (vcpu->arch.apic->regs + 0x80)));
+	vcpu_printf(vcpu, "***********************************************************\n");
+}
+
+// Dump the guest general-purpose registers from the vcpu register cache,
+// plus RSP/RIP/RFLAGS as read directly from the VMCS (the two can differ
+// when the cache is stale — useful for debugging).
+void regs_dump(struct kvm_vcpu *vcpu)
+{
+	#define REG_DUMP(reg) \
+		vcpu_printf(vcpu, #reg" = 0x%lx(VCPU)\n", vcpu->arch.regs[VCPU_REGS_##reg])
+	#define VMCS_REG_DUMP(reg) \
+		vcpu_printf(vcpu, #reg" = 0x%lx(VMCS)\n", vmcs_readl(GUEST_##reg))
+
+	vcpu_printf(vcpu, "************************ regs_dump ************************\n");
+	REG_DUMP(RAX);
+	REG_DUMP(RBX);
+	REG_DUMP(RCX);
+	REG_DUMP(RDX);
+	REG_DUMP(RSP);
+	REG_DUMP(RBP);
+	REG_DUMP(RSI);
+	REG_DUMP(RDI);
+	REG_DUMP(R8);
+	REG_DUMP(R9);
+	REG_DUMP(R10);
+	REG_DUMP(R11);
+	REG_DUMP(R12);
+	REG_DUMP(R13);
+	REG_DUMP(R14);
+	REG_DUMP(R15);
+
+	VMCS_REG_DUMP(RSP);
+	VMCS_REG_DUMP(RIP);
+	VMCS_REG_DUMP(RFLAGS);
+
+	vcpu_printf(vcpu, "***********************************************************\n");
+}
+
+// Dump the architectural control registers and EFER as cached in
+// vcpu->arch (not re-read from the VMCS).
+void sregs_dump(struct kvm_vcpu *vcpu)
+{
+	vcpu_printf(vcpu, "************************ sregs_dump ************************\n");
+	vcpu_printf(vcpu, "cr0 = 0x%lx\n", vcpu->arch.cr0);
+	vcpu_printf(vcpu, "cr2 = 0x%lx\n", vcpu->arch.cr2);
+	vcpu_printf(vcpu, "cr3 = 0x%lx\n", vcpu->arch.cr3);
+	vcpu_printf(vcpu, "cr4 = 0x%lx\n", vcpu->arch.cr4);
+	vcpu_printf(vcpu, "cr8 = 0x%lx\n", vcpu->arch.cr8);
+	vcpu_printf(vcpu, "shadow_efer = 0x%llx\n", 
vcpu->arch.shadow_efer);
+	vcpu_printf(vcpu, "***********************************************************\n");
+}
+
+// Print the interrupt queued for injection, if any, with a "(soft)"
+// suffix for software interrupts.
+void show_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+	vcpu_printf(vcpu, "************************ pending interrupts ****************\n");
+	if (vcpu->arch.interrupt.pending)
+		vcpu_printf(vcpu, "nr = %d%s\n", vcpu->arch.interrupt.nr, vcpu->arch.interrupt.soft?"(soft)":"");
+	vcpu_printf(vcpu, "************************************************************\n");
+}
+
+// One-stop debug dump: GPRs, control registers, VMCS guest state and any
+// pending interrupt for @vcpu.
+void vcpu_dump(struct kvm_vcpu *vcpu)
+{
+	regs_dump(vcpu);
+	sregs_dump(vcpu);
+	vmcs_dump(vcpu);
+	show_pending_interrupts(vcpu);
+	/* more ... */
+}
+#endif
+
|