author     Izik Eidus <ieidus@redhat.com>    2009-10-26 17:06:32 +0200
committer  Yaniv Kamay <ykamay@redhat.com>   2009-10-26 22:53:15 +0200
commit     54d132f06ea4cda5caf6fe47295f990631f80241 (patch)
tree       92942ccf82ad98ee451815384fe635c9674f23ac
parent     94bca281a7d09eb16acf184291ae1b5dd4497bb3 (diff)
vdesktop: add new kernel dir
Signed-off-by: Izik Eidus <ieidus@redhat.com>
-rw-r--r--  kernel/.gitignore                              72
-rw-r--r--  kernel/.gitmodules                              3
-rw-r--r--  kernel/COPYING                                339
-rw-r--r--  kernel/Kbuild                                   2
-rw-r--r--  kernel/Makefile                                74
-rw-r--r--  kernel/anon_inodes.c                          275
-rwxr-xr-x  kernel/configure                              131
-rw-r--r--  kernel/external-module-compat-comm.h         1015
-rw-r--r--  kernel/external-module-compat.c               470
-rw-r--r--  kernel/ia64/Kbuild                             13
-rw-r--r--  kernel/ia64/Makefile.pre                       27
-rw-r--r--  kernel/ia64/external-module-compat.h           60
-rw-r--r--  kernel/include-compat/asm-ia64/msidef.h        42
-rw-r--r--  kernel/include-compat/asm-x86/asm.h             3
-rw-r--r--  kernel/include-compat/asm-x86/cmpxchg.h         3
-rw-r--r--  kernel/include-compat/asm-x86/mce.h             1
-rw-r--r--  kernel/include-compat/asm-x86/msidef.h         55
-rw-r--r--  kernel/include-compat/asm-x86/msr-index.h       1
-rw-r--r--  kernel/include-compat/asm-x86/pvclock-abi.h    42
-rw-r--r--  kernel/include-compat/linux/anon_inodes.h      16
-rw-r--r--  kernel/include-compat/linux/eventfd.h           1
-rw-r--r--  kernel/include-compat/linux/ftrace_event.h      1
-rw-r--r--  kernel/include-compat/linux/intel-iommu.h     355
-rw-r--r--  kernel/include-compat/linux/iommu.h           112
-rw-r--r--  kernel/include-compat/linux/iova.h             52
-rw-r--r--  kernel/include-compat/linux/magic.h            41
-rw-r--r--  kernel/include-compat/linux/marker.h          119
-rw-r--r--  kernel/include-compat/linux/math64.h            3
-rw-r--r--  kernel/include-compat/linux/mmu_notifier.h      6
-rw-r--r--  kernel/include-compat/linux/msi.h              50
-rw-r--r--  kernel/include-compat/linux/mutex.h             3
-rw-r--r--  kernel/include-compat/linux/srcu.h             53
-rw-r--r--  kernel/include-compat/linux/tracepoint.h        1
-rw-r--r--  kernel/include-compat/trace/define_trace.h      2
-rw-r--r--  kernel/kvm-kmod.spec                           52
-rw-r--r--  kernel/powerpc/Makefile.pre                     1
-rw-r--r--  kernel/request-irq-compat.c                    44
-rw-r--r--  kernel/scripts/65-kvm.rules                     2
-rwxr-xr-x  kernel/scripts/make-release                    95
-rw-r--r--  kernel/srcu.c                                 267
-rwxr-xr-x  kernel/sync                                   248
-rw-r--r--  kernel/unifdef.h                               40
-rw-r--r--  kernel/x86/Kbuild                              14
-rw-r--r--  kernel/x86/Makefile.pre                         1
-rw-r--r--  kernel/x86/debug.h                             23
-rw-r--r--  kernel/x86/external-module-compat.h           687
-rw-r--r--  kernel/x86/preempt.c                          247
-rw-r--r--  kernel/x86/vmx-debug.c                       1112
48 files changed, 6276 insertions, 0 deletions
diff --git a/kernel/.gitignore b/kernel/.gitignore
new file mode 100644
index 00000000..5c46f862
--- /dev/null
+++ b/kernel/.gitignore
@@ -0,0 +1,72 @@
+*.o
+*.d
+*~
+*.flat
+*.a
+.*.cmd
+*.ko
+*.mod.c
+config.mak
+modules.order
+Module.symvers
+Modules.symvers
+Module.markers
+.tmp_versions
+include-compat/asm
+include
+x86/modules.order
+x86/i825[49].[ch]
+x86/kvm_main.c
+x86/kvm_svm.h
+x86/vmx.[ch]
+x86/svm.[ch]
+x86/mmu.[ch]
+x86/paging_tmpl.h
+x86/ioapic.[ch]
+x86/iodev.h
+x86/irq.[ch]
+x86/lapic.[ch]
+x86/tss.h
+x86/x86.[ch]
+x86/coalesced_mmio.[ch]
+x86/kvm_cache_regs.h
+x86/irq_comm.c
+x86/timer.c
+x86/kvm_timer.h
+x86/iommu.c
+x86/svm-trace.h
+x86/trace-arch.h
+x86/trace.h
+x86/vmx-trace.h
+x86/assigned-dev.c
+x86/emulate.c
+x86/eventfd.c
+x86/mmutrace.h
+ia64/asm-offsets.c
+ia64/coalesced_mmio.[ch]
+ia64/ioapic.[ch]
+ia64/iodev.h
+ia64/iommu.c
+ia64/irq.h
+ia64/irq_comm.c
+ia64/kvm-ia64.c
+ia64/kvm_fw.c
+ia64/kvm_lib.c
+ia64/kvm_main.c
+ia64/kvm_minstate.h
+ia64/lapic.h
+ia64/memcpy.S
+ia64/memset.S
+ia64/misc.h
+ia64/mmio.c
+ia64/optvfault.S
+ia64/process.c
+ia64/trampoline.S
+ia64/vcpu.[ch]
+ia64/vmm.c
+ia64/vmm_ivt.S
+ia64/vti.h
+ia64/vtlb.c
+ia64/assigned-dev.c
+ia64/eventfd.c
+.stgit-*
diff --git a/kernel/.gitmodules b/kernel/.gitmodules
new file mode 100644
index 00000000..9c639211
--- /dev/null
+++ b/kernel/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "linux-2.6"]
+ path = linux-2.6
+ url = ../kvm.git
diff --git a/kernel/COPYING b/kernel/COPYING
new file mode 100644
index 00000000..fb60aad2
--- /dev/null
+++ b/kernel/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
diff --git a/kernel/Kbuild b/kernel/Kbuild
new file mode 100644
index 00000000..ec34c43e
--- /dev/null
+++ b/kernel/Kbuild
@@ -0,0 +1,2 @@
+obj-$(CONFIG_X86) += x86/
+obj-$(CONFIG_IA64) += ia64/
diff --git a/kernel/Makefile b/kernel/Makefile
new file mode 100644
index 00000000..f406e3d8
--- /dev/null
+++ b/kernel/Makefile
@@ -0,0 +1,74 @@
+include config.mak
+
+ARCH_DIR = $(if $(filter $(ARCH),x86_64 i386),x86,$(ARCH))
+ARCH_CONFIG := $(shell echo $(ARCH_DIR) | tr '[:lower:]' '[:upper:]')
+# NONARCH_CONFIG is used for unifdef; for now it only covers X86 and IA64
+NONARCH_CONFIG = $(filter-out $(ARCH_CONFIG),X86 IA64)
+
+KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR))
+
+DESTDIR=
+
+MAKEFILE_PRE = $(ARCH_DIR)/Makefile.pre
+
+INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR))
+ORIGMODDIR = $(patsubst %/build,%/kernel,$(KERNELDIR))
+
+rpmrelease = devel
+
+LINUX = ./linux-2.6
+
+all:: prerequisite
+# include header priority: 1) $LINUX 2) $KERNELDIR 3) include-compat
+ $(MAKE) -C $(KERNELDIR) M=`pwd` \
+ LINUXINCLUDE="-I`pwd`/include -Iinclude \
+ $(if $(KERNELSOURCEDIR),\
+ -Iinclude2 -I$(KERNELSOURCEDIR)/include -I$(KERNELSOURCEDIR)/arch/${ARCH_DIR}/include, \
+ -Iarch/${ARCH_DIR}/include) -I`pwd`/include-compat -I`pwd`/${ARCH_DIR} \
+ -include include/linux/autoconf.h \
+ -include `pwd`/$(ARCH_DIR)/external-module-compat.h" \
+ "$$@"
+
+include $(MAKEFILE_PRE)
+
+.PHONY: sync
+
+KVM_VERSION_GIT = $(if $(and $(filter kvm-devel,$(KVM_VERSION)), \
+ $(wildcard $(LINUX)/.git)), \
+ $(shell git --git-dir=$(LINUX)/.git describe), \
+ $(KVM_VERSION))
+
+sync:
+ ./sync -v $(KVM_VERSION_GIT) -l $(LINUX)
+
+install:
+ mkdir -p $(DESTDIR)/$(INSTALLDIR)
+ cp $(ARCH_DIR)/*.ko $(DESTDIR)/$(INSTALLDIR)
+ for i in $(DESTDIR)/$(ORIGMODDIR)/drivers/kvm/*.ko \
+ $(DESTDIR)/$(ORIGMODDIR)/arch/$(ARCH_DIR)/kvm/*.ko; do \
+ if [ -f "$$i" ]; then mv "$$i" "$$i.orig"; fi; \
+ done
+ /sbin/depmod -a $(DEPMOD_VERSION) -b $(DESTDIR)
+ install -m 644 -D scripts/65-kvm.rules $(DESTDIR)/etc/udev/rules.d/65-kvm.rules
+
+tmpspec = .tmp.kvm-kmod.spec
+
+rpm-topdir := $$(pwd)/rpmtop
+
+RPMDIR = $(rpm-topdir)/RPMS
+
+rpm: all
+ mkdir -p $(rpm-topdir)/BUILD $(RPMDIR)/$$(uname -i)
+ sed 's/^Release:.*/Release: $(rpmrelease)/; s/^%define kverrel.*/%define kverrel $(KVERREL)/' \
+ kvm-kmod.spec > $(tmpspec)
+ rpmbuild --define="kverrel $(KVERREL)" \
+ --define="objdir $$(pwd)/$(ARCH_DIR)" \
+ --define="_rpmdir $(RPMDIR)" \
+ --define="_topdir $(rpm-topdir)" \
+ -bb $(tmpspec)
+
+clean:
+ $(MAKE) -C $(KERNELDIR) M=`pwd` $@
+
+distclean: clean
+ rm -f config.mak include/asm include-compat/asm
diff --git a/kernel/anon_inodes.c b/kernel/anon_inodes.c
new file mode 100644
index 00000000..135adaea
--- /dev/null
+++ b/kernel/anon_inodes.c
@@ -0,0 +1,275 @@
+/*
+ * fs/anon_inodes.c
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
+ *
+ * Thanks to Arnd Bergmann for code review and suggestions.
+ * More changes following Thomas Gleixner's suggestions.
+ *
+ */
+
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/magic.h>
+#include <linux/anon_inodes.h>
+
+#include <asm/uaccess.h>
+
+/* anon_inodes on RHEL >= 5.2 is equivalent to the 2.6.27 version */
+#ifdef RHEL_RELEASE_CODE
+# if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2)) && defined(CONFIG_ANON_INODES)
+# define RHEL_ANON_INODES
+# endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) && !defined(RHEL_ANON_INODES)
+
+static struct vfsmount *anon_inode_mnt __read_mostly;
+static struct inode *anon_inode_inode;
+static struct file_operations anon_inode_fops;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,17)
+
+static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *data,
+ struct vfsmount *mnt)
+{
+ return get_sb_pseudo(fs_type, "kvm_anon_inode:", NULL, 0x99700426, mnt);
+}
+
+#else
+
+static struct super_block *anon_inodefs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name,
+ void *data)
+{
+ return get_sb_pseudo(fs_type, "kvm_anon_inode:", NULL, 0x99700426);
+}
+
+#endif
+
+static int anon_inodefs_delete_dentry(struct dentry *dentry)
+{
+ /*
+ * We faked vfs to believe the dentry was hashed when we created it.
+ * Now we restore the flag so that dput() will work correctly.
+ */
+ dentry->d_flags |= DCACHE_UNHASHED;
+ return 1;
+}
+
+static struct file_system_type anon_inode_fs_type = {
+ .name = "kvm_anon_inodefs",
+ .get_sb = anon_inodefs_get_sb,
+ .kill_sb = kill_anon_super,
+};
+static struct dentry_operations anon_inodefs_dentry_operations = {
+ .d_delete = anon_inodefs_delete_dentry,
+};
+
+/**
+ * anon_inode_getfd - creates a new file instance by hooking it up to an
+ * anonymous inode, and a dentry that describes the "class"
+ * of the file
+ *
+ * @name: [in] name of the "class" of the new file
+ * @fops: [in] file operations for the new file
+ * @priv: [in] private data for the new file (will be the file's private_data)
+ *
+ * Creates a new file by hooking it on a single inode. This is useful for files
+ * that do not need to have a full-fledged inode in order to operate correctly.
+ * All the files created with anon_inode_getfd() will share a single inode,
+ * hence saving memory and avoiding code duplication for the file/inode/dentry
+ * setup. Returns the new descriptor or -error.
+ */
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
+ void *priv, int flags)
+{
+ struct qstr this;
+ struct dentry *dentry;
+ struct inode *inode;
+ struct file *file;
+ int error, fd;
+
+ if (IS_ERR(anon_inode_inode))
+ return -ENODEV;
+ file = get_empty_filp();
+ if (!file)
+ return -ENFILE;
+
+ inode = igrab(anon_inode_inode);
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
+ goto err_put_filp;
+ }
+
+ error = get_unused_fd();
+ if (error < 0)
+ goto err_iput;
+ fd = error;
+
+ /*
+ * Link the inode to a directory entry by creating a unique name
+ * using the inode sequence number.
+ */
+ error = -ENOMEM;
+ this.name = name;
+ this.len = strlen(name);
+ this.hash = 0;
+ dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+ if (!dentry)
+ goto err_put_unused_fd;
+ dentry->d_op = &anon_inodefs_dentry_operations;
+ /* Do not publish this dentry inside the global dentry hash table */
+ dentry->d_flags &= ~DCACHE_UNHASHED;
+ d_instantiate(dentry, inode);
+
+ file->f_vfsmnt = mntget(anon_inode_mnt);
+ file->f_dentry = dentry;
+ file->f_mapping = inode->i_mapping;
+
+ file->f_pos = 0;
+ file->f_flags = O_RDWR;
+ file->f_op = (struct file_operations *)fops;
+ file->f_mode = FMODE_READ | FMODE_WRITE;
+ file->f_version = 0;
+ file->private_data = priv;
+
+ fd_install(fd, file);
+
+ return fd;
+
+err_put_unused_fd:
+ put_unused_fd(fd);
+err_iput:
+ iput(inode);
+err_put_filp:
+ fput(file);
+ return error;
+}
+
+/*
+ * A single inode exists for all anon_inode files. Contrary to pipes,
+ * anon_inode inodes have no per-instance data associated, so we can avoid
+ * allocating multiple of them.
+ */
+static struct inode *anon_inode_mkinode(void)
+{
+ struct inode *inode = new_inode(anon_inode_mnt->mnt_sb);
+
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+
+ inode->i_fop = &anon_inode_fops;
+
+ /*
+ * Mark the inode dirty from the very beginning,
+ * that way it will never be moved to the dirty
+ * list because mark_inode_dirty() will think
+ * that it already _is_ on the dirty list.
+ */
+ inode->i_state = I_DIRTY;
+ inode->i_mode = S_IRUSR | S_IWUSR;
+ inode->i_uid = current->fsuid;
+ inode->i_gid = current->fsgid;
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ return inode;
+}
+
+static int anon_inode_init(void)
+{
+ int error;
+
+ error = register_filesystem(&anon_inode_fs_type);
+ if (error)
+ goto err_exit;
+ anon_inode_mnt = kern_mount(&anon_inode_fs_type);
+ if (IS_ERR(anon_inode_mnt)) {
+ error = PTR_ERR(anon_inode_mnt);
+ goto err_unregister_filesystem;
+ }
+ anon_inode_inode = anon_inode_mkinode();
+ if (IS_ERR(anon_inode_inode)) {
+ error = PTR_ERR(anon_inode_inode);
+ goto err_mntput;
+ }
+
+ return 0;
+
+err_mntput:
+ mntput(anon_inode_mnt);
+err_unregister_filesystem:
+ unregister_filesystem(&anon_inode_fs_type);
+err_exit:
+ return -ENOMEM;
+}
+
+int kvm_init_anon_inodes(void)
+{
+ return anon_inode_init();
+}
+
+void kvm_exit_anon_inodes(void)
+{
+ iput(anon_inode_inode);
+ mntput(anon_inode_mnt);
+ unregister_filesystem(&anon_inode_fs_type);
+}
+
+#else
+
+int kvm_init_anon_inodes(void)
+{
+ return 0;
+}
+
+void kvm_exit_anon_inodes(void)
+{
+}
+
+#undef anon_inode_getfd
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) && !defined(RHEL_ANON_INODES)
+
+int kvm_anon_inode_getfd(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags)
+{
+ int r;
+ int fd;
+ struct inode *inode;
+ struct file *file;
+
+ r = anon_inode_getfd(&fd, &inode, &file, name, fops, priv);
+ if (r < 0)
+ return r;
+ return fd;
+}
+
+#elif LINUX_VERSION_CODE == KERNEL_VERSION(2,6,26) && !defined(RHEL_ANON_INODES)
+
+int kvm_anon_inode_getfd(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags)
+{
+ return anon_inode_getfd(name, fops, priv);
+}
+
+#else
+
+int kvm_anon_inode_getfd(const char *name,
+ const struct file_operations *fops,
+ void *priv, int flags)
+{
+ return anon_inode_getfd(name, fops, priv, flags);
+}
+
+#endif
+
+#endif
diff --git a/kernel/configure b/kernel/configure
new file mode 100755
index 00000000..7122d400
--- /dev/null
+++ b/kernel/configure
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+kernelsourcedir=
+kerneldir=/lib/modules/$(uname -r)/build
+cc=gcc
+ld=ld
+objcopy=objcopy
+ar=ar
+want_module=1
+cross_prefix=
+arch=`uname -m`
+# don't use uname if kerneldir is set
+no_uname=
+# we only need depmod_version for kvm.ko install
+depmod_version=
+if [ -z "TMPDIR" ] ; then
+ TMPDIR=.
+fi
+
+usage() {
+ cat <<-EOF
+ Usage: $0 [options]
+
+ Options include:
+ --arch=ARCH architecture to compile for ($arch)
+ --cross-prefix=PREFIX prefix for cross compile
+ --kerneldir=DIR kernel build directory ($kerneldir)
+ --help this helpful text
+EOF
+ exit 1
+}
+
+while [[ "$1" = -* ]]; do
+ opt="$1"; shift
+ arg=
+ hasarg=
+ if [[ "$opt" = *=* ]]; then
+ arg="${opt#*=}"
+ opt="${opt%%=*}"
+ hasarg=1
+ fi
+ case "$opt" in
+ --kerneldir)
+ kerneldir="$arg"
+ no_uname=1
+ ;;
+ --with-patched-kernel)
+ want_module=
+ ;;
+ --arch)
+ arch="$arg"
+ ;;
+ --cross-prefix)
+ cross_prefix="$arg"
+ ;;
+ --help)
+ usage
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+karch="$arch"
+
+case $arch in
+ i?86*|x86_64*)
+ arch=${arch/#i?86/i386}
+ karch="x86"
+ ;;
+esac
+
+kvm_version() {
+ local fname="$(dirname "$0")/KVM_VERSION"
+
+ if test -f "$fname"; then
+ cat "$fname"
+ else
+ echo "kvm-devel"
+ fi
+}
+
+processor=${arch#*-}
+arch=${arch%%-*}
+
+# see if we have split build and source directories
+if [ -d "$kerneldir/include2" ]; then
+ kernelsourcedir=$kerneldir/source
+ if [ ! -L "$kernelsourcedir" ]; then
+ kernelsourcedir=${kerneldir%/build*}/source
+ fi
+fi
+
+if [ -n "$no_uname" -a "$want_module" ]; then
+ if [ -e "$kerneldir/.kernelrelease" ]; then
+ depmod_version=`cat "$kerneldir/.kernelrelease"`
+
+ elif [ -e "$kerneldir/include/config/kernel.release" ]; then
+ depmod_version=`cat "$kerneldir/include/config/kernel.release"`
+ elif [ -e "$kerneldir/.config" ]; then
+ depmod_version=$(awk '/Linux kernel version:/ { print $NF }' \
+ "$kerneldir/.config")
+ else
+ echo
+ echo "Error: kernelversion not found"
+ echo "Please make sure your kernel is configured"
+ echo
+ exit 1
+ fi
+fi
+
+rm -f include/asm include-compat/asm
+mkdir -p include
+ln -sf asm-"$karch" include/asm
+ln -sf asm-"$karch" include-compat/asm
+
+cat <<EOF > config.mak
+ARCH=$arch
+PROCESSOR=$processor
+PREFIX=$prefix
+KERNELDIR=$kerneldir
+KERNELSOURCEDIR=$kernelsourcedir
+CROSS_COMPILE=$cross_prefix
+CC=$cross_prefix$cc
+LD=$cross_prefix$ld
+OBJCOPY=$cross_prefix$objcopy
+AR=$cross_prefix$ar
+DEPMOD_VERSION=$depmod_version
+KVM_VERSION=$(kvm_version)
+EOF
diff --git a/kernel/external-module-compat-comm.h b/kernel/external-module-compat-comm.h
new file mode 100644
index 00000000..cec117ba
--- /dev/null
+++ b/kernel/external-module-compat-comm.h
@@ -0,0 +1,1015 @@
+
+/*
+ * Compatibility header for building as an external module.
+ */
+
+/*
+ * Avoid picking up the kernel's kvm.h in case we have a newer one.
+ */
+
+#include <linux/compiler.h>
+#include <linux/version.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+#include <linux/cpu.h>
+#include <linux/time.h>
+#include <asm/processor.h>
+#include <linux/hrtimer.h>
+#include <asm/bitops.h>
+
+/*
+ * 2.6.16 does not have GFP_NOWAIT
+ */
+
+#include <linux/gfp.h>
+
+#ifndef GFP_NOWAIT
+#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH)
+#endif
+
+
+/*
+ * kvm profiling support needs 2.6.20
+ */
+#include <linux/profile.h>
+
+#ifndef KVM_PROFILING
+#define KVM_PROFILING 1234
+#define prof_on 4321
+#endif
+
+/*
+ * smp_call_function_single() is not exported below 2.6.20, and has different
+ * semantics below 2.6.23. The 'nonatomic' argument was removed in 2.6.27.
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+
+int kvm_smp_call_function_single(int cpu, void (*func)(void *info),
+ void *info, int wait);
+#undef smp_call_function_single
+#define smp_call_function_single kvm_smp_call_function_single
+
+#endif
+
+/* on_each_cpu() lost an argument in 2.6.27. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+
+#define kvm_on_each_cpu(func, info, wait) on_each_cpu(func, info, 0, wait)
+
+#else
+
+#define kvm_on_each_cpu(func, info, wait) on_each_cpu(func, info, wait)
+
+#endif
+
+/*
+ * The mutex API was introduced in 2.6.16; fall back to semaphores before that.
+ */
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,15)
+#define DEFINE_MUTEX(a) DECLARE_MUTEX(a)
+#define mutex_lock_interruptible(a) down_interruptible(a)
+#define mutex_unlock(a) up(a)
+#define mutex_lock(a) down(a)
+#define mutex_init(a) init_MUTEX(a)
+#define mutex_trylock(a) down_trylock(a)
+#define mutex semaphore
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)
+#ifndef kzalloc
+#define kzalloc(size,flags) \
+({ \
+ void *__ret = kmalloc(size, flags); \
+ if (__ret) \
+ memset(__ret, 0, size); \
+ __ret; \
+})
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
+#ifndef kmem_cache_zalloc
+#define kmem_cache_zalloc(cache,flags) \
+({ \
+ void *__ret = kmem_cache_alloc(cache, flags); \
+ if (__ret) \
+ memset(__ret, 0, kmem_cache_size(cache)); \
+ __ret; \
+})
+#endif
+#endif
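
The two fallbacks above rely on GCC's statement-expression extension: a
({ ... }) block is an expression whose value is that of its last statement,
which lets a macro allocate, conditionally zero, and still yield the pointer.
A minimal user-space sketch of the same pattern (hypothetical, not part of
the patch):

	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>

	/* same shape as the kzalloc fallback: allocate, zero on success,
	 * yield the pointer */
	#define xzalloc(size)				\
	({						\
		void *__ret = malloc(size);		\
		if (__ret)				\
			memset(__ret, 0, (size));	\
		__ret;					\
	})

	int main(void)
	{
		int *p = xzalloc(16 * sizeof(int));

		if (p)
			printf("%d\n", p[0]);	/* prints 0: buffer is zeroed */
		free(p);
		return 0;
	}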
+
+/*
+ * The cpu hotplug stubs are broken if !CONFIG_HOTPLUG_CPU
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+
+#ifndef CONFIG_HOTPLUG_CPU
+#define register_cpu_notifier(nb) (0)
+#endif
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+#define nr_cpu_ids NR_CPUS
+#endif
+
+#include <linux/miscdevice.h>
+#ifndef KVM_MINOR
+#define KVM_MINOR 232
+#endif
+
+#include <linux/notifier.h>
+#ifndef CPU_TASKS_FROZEN
+
+#define CPU_TASKS_FROZEN 0x0010
+#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)
+#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN)
+#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
+#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+
+#endif
+
+#ifndef CPU_DYING
+#define CPU_DYING 0x000A
+#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
+#endif
+
+#include <asm/system.h>
+
+struct inode;
+#include <linux/anon_inodes.h>
+#define anon_inode_getfd kvm_anon_inode_getfd
+int kvm_init_anon_inodes(void);
+void kvm_exit_anon_inodes(void);
+int anon_inode_getfd(const char *name,
+ const struct file_operations *fops,
+ void *priv , int flags);
+
+/*
+ * 2.6.23 removed the cache destructor
+ */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+# define kmem_cache_create(name, size, align, flags, ctor) \
+ kmem_cache_create(name, size, align, flags, ctor, NULL)
+#endif
+
+/* HRTIMER_MODE_ABS was called HRTIMER_ABS before 2.6.21 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+#define HRTIMER_MODE_ABS HRTIMER_ABS
+#endif
+
+/* div64_u64() was only introduced in 2.6.26 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+
+#define div64_u64 kvm_div64_u64
+
+#ifdef CONFIG_64BIT
+
+static inline uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
+{
+ return dividend / divisor;
+}
+
+#else
+
+uint64_t div64_u64(uint64_t dividend, uint64_t divisor);
+
+#endif
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+#ifdef RHEL_RELEASE_CODE
+#if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2)
+#define RHEL_BOOL 1
+#endif
+#endif
+
+#ifndef RHEL_BOOL
+
+typedef _Bool bool;
+
+#define false 0
+#define true 1
+
+#endif
+
+#endif
+
+/*
+ * PF_VCPU is a Linux 2.6.24 addition
+ */
+
+#include <linux/sched.h>
+
+#ifndef PF_VCPU
+#define PF_VCPU 0
+#endif
+
+/*
+ * smp_call_function_mask() is not defined/exported below 2.6.24 on all
+ * targets and below 2.6.26 on x86-64
+ */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) || \
+ (defined CONFIG_X86_64 && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26))
+
+int kvm_smp_call_function_mask(cpumask_t mask, void (*func) (void *info),
+ void *info, int wait);
+
+#define smp_call_function_mask kvm_smp_call_function_mask
+
+void kvm_smp_send_reschedule(int cpu);
+
+#else
+
+#define kvm_smp_send_reschedule smp_send_reschedule
+
+#endif
+
+/* empty_zero_page isn't exported in all kernels */
+#include <asm/pgtable.h>
+
+#define empty_zero_page kvm_empty_zero_page
+
+static char empty_zero_page[PAGE_SIZE];
+
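+/* blahblah() exists only to reference empty_zero_page, silencing the
+ * compiler's unused-variable warning for the static above */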
+static inline void blahblah(void)
+{
+ (void)empty_zero_page[0];
+}
+
+/* __mmdrop() is not exported before 2.6.25 */
+#include <linux/sched.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+
+#define mmdrop(x) do { (void)(x); } while (0)
+#define mmget(x) do { (void)(x); } while (0)
+
+#else
+
+#define mmget(x) do { atomic_inc(x); } while (0)
+
+#endif
+
+/* pagefault_disable() and pagefault_enable() were introduced in 2.6.20 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+# define KVM_NEED_PAGEFAULT_DISABLE 1
+# ifdef RHEL_RELEASE_CODE
+# if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,3)
+# undef KVM_NEED_PAGEFAULT_DISABLE
+# endif
+# endif
+#endif
+
+#ifdef KVM_NEED_PAGEFAULT_DISABLE
+
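+/* raising the preempt count makes the page fault handler see in_atomic()
+ * and fail the fault instead of sleeping */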
+static inline void pagefault_disable(void)
+{
+ inc_preempt_count();
+ /*
+ * make sure to have issued the store before a pagefault
+ * can hit.
+ */
+ barrier();
+}
+
+static inline void pagefault_enable(void)
+{
+ /*
+ * make sure to issue those last loads/stores before enabling
+ * the pagefault handler again.
+ */
+ barrier();
+ dec_preempt_count();
+ /*
+ * make sure we do..
+ */
+ barrier();
+ preempt_check_resched();
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+#include <asm/uaccess.h>
+#else
+#include <linux/uaccess.h>
+#endif
+
+/* vm ops ->fault() was introduced in 2.6.23. */
+#include <linux/mm.h>
+
+#ifdef KVM_MAIN
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+struct vm_fault {
+ unsigned int flags;
+ pgoff_t pgoff;
+ void __user *virtual_address;
+ struct page *page;
+};
+
+static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+
+static inline struct page *kvm_nopage_to_fault(
+ int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf),
+ struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ struct vm_fault vmf;
+ int ret;
+
+ vmf.pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+ vmf.virtual_address = (void __user *)address;
+ ret = fault(vma, &vmf);
+ if (ret)
+ return NOPAGE_SIGBUS;
+ *type = VM_FAULT_MINOR;
+ return vmf.page;
+}
+
+static inline struct page *__kvm_vcpu_fault(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ return kvm_nopage_to_fault(kvm_vcpu_fault, vma, address, type);
+}
+
+static inline struct page *__kvm_vm_fault(struct vm_area_struct *vma,
+ unsigned long address,
+ int *type)
+{
+ return kvm_nopage_to_fault(kvm_vm_fault, vma, address, type);
+}
+
+#define VMA_OPS_FAULT(x) nopage
+#define VMA_OPS_FAULT_FUNC(x) __##x
+
+#else
+
+#define VMA_OPS_FAULT(x) x
+#define VMA_OPS_FAULT_FUNC(x) x
+
+#endif
+#endif
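
The VMA_OPS_FAULT macro pair lets a single vm_operations_struct initializer
compile against both APIs: pre-2.6.23 kernels get the ->nopage member and the
__-prefixed adapter, newer kernels get ->fault and the handler itself. A
sketch of the intended use (with the kvm_vcpu_fault() handler declared above):

	static struct vm_operations_struct kvm_vcpu_vm_ops = {
		.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(kvm_vcpu_fault),
	};
	/* expands to .nopage = __kvm_vcpu_fault on old kernels,
	 * and to .fault = kvm_vcpu_fault on 2.6.23 and later */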
+
+/* the simple vfs attribute getter signature changed in 2.6.25 to add a return code */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+
+#define MAKE_SIMPLE_ATTRIBUTE_GETTER(x) \
+ static u64 x(void *v) \
+ { \
+ u64 ret = 0; \
+ \
+ __##x(v, &ret); \
+ return ret; \
+ }
+
+#else
+
+#define MAKE_SIMPLE_ATTRIBUTE_GETTER(x) \
+ static int x(void *v, u64 *val) \
+ { \
+ return __##x(v, val); \
+ }
+
+#endif
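
Since 2.6.25 the debugfs simple-attribute getter returns an int and writes
its result through a u64 pointer; older kernels expect the u64 to be returned
directly. The macro wraps a __-prefixed primitive in whichever signature the
running kernel wants, so the same DEFINE_SIMPLE_ATTRIBUTE() line works
everywhere. A sketch with a hypothetical __vm_stat_get():

	static int __vm_stat_get(void *offset, u64 *val)
	{
		*val = 0;	/* hypothetical: read the statistic at 'offset' */
		return 0;
	}
	MAKE_SIMPLE_ATTRIBUTE_GETTER(vm_stat_get)

	DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");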
+
+/* set_kset_name() is gone in 2.6.25 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
+
+#define set_kset_name(x) .name = x
+
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,25)
+#ifndef FASTCALL
+#define FASTCALL(x) x
+#define fastcall
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+unsigned kvm_get_tsc_khz(void);
+#define kvm_tsc_khz (kvm_get_tsc_khz())
+
+#else
+
+#define kvm_tsc_khz tsc_khz
+
+#endif
+
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,21)
+
+#include <linux/ktime.h>
+#include <linux/hrtimer.h>
+
+#define ktime_get kvm_ktime_get
+
+static inline ktime_t ktime_get(void)
+{
+ struct timespec now;
+
+ ktime_get_ts(&now);
+
+ return timespec_to_ktime(now);
+}
+
+#endif
+
+/* __aligned arrived in 2.6.21 */
+#ifndef __aligned
+#define __aligned(x) __attribute__((__aligned__(x)))
+#endif
+
+#include <linux/mm.h>
+
+/* The shrinker API changed in 2.6.23 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+struct kvm_shrinker {
+ int (*shrink)(int nr_to_scan, gfp_t gfp_mask);
+ int seeks;
+ struct shrinker *kshrinker;
+};
+
+static inline void register_shrinker(struct kvm_shrinker *shrinker)
+{
+ shrinker->kshrinker = set_shrinker(shrinker->seeks, shrinker->shrink);
+}
+
+static inline void unregister_shrinker(struct kvm_shrinker *shrinker)
+{
+ if (shrinker->kshrinker)
+ remove_shrinker(shrinker->kshrinker);
+}
+
+#define shrinker kvm_shrinker
+
+#endif
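
With this wrapper, code written against the 2.6.23+ shrinker API compiles
unchanged on older kernels; register_shrinker() simply forwards to the old
set_shrinker() there. A sketch, assuming a hypothetical mmu_shrink()
callback:

	static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask)
	{
		return 0;	/* hypothetical: scan and free cached objects */
	}

	static struct shrinker mmu_shrinker = {
		.shrink = mmu_shrink,
		.seeks = DEFAULT_SEEKS,
	};

	/* register_shrinker(&mmu_shrinker) then works on either API */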
+
+/* clocksource */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+static inline u32 clocksource_khz2mult(u32 khz, u32 shift_constant)
+{
+ /* khz = cyc/(Million ns)
+ * mult/2^shift = ns/cyc
+ * mult = ns/cyc * 2^shift
+ * mult = 1Million/khz * 2^shift
+ * mult = 1000000 * 2^shift / khz
+ * mult = (1000000<<shift) / khz
+ */
+ u64 tmp = ((u64)1000000) << shift_constant;
+
+ tmp += khz/2; /* round for do_div */
+ do_div(tmp, khz);
+
+ return (u32)tmp;
+}
+#else
+#include <linux/clocksource.h>
+#endif
+
+/* hrtimer_init/start/cancel are not exported below 2.6.17; resolve them via kallsyms */
+#include <linux/kallsyms.h>
+extern void (*hrtimer_init_p)(struct hrtimer *timer, clockid_t which_clock,
+ enum hrtimer_mode mode);
+extern int (*hrtimer_start_p)(struct hrtimer *timer, ktime_t tim,
+ const enum hrtimer_mode mode);
+extern int (*hrtimer_cancel_p)(struct hrtimer *timer);
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) && defined(CONFIG_KALLSYMS)
+static inline void hrtimer_kallsyms_resolve(void)
+{
+ hrtimer_init_p = (void *) kallsyms_lookup_name("hrtimer_init");
+ BUG_ON(!hrtimer_init_p);
+ hrtimer_start_p = (void *) kallsyms_lookup_name("hrtimer_start");
+ BUG_ON(!hrtimer_start_p);
+ hrtimer_cancel_p = (void *) kallsyms_lookup_name("hrtimer_cancel");
+ BUG_ON(!hrtimer_cancel_p);
+}
+#else
+static inline void hrtimer_kallsyms_resolve(void)
+{
+ hrtimer_init_p = hrtimer_init;
+ hrtimer_start_p = hrtimer_start;
+ hrtimer_cancel_p = hrtimer_cancel;
+}
+#endif
+
+/* handle old hrtimer API with data pointer */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
+static inline void hrtimer_data_pointer(struct hrtimer *timer)
+{
+ timer->data = (void *)timer;
+}
+#else
+static inline void hrtimer_data_pointer(struct hrtimer *timer) {}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+
+#define ns_to_timespec kvm_ns_to_timespec
+
+struct timespec kvm_ns_to_timespec(const s64 nsec);
+
+#endif
+
+/* work_struct lost the 'data' field in 2.6.20 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+#define kvm_INIT_WORK(work, handler) \
+ INIT_WORK(work, (void (*)(void *))handler, work)
+
+#else
+
+#define kvm_INIT_WORK(work, handler) INIT_WORK(work, handler)
+
+#endif
+
+/* cancel_work_sync() was flush_work() in 2.6.21 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+
+static inline int cancel_work_sync(struct work_struct *work)
+{
+ /*
+ * FIXME: actually cancel. How? Add own implementation of workqueues?
+ */
+ return 0;
+}
+
+/* ... and it returned void before 2.6.23 */
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+#define cancel_work_sync(work) ({ cancel_work_sync(work); 0; })
+
+#endif
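
The 2.6.22 branch works because a function-like macro is never re-expanded
inside its own replacement list: the inner cancel_work_sync(work) resolves to
the real (void-returning) kernel function, while the statement expression
supplies the int return that callers written against 2.6.23+ expect. A
hypothetical user-space demo of the same trick:

	#include <stdio.h>

	static void notify(int x)
	{
		printf("notify %d\n", x);
	}

	/* wrap the void function so call sites can treat it as returning int */
	#define notify(x) ({ notify(x); 0; })

	int main(void)
	{
		int r = notify(5);	/* calls the real notify(), then yields 0 */

		return r;
	}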
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+
+static inline void flush_work(struct work_struct *work)
+{
+ cancel_work_sync(work);
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+struct pci_dev;
+
+struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn);
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17)
+#include <linux/relayfs_fs.h>
+#else
+#include <linux/relay.h>
+#endif
+
+/* the relay_open() interface changed in 2.6.21 */
+
+struct rchan *kvm_relay_open(const char *base_filename,
+ struct dentry *parent,
+ size_t subbuf_size,
+ size_t n_subbufs,
+ struct rchan_callbacks *cb,
+ void *private_data);
+
+#else
+
+#define kvm_relay_open relay_open
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+
+static inline int get_user_pages_fast(unsigned long start, int nr_pages,
+ int write, struct page **pages)
+{
+ int npages;
+
+ down_read(&current->mm->mmap_sem);
+ npages = get_user_pages(current, current->mm, start, nr_pages, write,
+ 0, pages, NULL);
+ up_read(&current->mm->mmap_sem);
+
+ return npages;
+}
+
+#endif
+
+/* spin_needbreak() was called need_lockbreak before 2.6.25 */
+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,24)
+
+#define spin_needbreak need_lockbreak
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+static inline void kvm_hrtimer_add_expires_ns(struct hrtimer *timer, u64 delta)
+{
+ timer->expires = ktime_add_ns(timer->expires, delta);
+}
+
+static inline ktime_t kvm_hrtimer_get_expires(struct hrtimer *timer)
+{
+ return timer->expires;
+}
+
+static inline u64 kvm_hrtimer_get_expires_ns(struct hrtimer *timer)
+{
+ return ktime_to_ns(timer->expires);
+}
+
+static inline void kvm_hrtimer_start_expires(struct hrtimer *timer, int mode)
+{
+ hrtimer_start_p(timer, timer->expires, mode);
+}
+
+static inline ktime_t kvm_hrtimer_expires_remaining(const struct hrtimer *timer)
+{
+ return ktime_sub(timer->expires, timer->base->get_time());
+}
+
+#else
+
+#define kvm_hrtimer_add_expires_ns hrtimer_add_expires_ns
+#define kvm_hrtimer_get_expires hrtimer_get_expires
+#define kvm_hrtimer_get_expires_ns hrtimer_get_expires_ns
+#define kvm_hrtimer_start_expires hrtimer_start_expires
+#define kvm_hrtimer_expires_remaining hrtimer_expires_remaining
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+static inline int pci_reset_function(struct pci_dev *dev)
+{
+ return 0;
+}
+
+#endif
+
+#include <linux/interrupt.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+typedef irqreturn_t (*kvm_irq_handler_t)(int, void *);
+int kvm_request_irq(unsigned int a, kvm_irq_handler_t handler, unsigned long c,
+ const char *d, void *e);
+void kvm_free_irq(unsigned int irq, void *dev_id);
+
+#else
+
+#define kvm_request_irq request_irq
+#define kvm_free_irq free_irq
+
+#endif
+
+/* dynamically allocated cpu masks introduced in 2.6.28 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+typedef cpumask_t cpumask_var_t[1];
+
+static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+ return 1;
+}
+
+static inline void free_cpumask_var(cpumask_var_t mask)
+{
+}
+
+static inline void cpumask_clear(cpumask_var_t mask)
+{
+ cpus_clear(*mask);
+}
+
+static inline void cpumask_set_cpu(int cpu, cpumask_var_t mask)
+{
+ cpu_set(cpu, *mask);
+}
+
+static inline int smp_call_function_many(cpumask_var_t cpus,
+ void (*func)(void *data), void *data,
+ int sync)
+{
+ return smp_call_function_mask(*cpus, func, data, sync);
+}
+
+static inline int cpumask_empty(cpumask_var_t mask)
+{
+ return cpus_empty(*mask);
+}
+
+static inline int cpumask_test_cpu(int cpu, cpumask_var_t mask)
+{
+ return cpu_isset(cpu, *mask);
+}
+
+static inline void cpumask_clear_cpu(int cpu, cpumask_var_t mask)
+{
+ cpu_clear(cpu, *mask);
+}
+
+#define cpu_online_mask (&cpu_online_map)
+
+#endif
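
Typedefing cpumask_var_t as a one-element array is what makes the old, inline
storage source-compatible with the new pointer-based API: the variable needs
no allocation, yet decays to a pointer at call sites and supports '->'
access. A hypothetical user-space sketch of the idiom:

	#include <stdio.h>

	struct mask { unsigned long bits; };
	typedef struct mask mask_var_t[1];	/* decays to struct mask * */

	static void set_bit0(struct mask *m)
	{
		m->bits |= 1UL;
	}

	int main(void)
	{
		mask_var_t m = { { 0 } };

		set_bit0(m);			/* no '&' needed: the array decays */
		printf("%lu\n", m->bits);	/* '->' works too; prints 1 */
		return 0;
	}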
+
+/* The zeroing allocator zalloc_cpumask_var() was added late in the 2.6.30 cycle */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
+
+static inline bool zalloc_cpumask_var(cpumask_var_t *mask, gfp_t flags)
+{
+ bool ret;
+
+ ret = alloc_cpumask_var(mask, flags);
+ if (ret)
+ cpumask_clear(*mask);
+ return ret;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29)
+
+#define IF_ANON_INODES_DOES_REFCOUNTS(x)
+
+#else
+
+#define IF_ANON_INODES_DOES_REFCOUNTS(x) x
+
+#endif
+
+
+/* marker_synchronize_unregister() only exists on 2.6.28 and newer: */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+#define marker_synchronize_unregister() synchronize_sched()
+#endif
+
+/* pci_dev.msi_enabled was introduced in 2.6.18 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+
+struct pci_dev;
+
+int kvm_pcidev_msi_enabled(struct pci_dev *dev);
+
+#else
+
+#define kvm_pcidev_msi_enabled(dev) (dev)->msi_enabled
+
+#endif
+
+/* compound_head() was introduced in 2.6.22 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+# define NEED_COMPOUND_HEAD 1
+# ifdef RHEL_RELEASE_CODE
+# if RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,2)
+# undef NEED_COMPOUND_HEAD
+# endif
+# endif
+#endif
+
+#ifdef NEED_COMPOUND_HEAD
+
+static inline struct page *compound_head(struct page *page)
+{
+ if (PageCompound(page))
+ page = (struct page *)page_private(page);
+ return page;
+}
+
+#endif
+
+#include <linux/iommu.h>
+#ifndef IOMMU_CACHE
+
+#define IOMMU_CACHE (4)
+#define IOMMU_CAP_CACHE_COHERENCY 0x1
+static inline int iommu_domain_has_cap(struct iommu_domain *domain,
+ unsigned long cap)
+{
+ return 0;
+}
+
+#endif
+
+#include <linux/file.h>
+
+/* eventfd_fget() will be introduced in 2.6.32 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
+
+static inline struct file *eventfd_fget(int fd)
+{
+ return fget(fd);
+}
+
+#endif
+
+/* srcu was born in 2.6.19 */
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)
+
+#define kvm_init_srcu_struct init_srcu_struct
+#define kvm_cleanup_srcu_struct cleanup_srcu_struct
+#define kvm_srcu_read_lock srcu_read_lock
+#define kvm_srcu_read_unlock srcu_read_unlock
+#define kvm_synchronize_srcu synchronize_srcu
+#define kvm_srcu_batches_completed srcu_batches_completed
+
+#endif
+
+/* tracepoints were introduced in 2.6.28, but changed in 2.6.30 */
+
+#include <linux/tracepoint.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+
+struct tracepoint;
+
+#undef DECLARE_TRACE
+#undef DEFINE_TRACE
+#undef PARAMS
+#undef TP_PROTO
+#undef TP_ARGS
+#undef EXPORT_TRACEPOINT_SYMBOL
+#undef EXPORT_TRACEPOINT_SYMBOL_GPL
+
+#define DECLARE_TRACE(name, proto, args) \
+ static inline void _do_trace_##name(struct tracepoint *tp, proto) \
+ { } \
+ static inline void trace_##name(proto) \
+ { } \
+ static inline int register_trace_##name(void (*probe)(proto)) \
+ { \
+ return -ENOSYS; \
+ } \
+ static inline int unregister_trace_##name(void (*probe)(proto)) \
+ { \
+ return -ENOSYS; \
+ }
+
+#define tracepoint_update_probe_range(begin, end) do {} while (0)
+
+#define DEFINE_TRACE(name)
+#define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
+#define EXPORT_TRACEPOINT_SYMBOL(name)
+
+#define PARAMS(args...) args
+#define TP_PROTO(args...) args
+#define TP_ARGS(args...) args
+
+#define TRACE_EVENT(name, proto, args, struct, assign, print) \
+ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args))
+
+#undef tracepoint_synchronize_unregister
+#define tracepoint_synchronize_unregister() do {} while (0)
+
+#endif
+
+#include <linux/ftrace_event.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+
+struct trace_print_flags {
+ unsigned long mask;
+ const char *name;
+};
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+
+#define alloc_pages_exact_node alloc_pages_node
+
+#endif
+
+/* eventfd accessors, new in 2.6.31 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+
+#include <linux/eventfd.h>
+#include <linux/fs.h>
+
+struct eventfd_ctx;
+
+static inline struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx)
+{
+ struct file *filp = (struct file *)ctx;
+
+ get_file(filp);
+ return ctx;
+}
+
+static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
+{
+ struct file *filp = (struct file *)ctx;
+
+ fput(filp);
+}
+
+static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd)
+{
+ struct file *filp = eventfd_fget(fd);
+
+ return (struct eventfd_ctx *)filp;
+}
+
+static inline struct eventfd_ctx *eventfd_ctx_fileget(struct file *file)
+{
+ return (struct eventfd_ctx *)file;
+}
+
+static inline int kvm_eventfd_signal(struct eventfd_ctx *ctx, int n)
+{
+ return -ENOSYS;
+}
+
+#else
+
+#define kvm_eventfd_signal eventfd_signal
+
+#endif
+
+#include <linux/hugetlb.h>
+
+/* vma_kernel_pagesize, exported since 2.6.32 */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
+
+#if defined(CONFIG_HUGETLB_PAGE) && LINUX_VERSION_CODE > KERNEL_VERSION(2,6,26)
+static inline
+unsigned long kvm_vma_kernel_pagesize(struct vm_area_struct *vma)
+{
+ struct hstate *hstate;
+
+ if (!is_vm_hugetlb_page(vma))
+ return PAGE_SIZE;
+
+ hstate = hstate_vma(vma);
+
+ return 1UL << (hstate->order + PAGE_SHIFT);
+}
+#else /* !CONFIG_HUGETLB_PAGE || <= 2.6.26 */
+#define kvm_vma_kernel_pagesize(v) PAGE_SIZE
+#endif
+
+#else /* >= 2.6.32 */
+
+#define kvm_vma_kernel_pagesize vma_kernel_pagesize
+
+#endif
+
+#ifndef printk_once
+/*
+ * Print a one-time message (analogous to WARN_ONCE() et al):
+ */
+#define printk_once(x...) ({ \
+ static int __print_once = 1; \
+ \
+ if (__print_once) { \
+ __print_once = 0; \
+ printk(x); \
+ } \
+})
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) && !defined(CONFIG_CPU_FREQ)
+static inline unsigned int cpufreq_get(unsigned int cpu)
+{
+ return 0;
+}
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
+#endif
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,27)
+#ifndef CONFIG_MMU_NOTIFIER
+struct mmu_notifier {};
+#endif
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+static inline void hlist_del_init_rcu(struct hlist_node *n)
+{
+ if (!hlist_unhashed(n)) {
+ __hlist_del(n);
+ n->pprev = NULL;
+ }
+}
+#endif
diff --git a/kernel/external-module-compat.c b/kernel/external-module-compat.c
new file mode 100644
index 00000000..327fa6b7
--- /dev/null
+++ b/kernel/external-module-compat.c
@@ -0,0 +1,470 @@
+
+/*
+ * smp_call_function_single() is not exported below 2.6.20.
+ */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+#undef smp_call_function_single
+
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+
+struct scfs_thunk_info {
+ int cpu;
+ void (*func)(void *info);
+ void *info;
+};
+
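+/*
+ * Editor's note, not part of the original patch: with no exported
+ * smp_call_function_single(), the thunk below is broadcast to all other
+ * CPUs via smp_call_function() and runs func only on the CPU whose id
+ * matches the requested one.
+ */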
+static void scfs_thunk(void *_thunk)
+{
+ struct scfs_thunk_info *thunk = _thunk;
+
+ if (raw_smp_processor_id() == thunk->cpu)
+ thunk->func(thunk->info);
+}
+
+int kvm_smp_call_function_single(int cpu, void (*func)(void *info),
+ void *info, int wait)
+{
+ int r, this_cpu;
+ struct scfs_thunk_info thunk;
+
+ this_cpu = get_cpu();
+ WARN_ON(irqs_disabled());
+ if (cpu == this_cpu) {
+ r = 0;
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ } else {
+ thunk.cpu = cpu;
+ thunk.func = func;
+ thunk.info = info;
+ r = smp_call_function(scfs_thunk, &thunk, 0, 1);
+ }
+ put_cpu();
+ return r;
+}
+EXPORT_SYMBOL_GPL(kvm_smp_call_function_single);
+
+#define smp_call_function_single kvm_smp_call_function_single
+
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+/*
+ * Kernels before 2.6.23 don't handle smp_call_function_single() on the current cpu
+ */
+
+#undef smp_call_function_single
+
+#include <linux/smp.h>
+
+int kvm_smp_call_function_single(int cpu, void (*func)(void *info),
+ void *info, int wait)
+{
+ int this_cpu, r;
+
+ this_cpu = get_cpu();
+ WARN_ON(irqs_disabled());
+ if (cpu == this_cpu) {
+ r = 0;
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ } else
+ r = smp_call_function_single(cpu, func, info, 0, wait);
+ put_cpu();
+ return r;
+}
+EXPORT_SYMBOL_GPL(kvm_smp_call_function_single);
+
+#define smp_call_function_single kvm_smp_call_function_single
+
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+
+/* The 'nonatomic' argument was removed in 2.6.27. */
+
+#undef smp_call_function_single
+
+#include <linux/smp.h>
+
+#ifdef CONFIG_SMP
+int kvm_smp_call_function_single(int cpu, void (*func)(void *info),
+ void *info, int wait)
+{
+ return smp_call_function_single(cpu, func, info, 0, wait);
+}
+#else /* !CONFIG_SMP */
+int kvm_smp_call_function_single(int cpu, void (*func)(void *info),
+ void *info, int wait)
+{
+ WARN_ON(cpu != 0);
+ local_irq_disable();
+ func(info);
+ local_irq_enable();
+ return 0;
+}
+#endif /* !CONFIG_SMP */
+EXPORT_SYMBOL_GPL(kvm_smp_call_function_single);
+
+#define smp_call_function_single kvm_smp_call_function_single
+
+#endif
+
+/* div64_u64 is fairly new */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+
+#ifndef CONFIG_64BIT
+
+/* 64-bit divisor, dividend and result; dynamic precision */
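+/*
+ * Editor's note, not part of the original patch: when the divisor does not
+ * fit in 32 bits, both operands are pre-shifted so do_div() sees a 32-bit
+ * divisor; the discarded low bits make the result approximate, matching the
+ * upstream fallback of the same era.
+ */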
+uint64_t div64_u64(uint64_t dividend, uint64_t divisor)
+{
+ uint32_t high, d;
+
+ high = divisor >> 32;
+ if (high) {
+ unsigned int shift = fls(high);
+
+ d = divisor >> shift;
+ dividend >>= shift;
+ } else
+ d = divisor;
+
+ do_div(dividend, d);
+
+ return dividend;
+}
+
+#endif
+
+#endif
+
+/*
+ * smp_call_function_mask() is not defined/exported below 2.6.24 on all
+ * targets and below 2.6.26 on x86-64
+ */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) || \
+ (defined CONFIG_X86_64 && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26))
+
+#include <linux/smp.h>
+
+struct kvm_call_data_struct {
+ void (*func) (void *info);
+ void *info;
+ atomic_t started;
+ atomic_t finished;
+ int wait;
+};
+
+static void kvm_ack_smp_call(void *_data)
+{
+ struct kvm_call_data_struct *data = _data;
+	/* if wait == 0, data can go out of scope
+	 * right after atomic_inc(&data->started)
+	 */
+ void (*func) (void *info) = data->func;
+ void *info = data->info;
+ int wait = data->wait;
+
+ smp_mb();
+ atomic_inc(&data->started);
+ (*func)(info);
+ if (wait) {
+ smp_mb();
+ atomic_inc(&data->finished);
+ }
+}
+
+int kvm_smp_call_function_mask(cpumask_t mask,
+ void (*func) (void *info), void *info, int wait)
+{
+#ifdef CONFIG_SMP
+ struct kvm_call_data_struct data;
+ cpumask_t allbutself;
+ int cpus;
+ int cpu;
+ int me;
+
+ me = get_cpu();
+ WARN_ON(irqs_disabled());
+ allbutself = cpu_online_map;
+ cpu_clear(me, allbutself);
+
+ cpus_and(mask, mask, allbutself);
+ cpus = cpus_weight(mask);
+
+ if (!cpus)
+ goto out;
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ for (cpu = first_cpu(mask); cpu != NR_CPUS; cpu = next_cpu(cpu, mask))
+ smp_call_function_single(cpu, kvm_ack_smp_call, &data, 0);
+
+ while (atomic_read(&data.started) != cpus) {
+ cpu_relax();
+ barrier();
+ }
+
+ if (!wait)
+ goto out;
+
+ while (atomic_read(&data.finished) != cpus) {
+ cpu_relax();
+ barrier();
+ }
+out:
+ put_cpu();
+#endif /* CONFIG_SMP */
+ return 0;
+}
+
+#include <linux/workqueue.h>
+
+static void vcpu_kick_intr(void *info)
+{
+}
+
+struct kvm_kick {
+ int cpu;
+ struct work_struct work;
+};
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+static void kvm_do_smp_call_function(void *data)
+{
+ int me;
+ struct kvm_kick *kvm_kick = data;
+#else
+static void kvm_do_smp_call_function(struct work_struct *work)
+{
+ int me;
+ struct kvm_kick *kvm_kick = container_of(work, struct kvm_kick, work);
+#endif
+ me = get_cpu();
+
+ if (kvm_kick->cpu != me)
+ smp_call_function_single(kvm_kick->cpu, vcpu_kick_intr,
+ NULL, 0);
+ kfree(kvm_kick);
+ put_cpu();
+}
+
+void kvm_queue_smp_call_function(int cpu)
+{
+	struct kvm_kick *kvm_kick = kmalloc(sizeof(struct kvm_kick), GFP_ATOMIC);
+
+	if (unlikely(!kvm_kick))
+		return;
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+ INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function, kvm_kick);
+#else
+ INIT_WORK(&kvm_kick->work, kvm_do_smp_call_function);
+#endif
+
+ schedule_work(&kvm_kick->work);
+}
+
+void kvm_smp_send_reschedule(int cpu)
+{
+ if (irqs_disabled()) {
+ kvm_queue_smp_call_function(cpu);
+ return;
+ }
+ smp_call_function_single(cpu, vcpu_kick_intr, NULL, 0);
+}
+#endif
+
+/* manually export hrtimer_init/start/cancel */
+void (*hrtimer_init_p)(struct hrtimer *timer, clockid_t which_clock,
+ enum hrtimer_mode mode);
+int (*hrtimer_start_p)(struct hrtimer *timer, ktime_t tim,
+ const enum hrtimer_mode mode);
+int (*hrtimer_cancel_p)(struct hrtimer *timer);
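+
+/*
+ * Editor's note, not part of the original patch: on kernels where these
+ * hrtimer symbols are not exported to modules, the pointers above are
+ * presumably resolved at module load time and the compat call sites are
+ * routed through them.
+ */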
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+
+static void kvm_set_normalized_timespec(struct timespec *ts, time_t sec,
+ long nsec)
+{
+ while (nsec >= NSEC_PER_SEC) {
+ nsec -= NSEC_PER_SEC;
+ ++sec;
+ }
+ while (nsec < 0) {
+ nsec += NSEC_PER_SEC;
+ --sec;
+ }
+ ts->tv_sec = sec;
+ ts->tv_nsec = nsec;
+}
+
+struct timespec kvm_ns_to_timespec(const s64 nsec)
+{
+ struct timespec ts;
+
+ if (!nsec)
+ return (struct timespec) {0, 0};
+
+ ts.tv_sec = div_long_long_rem_signed(nsec, NSEC_PER_SEC, &ts.tv_nsec);
+ if (unlikely(nsec < 0))
+ kvm_set_normalized_timespec(&ts, ts.tv_sec, ts.tv_nsec);
+
+ return ts;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
+
+#include <linux/pci.h>
+
+struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn)
+{
+ struct pci_dev *dev = NULL;
+
+ while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ if (pci_domain_nr(dev->bus) == 0 &&
+ (dev->bus->number == bus && dev->devfn == devfn))
+ return dev;
+ }
+ return NULL;
+}
+
+#endif
+
+#include <linux/intel-iommu.h>
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+int intel_iommu_found(void)
+{
+ return 0;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
+
+/* The relay_open() interface changed in 2.6.21. */
+
+struct rchan *kvm_relay_open(const char *base_filename,
+ struct dentry *parent,
+ size_t subbuf_size,
+ size_t n_subbufs,
+ struct rchan_callbacks *cb,
+ void *private_data)
+{
+ struct rchan *chan = relay_open(base_filename, parent,
+ subbuf_size, n_subbufs,
+ cb);
+ if (chan)
+ chan->private_data = private_data;
+ return chan;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+
+#include <linux/pci.h>
+
+int kvm_pcidev_msi_enabled(struct pci_dev *dev)
+{
+ int pos;
+ u16 control;
+
+ if (!(pos = pci_find_capability(dev, PCI_CAP_ID_MSI)))
+ return 0;
+
+ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+ if (control & PCI_MSI_FLAGS_ENABLE)
+ return 1;
+
+ return 0;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
+
+extern unsigned tsc_khz;
+static unsigned tsc_khz_dummy = 2000000;
+static unsigned *tsc_khz_p;
+
+unsigned kvm_get_tsc_khz(void)
+{
+ if (!tsc_khz_p) {
+ tsc_khz_p = symbol_get(tsc_khz);
+ if (!tsc_khz_p)
+ tsc_khz_p = &tsc_khz_dummy;
+ }
+ return *tsc_khz_p;
+}
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+static enum hrtimer_restart kvm_hrtimer_wakeup(struct hrtimer *timer)
+{
+ struct hrtimer_sleeper *t =
+ container_of(timer, struct hrtimer_sleeper, timer);
+ struct task_struct *task = t->task;
+
+ t->task = NULL;
+ if (task)
+ wake_up_process(task);
+
+ return HRTIMER_NORESTART;
+}
+
+int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode)
+{
+ struct hrtimer_sleeper t;
+
+ /*
+ * Optimize when a zero timeout value is given. It does not
+ * matter whether this is an absolute or a relative time.
+ */
+ if (expires && !expires->tv64) {
+ __set_current_state(TASK_RUNNING);
+ return 0;
+ }
+
+ /*
+	 * A NULL parameter means "infinite"
+ */
+ if (!expires) {
+ schedule();
+ __set_current_state(TASK_RUNNING);
+ return -EINTR;
+ }
+
+ hrtimer_init(&t.timer, CLOCK_MONOTONIC, mode);
+ t.timer.expires = *expires;
+
+ t.timer.function = kvm_hrtimer_wakeup;
+ t.task = current;
+
+ hrtimer_start(&t.timer, t.timer.expires, mode);
+ if (!hrtimer_active(&t.timer))
+ t.task = NULL;
+
+ if (likely(t.task))
+ schedule();
+
+ hrtimer_cancel(&t.timer);
+
+ __set_current_state(TASK_RUNNING);
+
+ return !t.task ? 0 : -EINTR;
+}
+
+#endif
diff --git a/kernel/ia64/Kbuild b/kernel/ia64/Kbuild
new file mode 100644
index 00000000..e62f2b94
--- /dev/null
+++ b/kernel/ia64/Kbuild
@@ -0,0 +1,13 @@
+obj-m := kvm.o kvm-intel.o
+
+kvm-objs := kvm_main.o ioapic.o coalesced_mmio.o kvm-ia64.o kvm_fw.o \
+ irq_comm.o ../anon_inodes.o ../external-module-compat.o \
+ ../request-irq-compat.o assigned-dev.o
+
+ifeq ($(CONFIG_IOMMU_API),y)
+kvm-objs += iommu.o
+endif
+
+EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
+kvm-intel-objs := vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
+ vtlb.o process.o memset.o memcpy.o kvm_lib.o
diff --git a/kernel/ia64/Makefile.pre b/kernel/ia64/Makefile.pre
new file mode 100644
index 00000000..4d3410f6
--- /dev/null
+++ b/kernel/ia64/Makefile.pre
@@ -0,0 +1,27 @@
+prerequisite: asm-offsets.h ia64/memset.S ia64/memcpy.S
+ cp -f $(KERNELDIR)/arch/ia64/lib/memcpy.S ia64/memcpy.S
+ cp -f $(KERNELDIR)/arch/ia64/lib/memset.S ia64/memset.S
+	cmp -s asm-offsets.h ia64/asm-offsets.h || mv -f asm-offsets.* ia64/
+ cp -f $(KERNELDIR)/lib/vsprintf.c ia64/vsprintf.c
+ cp -f $(KERNELDIR)/lib/ctype.c ia64/ctype.c
+ sed -i /^EXPORT_SYMBOL/d ia64/vsprintf.c
+ sed -i /^EXPORT_SYMBOL/d ia64/ctype.c
+
+asm-offsets.h: asm-offsets.s
+ @(set -e; \
+ echo "/*"; \
+ echo " * DO NOT MODIFY."; \
+ echo " *"; \
+ echo " * This file was auto-generated from $<"; \
+ echo " *"; \
+ echo " */"; \
+ echo ""; \
+ echo "#ifndef __KVM_ASM_OFFSETS_H__"; \
+ echo "#define __KVM_ASM_OFFSETS_H__"; \
+ echo ""; \
+ sed -ne "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"; \
+ echo ""; \
+ echo "#endif") <$< >$@
+
+asm-offsets.s: ia64/asm-offsets.c
+ gcc -S -D__KERNEL__ -I./include -I$(KERNELDIR)/include -I$(KERNELDIR)/arch/ia64/include ia64/asm-offsets.c
diff --git a/kernel/ia64/external-module-compat.h b/kernel/ia64/external-module-compat.h
new file mode 100644
index 00000000..60a83a1b
--- /dev/null
+++ b/kernel/ia64/external-module-compat.h
@@ -0,0 +1,60 @@
+/*
+ * Compatibility header for building as an external module.
+ */
+
+#ifndef __ASSEMBLY__
+#include <linux/version.h>
+
+#include <linux/types.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
+
+typedef u64 phys_addr_t;
+
+#endif
+
+#include "../external-module-compat-comm.h"
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+#error "KVM/IA-64 Can't be compiled if kernel version < 2.6.26"
+#endif
+
+#ifndef CONFIG_PREEMPT_NOTIFIERS
+/* For now, just fail the build if preempt notifiers are not configured.
+   TODO: implement a fallback later. */
+#error "KVM/IA-64 depends on preempt notifiers in kernel."
+#endif
+
+#ifndef CONFIG_KVM_APIC_ARCHITECTURE
+#define CONFIG_KVM_APIC_ARCHITECTURE
+#endif
+
+/* smp_call_function() lost an argument in 2.6.27. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+
+#define kvm_smp_call_function(func, info, wait) smp_call_function(func, info, 0, wait)
+
+#else
+
+#define kvm_smp_call_function(func, info, wait) smp_call_function(func, info, wait)
+
+#endif
+
+/* There is no struct fdesc definition before 2.6.27. */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
+struct fdesc {
+ uint64_t ip;
+ uint64_t gp;
+};
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
+
+#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \
+ _PAGE_MA_UC)
+#endif
+
+#endif
+
+#ifndef CONFIG_HAVE_KVM_IRQCHIP
+#define CONFIG_HAVE_KVM_IRQCHIP 1
+#endif
diff --git a/kernel/include-compat/asm-ia64/msidef.h b/kernel/include-compat/asm-ia64/msidef.h
new file mode 100644
index 00000000..592c1047
--- /dev/null
+++ b/kernel/include-compat/asm-ia64/msidef.h
@@ -0,0 +1,42 @@
+#ifndef _IA64_MSI_DEF_H
+#define _IA64_MSI_DEF_H
+
+/*
+ * Shifts for APIC-based data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT 0
+#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT)
+#define MSI_DATA_VECTOR_MASK 0xffffff00
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT 8
+#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT 14
+#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
+#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT 15
+#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
+#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for APIC-based bus address
+ */
+
+#define MSI_ADDR_DEST_ID_SHIFT 4
+#define MSI_ADDR_HEADER 0xfee00000
+
+#define MSI_ADDR_DEST_ID_MASK 0xfff0000f
+#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT)
+
+#define MSI_ADDR_DEST_MODE_SHIFT 2
+#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT 3
+#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
+#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+
+#endif /* _IA64_MSI_DEF_H */
diff --git a/kernel/include-compat/asm-x86/asm.h b/kernel/include-compat/asm-x86/asm.h
new file mode 100644
index 00000000..3ad6aab9
--- /dev/null
+++ b/kernel/include-compat/asm-x86/asm.h
@@ -0,0 +1,3 @@
+/*
+ * Empty file to satisfy #include <asm/asm.h> for older kernels.
+ */
diff --git a/kernel/include-compat/asm-x86/cmpxchg.h b/kernel/include-compat/asm-x86/cmpxchg.h
new file mode 100644
index 00000000..68daeebc
--- /dev/null
+++ b/kernel/include-compat/asm-x86/cmpxchg.h
@@ -0,0 +1,3 @@
+/*
+ * Empty file to satisfy #include <asm/cmpxchg.h> for older kernels.
+ */
diff --git a/kernel/include-compat/asm-x86/mce.h b/kernel/include-compat/asm-x86/mce.h
new file mode 100644
index 00000000..1eb03c6f
--- /dev/null
+++ b/kernel/include-compat/asm-x86/mce.h
@@ -0,0 +1 @@
+/* empty file to keep #include happy */
diff --git a/kernel/include-compat/asm-x86/msidef.h b/kernel/include-compat/asm-x86/msidef.h
new file mode 100644
index 00000000..6706b300
--- /dev/null
+++ b/kernel/include-compat/asm-x86/msidef.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_X86_MSIDEF_H
+#define _ASM_X86_MSIDEF_H
+
+/*
+ * Constants for Intel APIC based MSI messages.
+ */
+
+/*
+ * Shifts for MSI data
+ */
+
+#define MSI_DATA_VECTOR_SHIFT 0
+#define MSI_DATA_VECTOR_MASK 0x000000ff
+#define MSI_DATA_VECTOR(v) (((v) << MSI_DATA_VECTOR_SHIFT) & \
+ MSI_DATA_VECTOR_MASK)
+
+#define MSI_DATA_DELIVERY_MODE_SHIFT 8
+#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT)
+#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT)
+
+#define MSI_DATA_LEVEL_SHIFT 14
+#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT)
+#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT)
+
+#define MSI_DATA_TRIGGER_SHIFT 15
+#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT)
+#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT)
+
+/*
+ * Shift/mask fields for msi address
+ */
+
+#define MSI_ADDR_BASE_HI 0
+#define MSI_ADDR_BASE_LO 0xfee00000
+
+#define MSI_ADDR_DEST_MODE_SHIFT 2
+#define MSI_ADDR_DEST_MODE_PHYSICAL (0 << MSI_ADDR_DEST_MODE_SHIFT)
+#define MSI_ADDR_DEST_MODE_LOGICAL (1 << MSI_ADDR_DEST_MODE_SHIFT)
+
+#define MSI_ADDR_REDIRECTION_SHIFT 3
+#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT)
+ /* dedicated cpu */
+#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT)
+ /* lowest priority */
+
+#define MSI_ADDR_DEST_ID_SHIFT 12
+#define MSI_ADDR_DEST_ID_MASK 0x00ffff0
+#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
+ MSI_ADDR_DEST_ID_MASK)
+
+#define MSI_ADDR_IR_EXT_INT (1 << 4)
+#define MSI_ADDR_IR_SHV (1 << 3)
+#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
+#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
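+
+/*
+ * Editor's sketch, not part of the original patch: these constants compose
+ * the MSI address/data pair. A hypothetical fixed-delivery, edge-triggered,
+ * physical-destination message for APIC id 'dest' and vector 'vec', assuming
+ * struct msi_msg from <linux/msi.h>, would be built as:
+ *
+ *	msg->address_hi = MSI_ADDR_BASE_HI;
+ *	msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_DEST_MODE_PHYSICAL |
+ *			  MSI_ADDR_REDIRECTION_CPU | MSI_ADDR_DEST_ID(dest);
+ *	msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT |
+ *		    MSI_DATA_DELIVERY_FIXED | MSI_DATA_VECTOR(vec);
+ */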
+#endif /* _ASM_X86_MSIDEF_H */
diff --git a/kernel/include-compat/asm-x86/msr-index.h b/kernel/include-compat/asm-x86/msr-index.h
new file mode 100644
index 00000000..1eb03c6f
--- /dev/null
+++ b/kernel/include-compat/asm-x86/msr-index.h
@@ -0,0 +1 @@
+/* empty file to keep #include happy */
diff --git a/kernel/include-compat/asm-x86/pvclock-abi.h b/kernel/include-compat/asm-x86/pvclock-abi.h
new file mode 100644
index 00000000..6857f840
--- /dev/null
+++ b/kernel/include-compat/asm-x86/pvclock-abi.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_X86_PVCLOCK_ABI_H_
+#define _ASM_X86_PVCLOCK_ABI_H_
+#ifndef __ASSEMBLY__
+
+/*
+ * These structs MUST NOT be changed.
+ * They are the ABI between hypervisor and guest OS.
+ * Both Xen and KVM are using this.
+ *
+ * pvclock_vcpu_time_info holds the system time and the tsc timestamp
+ * of the last update. So the guest can use the tsc delta to get a
+ * more precise system time. There is one per virtual cpu.
+ *
+ * pvclock_wall_clock references the point in time when the system
+ * time was zero (usually boot time), thus the guest calculates the
+ * current wall clock by adding the system time.
+ *
+ * Protocol for the "version" fields is: hypervisor raises it (making
+ * it uneven) before it starts updating the fields and raises it again
+ * (making it even) when it is done. Thus the guest can make sure the
+ * time values it got are consistent by checking the version before
+ * and after reading them.
+ */
+
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 pad[3];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+} __attribute__((__packed__));
+
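+/*
+ * Editor's sketch, not part of the original patch: the version protocol is
+ * consumed like a seqlock. A hypothetical guest-side reader retries until it
+ * observes the same even version before and after reading the payload:
+ */
+static inline void example_pvclock_read(struct pvclock_vcpu_time_info *src,
+					u64 *tsc, u64 *system_time)
+{
+	u32 version;
+
+	do {
+		version = src->version;
+		rmb();	/* sample version before reading the payload */
+		*tsc = src->tsc_timestamp;
+		*system_time = src->system_time;
+		rmb();	/* and re-check it only afterwards */
+	} while ((version & 1) || version != src->version);
+}
+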
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PVCLOCK_ABI_H_ */
diff --git a/kernel/include-compat/linux/anon_inodes.h b/kernel/include-compat/linux/anon_inodes.h
new file mode 100644
index 00000000..7b6862f2
--- /dev/null
+++ b/kernel/include-compat/linux/anon_inodes.h
@@ -0,0 +1,16 @@
+/*
+ * include/linux/anon_inodes.h
+ *
+ * Copyright (C) 2007 Davide Libenzi <davidel@xmailserver.org>
+ *
+ */
+
+#ifndef _LINUX_ANON_INODES_H
+#define _LINUX_ANON_INODES_H
+
+struct file_operations;
+
+int anon_inode_getfd(const char *name, const struct file_operations *fops,
+ void *priv);
+
+#endif /* _LINUX_ANON_INODES_H */
diff --git a/kernel/include-compat/linux/eventfd.h b/kernel/include-compat/linux/eventfd.h
new file mode 100644
index 00000000..c3580fb7
--- /dev/null
+++ b/kernel/include-compat/linux/eventfd.h
@@ -0,0 +1 @@
+/* Dummy file */
diff --git a/kernel/include-compat/linux/ftrace_event.h b/kernel/include-compat/linux/ftrace_event.h
new file mode 100644
index 00000000..c89c4c9a
--- /dev/null
+++ b/kernel/include-compat/linux/ftrace_event.h
@@ -0,0 +1 @@
+/* dummy file for #include compatibility */
diff --git a/kernel/include-compat/linux/intel-iommu.h b/kernel/include-compat/linux/intel-iommu.h
new file mode 100644
index 00000000..1490fc07
--- /dev/null
+++ b/kernel/include-compat/linux/intel-iommu.h
@@ -0,0 +1,355 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Ashok Raj <ashok.raj@intel.com>
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ */
+
+#ifndef _INTEL_IOMMU_H_
+#define _INTEL_IOMMU_H_
+
+#include <linux/types.h>
+#include <linux/msi.h>
+#include <linux/sysdev.h>
+#include "iova.h"
+#include <linux/io.h>
+
+/*
+ * We need a fixed PAGE_SIZE of 4K irrespective of
+ * arch PAGE_SIZE for IOMMU page tables.
+ */
+#define PAGE_SHIFT_4K (12)
+#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
+#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
+#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
+
+#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
+#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
+#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
+
+/*
+ * Intel IOMMU register specification per version 1.0 public spec.
+ */
+
+#define DMAR_VER_REG 0x0 /* Arch version supported by this IOMMU */
+#define DMAR_CAP_REG 0x8 /* Hardware supported capabilities */
+#define DMAR_ECAP_REG 0x10 /* Extended capabilities supported */
+#define DMAR_GCMD_REG 0x18 /* Global command register */
+#define DMAR_GSTS_REG 0x1c /* Global status register */
+#define DMAR_RTADDR_REG 0x20 /* Root entry table */
+#define DMAR_CCMD_REG 0x28 /* Context command reg */
+#define DMAR_FSTS_REG 0x34 /* Fault Status register */
+#define DMAR_FECTL_REG 0x38 /* Fault control register */
+#define DMAR_FEDATA_REG 0x3c /* Fault event interrupt data register */
+#define DMAR_FEADDR_REG 0x40 /* Fault event interrupt addr register */
+#define DMAR_FEUADDR_REG 0x44 /* Upper address register */
+#define DMAR_AFLOG_REG 0x58 /* Advanced Fault control */
+#define DMAR_PMEN_REG 0x64 /* Enable Protected Memory Region */
+#define DMAR_PLMBASE_REG 0x68 /* PMRR Low addr */
+#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
+#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
+#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
+
+#define OFFSET_STRIDE (9)
+/*
+#define dmar_readl(dmar, reg) readl(dmar + reg)
+#define dmar_readq(dmar, reg) ({ \
+ u32 lo, hi; \
+ lo = readl(dmar + reg); \
+ hi = readl(dmar + reg + 4); \
+ (((u64) hi) << 32) + lo; })
+*/
+static inline u64 dmar_readq(void __iomem *addr)
+{
+ u32 lo, hi;
+ lo = readl(addr);
+ hi = readl(addr + 4);
+ return (((u64) hi) << 32) + lo;
+}
+
+static inline void dmar_writeq(void __iomem *addr, u64 val)
+{
+ writel((u32)val, addr);
+ writel((u32)(val >> 32), addr + 4);
+}
+
+#define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4)
+#define DMAR_VER_MINOR(v) ((v) & 0x0f)
+
+/*
+ * Decoding Capability Register
+ */
+#define cap_read_drain(c) (((c) >> 55) & 1)
+#define cap_write_drain(c) (((c) >> 54) & 1)
+#define cap_max_amask_val(c) (((c) >> 48) & 0x3f)
+#define cap_num_fault_regs(c) ((((c) >> 40) & 0xff) + 1)
+#define cap_pgsel_inv(c) (((c) >> 39) & 1)
+
+#define cap_super_page_val(c) (((c) >> 34) & 0xf)
+#define cap_super_offset(c) (((find_first_bit(&cap_super_page_val(c), 4)) \
+ * OFFSET_STRIDE) + 21)
+
+#define cap_fault_reg_offset(c) ((((c) >> 24) & 0x3ff) * 16)
+#define cap_max_fault_reg_offset(c) \
+ (cap_fault_reg_offset(c) + cap_num_fault_regs(c) * 16)
+
+#define cap_zlr(c) (((c) >> 22) & 1)
+#define cap_isoch(c) (((c) >> 23) & 1)
+#define cap_mgaw(c) ((((c) >> 16) & 0x3f) + 1)
+#define cap_sagaw(c) (((c) >> 8) & 0x1f)
+#define cap_caching_mode(c) (((c) >> 7) & 1)
+#define cap_phmr(c) (((c) >> 6) & 1)
+#define cap_plmr(c) (((c) >> 5) & 1)
+#define cap_rwbf(c) (((c) >> 4) & 1)
+#define cap_afl(c) (((c) >> 3) & 1)
+#define cap_ndoms(c) (((unsigned long)1) << (4 + 2 * ((c) & 0x7)))
+/*
+ * Extended Capability Register
+ */
+
+#define ecap_niotlb_iunits(e) ((((e) >> 24) & 0xff) + 1)
+#define ecap_iotlb_offset(e) ((((e) >> 8) & 0x3ff) * 16)
+#define ecap_max_iotlb_offset(e) \
+ (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_coherent(e) ((e) & 0x1)
+
+
+/* IOTLB_REG */
+#define DMA_TLB_GLOBAL_FLUSH (((u64)1) << 60)
+#define DMA_TLB_DSI_FLUSH (((u64)2) << 60)
+#define DMA_TLB_PSI_FLUSH (((u64)3) << 60)
+#define DMA_TLB_IIRG(type) ((type >> 60) & 7)
+#define DMA_TLB_IAIG(val) (((val) >> 57) & 7)
+#define DMA_TLB_READ_DRAIN (((u64)1) << 49)
+#define DMA_TLB_WRITE_DRAIN (((u64)1) << 48)
+#define DMA_TLB_DID(id) (((u64)((id) & 0xffff)) << 32)
+#define DMA_TLB_IVT (((u64)1) << 63)
+#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
+#define DMA_TLB_MAX_SIZE (0x3f)
+
+/* PMEN_REG */
+#define DMA_PMEN_EPM (((u32)1)<<31)
+#define DMA_PMEN_PRS (((u32)1)<<0)
+
+/* GCMD_REG */
+#define DMA_GCMD_TE (((u32)1) << 31)
+#define DMA_GCMD_SRTP (((u32)1) << 30)
+#define DMA_GCMD_SFL (((u32)1) << 29)
+#define DMA_GCMD_EAFL (((u32)1) << 28)
+#define DMA_GCMD_WBF (((u32)1) << 27)
+
+/* GSTS_REG */
+#define DMA_GSTS_TES (((u32)1) << 31)
+#define DMA_GSTS_RTPS (((u32)1) << 30)
+#define DMA_GSTS_FLS (((u32)1) << 29)
+#define DMA_GSTS_AFLS (((u32)1) << 28)
+#define DMA_GSTS_WBFS (((u32)1) << 27)
+
+/* CCMD_REG */
+#define DMA_CCMD_ICC (((u64)1) << 63)
+#define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)
+#define DMA_CCMD_DOMAIN_INVL (((u64)2) << 61)
+#define DMA_CCMD_DEVICE_INVL (((u64)3) << 61)
+#define DMA_CCMD_FM(m) (((u64)((m) & 0x3)) << 32)
+#define DMA_CCMD_MASK_NOBIT 0
+#define DMA_CCMD_MASK_1BIT 1
+#define DMA_CCMD_MASK_2BIT 2
+#define DMA_CCMD_MASK_3BIT 3
+#define DMA_CCMD_SID(s) (((u64)((s) & 0xffff)) << 16)
+#define DMA_CCMD_DID(d) ((u64)((d) & 0xffff))
+
+/* FECTL_REG */
+#define DMA_FECTL_IM (((u32)1) << 31)
+
+/* FSTS_REG */
+#define DMA_FSTS_PPF ((u32)2)
+#define DMA_FSTS_PFO ((u32)1)
+#define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff)
+
+/* FRCD_REG, 32 bits access */
+#define DMA_FRCD_F (((u32)1) << 31)
+#define dma_frcd_type(d) ((d >> 30) & 1)
+#define dma_frcd_fault_reason(c) (c & 0xff)
+#define dma_frcd_source_id(c) (c & 0xffff)
+#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
+
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 val;
+ u64 rsvd1;
+};
+#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
+static inline bool root_present(struct root_entry *root)
+{
+ return (root->val & 1);
+}
+static inline void set_root_present(struct root_entry *root)
+{
+ root->val |= 1;
+}
+static inline void set_root_value(struct root_entry *root, unsigned long value)
+{
+ root->val |= value & PAGE_MASK_4K;
+}
+
+struct context_entry;
+static inline struct context_entry *
+get_context_addr_from_root(struct root_entry *root)
+{
+ return (struct context_entry *)
+ (root_present(root)?phys_to_virt(
+ root->val & PAGE_MASK_4K):
+ NULL);
+}
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+#define context_present(c) ((c).lo & 1)
+#define context_fault_disable(c) (((c).lo >> 1) & 1)
+#define context_translation_type(c) (((c).lo >> 2) & 3)
+#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
+#define context_address_width(c) ((c).hi & 7)
+#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
+
+#define context_set_present(c) do {(c).lo |= 1;} while (0)
+#define context_set_fault_enable(c) \
+ do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
+#define context_set_translation_type(c, val) \
+ do { \
+ (c).lo &= (((u64)-1) << 4) | 3; \
+ (c).lo |= ((val) & 3) << 2; \
+ } while (0)
+#define CONTEXT_TT_MULTI_LEVEL 0
+#define context_set_address_root(c, val) \
+ do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
+#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
+#define context_set_domain_id(c, val) \
+ do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
+#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
+
+/*
+ * 0: readable
+ * 1: writable
+ * 2-6: reserved
+ * 7: super page
+ * 8-11: available
+ * 12-63: Host physical address
+ */
+struct dma_pte {
+ u64 val;
+};
+#define dma_clear_pte(p) do {(p).val = 0;} while (0)
+
+#define DMA_PTE_READ (1)
+#define DMA_PTE_WRITE (2)
+
+#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
+#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
+#define dma_set_pte_prot(p, prot) \
+ do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define dma_set_pte_addr(p, addr) do {\
+ (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+
+struct intel_iommu;
+
+struct dmar_domain {
+ int id; /* domain id */
+ struct intel_iommu *iommu; /* back pointer to owning iommu */
+
+ struct list_head devices; /* all devices' list */
+ struct iova_domain iovad; /* iova's that belong to this domain */
+
+ struct dma_pte *pgd; /* virtual address */
+ spinlock_t mapping_lock; /* page table lock */
+ int gaw; /* max guest address width */
+
+ /* adjusted guest address width, 0 is level 2 30-bit */
+ int agaw;
+
+#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
+ int flags;
+};
+
+/* PCI domain-device relationship */
+struct device_domain_info {
+ struct list_head link; /* link to domain siblings */
+ struct list_head global; /* link to global list */
+	u8 bus;			/* PCI bus number */
+ u8 devfn; /* PCI devfn number */
+ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
+ struct dmar_domain *domain; /* pointer to domain */
+};
+
+extern int init_dmars(void);
+
+struct intel_iommu {
+ void __iomem *reg; /* Pointer to hardware regs, virtual addr */
+ u64 cap;
+ u64 ecap;
+ unsigned long *domain_ids; /* bitmap of domains */
+ struct dmar_domain **domains; /* ptr to domains */
+ int seg;
+ u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
+ spinlock_t lock; /* protect context, domain ids */
+ spinlock_t register_lock; /* protect register handling */
+ struct root_entry *root_entry; /* virtual address */
+
+ unsigned int irq;
+ unsigned char name[7]; /* Device Name */
+ struct msi_msg saved_msg;
+ struct sys_device sysdev;
+};
+
+#ifndef CONFIG_DMAR_GFX_WA
+static inline void iommu_prepare_gfx_mapping(void)
+{
+ return;
+}
+#endif /* !CONFIG_DMAR_GFX_WA */
+
+void intel_iommu_domain_exit(struct dmar_domain *domain);
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev);
+int intel_iommu_context_mapping(struct dmar_domain *domain,
+ struct pci_dev *pdev);
+int intel_iommu_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
+ u64 hpa, size_t size, int prot);
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn);
+struct dmar_domain *intel_iommu_find_domain(struct pci_dev *pdev);
+int intel_iommu_found(void);
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova);
+
+#endif
diff --git a/kernel/include-compat/linux/iommu.h b/kernel/include-compat/linux/iommu.h
new file mode 100644
index 00000000..8a7bfb1b
--- /dev/null
+++ b/kernel/include-compat/linux/iommu.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2007-2008 Advanced Micro Devices, Inc.
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_IOMMU_H
+#define __LINUX_IOMMU_H
+
+#define IOMMU_READ (1)
+#define IOMMU_WRITE (2)
+
+struct device;
+
+struct iommu_domain {
+ void *priv;
+};
+
+struct iommu_ops {
+ int (*domain_init)(struct iommu_domain *domain);
+ void (*domain_destroy)(struct iommu_domain *domain);
+ int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
+ void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+ int (*map)(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot);
+ void (*unmap)(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+ phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
+ unsigned long iova);
+};
+
+#ifdef CONFIG_IOMMU_API
+
+extern void register_iommu(struct iommu_ops *ops);
+extern bool iommu_found(void);
+extern struct iommu_domain *iommu_domain_alloc(void);
+extern void iommu_domain_free(struct iommu_domain *domain);
+extern int iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev);
+extern void iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev);
+extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot);
+extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
+extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova);
+
+#else /* CONFIG_IOMMU_API */
+
+static inline void register_iommu(struct iommu_ops *ops)
+{
+}
+
+static inline bool iommu_found(void)
+{
+ return false;
+}
+
+static inline struct iommu_domain *iommu_domain_alloc(void)
+{
+ return NULL;
+}
+
+static inline void iommu_domain_free(struct iommu_domain *domain)
+{
+}
+
+static inline int iommu_attach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+ return -ENODEV;
+}
+
+static inline void iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
+{
+}
+
+static inline int iommu_map_range(struct iommu_domain *domain,
+ unsigned long iova, phys_addr_t paddr,
+ size_t size, int prot)
+{
+ return -ENODEV;
+}
+
+static inline void iommu_unmap_range(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+}
+
+static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
+ unsigned long iova)
+{
+ return 0;
+}
+
+#endif /* CONFIG_IOMMU_API */
+
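+/*
+ * Editor's sketch, not part of the original patch: typical use of this API;
+ * dev, iova and paddr are illustrative. On kernels without CONFIG_IOMMU_API
+ * the stubs above let the same code compile and fail with -ENODEV:
+ */
+static inline int example_iommu_map_one(struct device *dev,
+					unsigned long iova, phys_addr_t paddr)
+{
+	struct iommu_domain *domain = iommu_domain_alloc();
+	int r;
+
+	if (!domain)
+		return -ENODEV;
+	r = iommu_attach_device(domain, dev);
+	if (r) {
+		iommu_domain_free(domain);
+		return r;
+	}
+	r = iommu_map_range(domain, iova, paddr, PAGE_SIZE,
+			    IOMMU_READ | IOMMU_WRITE);
+	if (r) {
+		iommu_detach_device(domain, dev);
+		iommu_domain_free(domain);
+	}
+	return r;
+}
+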
+#endif /* __LINUX_IOMMU_H */
diff --git a/kernel/include-compat/linux/iova.h b/kernel/include-compat/linux/iova.h
new file mode 100644
index 00000000..228f6c94
--- /dev/null
+++ b/kernel/include-compat/linux/iova.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This file is released under the GPLv2.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
+ *
+ */
+
+#ifndef _IOVA_H_
+#define _IOVA_H_
+
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/rbtree.h>
+#include <linux/dma-mapping.h>
+
+/* IO virtual address start page frame number */
+#define IOVA_START_PFN (1)
+
+/* iova structure */
+struct iova {
+ struct rb_node node;
+ unsigned long pfn_hi; /* IOMMU dish out addr hi */
+ unsigned long pfn_lo; /* IOMMU dish out addr lo */
+};
+
+/* holds all the iova translations for a domain */
+struct iova_domain {
+ spinlock_t iova_alloc_lock;/* Lock to protect iova allocation */
+ spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */
+ struct rb_root rbroot; /* iova domain rbtree root */
+ struct rb_node *cached32_node; /* Save last alloced node */
+ unsigned long dma_32bit_pfn;
+};
+
+struct iova *alloc_iova_mem(void);
+void free_iova_mem(struct iova *iova);
+void free_iova(struct iova_domain *iovad, unsigned long pfn);
+void __free_iova(struct iova_domain *iovad, struct iova *iova);
+struct iova *alloc_iova(struct iova_domain *iovad, unsigned long size,
+ unsigned long limit_pfn,
+ bool size_aligned);
+struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
+ unsigned long pfn_hi);
+void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
+void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit);
+struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
+void put_iova_domain(struct iova_domain *iovad);
+
+#endif
diff --git a/kernel/include-compat/linux/magic.h b/kernel/include-compat/linux/magic.h
new file mode 100644
index 00000000..a9c6567f
--- /dev/null
+++ b/kernel/include-compat/linux/magic.h
@@ -0,0 +1,41 @@
+#ifndef __LINUX_MAGIC_H__
+#define __LINUX_MAGIC_H__
+
+#define ADFS_SUPER_MAGIC 0xadf5
+#define AFFS_SUPER_MAGIC 0xadff
+#define AFS_SUPER_MAGIC 0x5346414F
+#define AUTOFS_SUPER_MAGIC 0x0187
+#define CODA_SUPER_MAGIC 0x73757245
+#define EFS_SUPER_MAGIC 0x414A53
+#define EXT2_SUPER_MAGIC 0xEF53
+#define EXT3_SUPER_MAGIC 0xEF53
+#define EXT4_SUPER_MAGIC 0xEF53
+#define HPFS_SUPER_MAGIC 0xf995e849
+#define ISOFS_SUPER_MAGIC 0x9660
+#define JFFS2_SUPER_MAGIC 0x72b6
+#define KVMFS_SUPER_MAGIC 0x19700426
+
+#define MINIX_SUPER_MAGIC 0x137F /* original minix fs */
+#define MINIX_SUPER_MAGIC2 0x138F /* minix fs, 30 char names */
+#define MINIX2_SUPER_MAGIC 0x2468 /* minix V2 fs */
+#define MINIX2_SUPER_MAGIC2 0x2478 /* minix V2 fs, 30 char names */
+#define MINIX3_SUPER_MAGIC 0x4d5a /* minix V3 fs */
+
+#define MSDOS_SUPER_MAGIC 0x4d44 /* MD */
+#define NCP_SUPER_MAGIC 0x564c /* Guess, what 0x564c is :-) */
+#define NFS_SUPER_MAGIC 0x6969
+#define OPENPROM_SUPER_MAGIC 0x9fa1
+#define PROC_SUPER_MAGIC 0x9fa0
+#define QNX4_SUPER_MAGIC 0x002f /* qnx4 fs detection */
+
+#define REISERFS_SUPER_MAGIC 0x52654973 /* used by gcc */
+ /* used by file system utilities that
+ look at the superblock, etc. */
+#define REISERFS_SUPER_MAGIC_STRING "ReIsErFs"
+#define REISER2FS_SUPER_MAGIC_STRING "ReIsEr2Fs"
+#define REISER2FS_JR_SUPER_MAGIC_STRING "ReIsEr3Fs"
+
+#define SMB_SUPER_MAGIC 0x517B
+#define USBDEVICE_SUPER_MAGIC 0x9fa2
+
+#endif /* __LINUX_MAGIC_H__ */
diff --git a/kernel/include-compat/linux/marker.h b/kernel/include-compat/linux/marker.h
new file mode 100644
index 00000000..ceef04f2
--- /dev/null
+++ b/kernel/include-compat/linux/marker.h
@@ -0,0 +1,119 @@
+/*
+ * Alternative file to satisfy #include <linux/marker.h> for older kernels.
+ */
+#ifndef _LINUX_MARKER_H
+#define _LINUX_MARKER_H
+
+/*
+ * Code markup for dynamic and static tracing.
+ *
+ * See Documentation/marker.txt.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@polymtl.ca>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <linux/types.h>
+
+struct module;
+struct marker;
+
+/**
+ * marker_probe_func - Type of a marker probe function
+ * @probe_private: probe private data
+ * @call_private: call site private data
+ * @fmt: format string
+ * @args: variable argument list pointer. Use a pointer to overcome C's
+ * inability to pass this around as a pointer in a portable manner in
+ * the callee otherwise.
+ *
+ * Type of marker probe functions. They receive the mdata and need to parse the
+ * format string to recover the variable argument list.
+ */
+typedef void marker_probe_func(void *probe_private, void *call_private,
+ const char *fmt, va_list *args);
+
+struct marker_probe_closure {
+ marker_probe_func *func; /* Callback */
+ void *probe_private; /* Private probe data */
+};
+
+struct marker {
+ const char *name; /* Marker name */
+ const char *format; /* Marker format string, describing the
+ * variable argument list.
+ */
+ char state; /* Marker state. */
+ char ptype; /* probe type : 0 : single, 1 : multi */
+ void (*call)(const struct marker *mdata, /* Probe wrapper */
+ void *call_private, const char *fmt, ...);
+ struct marker_probe_closure single;
+ struct marker_probe_closure *multi;
+} __attribute__((aligned(8)));
+
+#define __trace_mark(name, call_private, format, args...) \
+ __mark_check_format(format, ## args)
+static inline void marker_update_probe_range(struct marker *begin,
+ struct marker *end)
+{ }
+
+/**
+ * trace_mark - Marker
+ * @name: marker name, not quoted.
+ * @format: format string
+ * @args...: variable argument list
+ *
+ * Places a marker.
+ */
+#define trace_mark(name, format, args...) \
+ __trace_mark(name, NULL, format, ## args)
+
+/**
+ * MARK_NOARGS - Format string for a marker with no argument.
+ */
+#define MARK_NOARGS " "
+
+/* To be used for string format validity checking with gcc */
+static inline void __attribute__((format(printf,1,2)))
+___mark_check_format(const char *fmt, ...)
+{
+}
+
+#define __mark_check_format(format, args...) \
+ do { \
+ if (0) \
+ ___mark_check_format(format, ## args); \
+ } while (0)
+
+extern marker_probe_func __mark_empty_function;
+
+extern void marker_probe_cb(const struct marker *mdata,
+ void *call_private, const char *fmt, ...);
+extern void marker_probe_cb_noarg(const struct marker *mdata,
+ void *call_private, const char *fmt, ...);
+
+/*
+ * Connect a probe to a marker.
+ * private data pointer must be a valid allocated memory address, or NULL.
+ */
+extern int marker_probe_register(const char *name, const char *format,
+ marker_probe_func *probe, void *probe_private);
+
+/*
+ * Disconnect a probe from a marker; the probe is identified by the
+ * private data given to marker_probe_register.
+ */
+extern int marker_probe_unregister(const char *name,
+ marker_probe_func *probe, void *probe_private);
+/*
+ * Unregister a marker by providing the registered private data.
+ */
+extern int marker_probe_unregister_private_data(marker_probe_func *probe,
+ void *probe_private);
+
+extern void *marker_get_private_data(const char *name, marker_probe_func *probe,
+ int num);
+
+#endif
+
diff --git a/kernel/include-compat/linux/math64.h b/kernel/include-compat/linux/math64.h
new file mode 100644
index 00000000..dc7c5812
--- /dev/null
+++ b/kernel/include-compat/linux/math64.h
@@ -0,0 +1,3 @@
+/*
+ * Empty file to satisfy #include <linux/math64.h> for older kernels.
+ */
diff --git a/kernel/include-compat/linux/mmu_notifier.h b/kernel/include-compat/linux/mmu_notifier.h
new file mode 100644
index 00000000..a6db4bab
--- /dev/null
+++ b/kernel/include-compat/linux/mmu_notifier.h
@@ -0,0 +1,6 @@
+#ifndef _LINUX_MMU_NOTIFIER_H
+#define _LINUX_MMU_NOTIFIER_H
+
+struct mmu_notifier {};
+
+#endif
diff --git a/kernel/include-compat/linux/msi.h b/kernel/include-compat/linux/msi.h
new file mode 100644
index 00000000..8f293922
--- /dev/null
+++ b/kernel/include-compat/linux/msi.h
@@ -0,0 +1,50 @@
+#ifndef LINUX_MSI_H
+#define LINUX_MSI_H
+
+#include <linux/list.h>
+
+struct msi_msg {
+ u32 address_lo; /* low 32 bits of msi message address */
+ u32 address_hi; /* high 32 bits of msi message address */
+ u32 data; /* 16 bits of msi message data */
+};
+
+/* Helper functions */
+extern void mask_msi_irq(unsigned int irq);
+extern void unmask_msi_irq(unsigned int irq);
+extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+
+struct msi_desc {
+ struct {
+ __u8 type : 5; /* {0: unused, 5h:MSI, 11h:MSI-X} */
+ __u8 maskbit : 1; /* mask-pending bit supported ? */
+ __u8 masked : 1;
+ __u8 is_64 : 1; /* Address size: 0=32bit 1=64bit */
+ __u8 pos; /* Location of the msi capability */
+ __u32 maskbits_mask; /* mask bits mask */
+ __u16 entry_nr; /* specific enabled entry */
+ unsigned default_irq; /* default pre-assigned irq */
+	} msi_attrib;
+
+ unsigned int irq;
+ struct list_head list;
+
+ void __iomem *mask_base;
+ struct pci_dev *dev;
+
+ /* Last set MSI message */
+ struct msi_msg msg;
+};
+
+/*
+ * The arch hook for setup up msi irqs
+ */
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
+void arch_teardown_msi_irq(unsigned int irq);
+extern int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+extern void arch_teardown_msi_irqs(struct pci_dev *dev);
+extern int arch_msi_check_device(struct pci_dev* dev, int nvec, int type);
+
+#endif /* LINUX_MSI_H */
diff --git a/kernel/include-compat/linux/mutex.h b/kernel/include-compat/linux/mutex.h
new file mode 100644
index 00000000..449905c0
--- /dev/null
+++ b/kernel/include-compat/linux/mutex.h
@@ -0,0 +1,3 @@
+/*
+ * Empty file to satisfy #include <linux/mutex.h> for older kernels.
+ */
diff --git a/kernel/include-compat/linux/srcu.h b/kernel/include-compat/linux/srcu.h
new file mode 100644
index 00000000..0d476be9
--- /dev/null
+++ b/kernel/include-compat/linux/srcu.h
@@ -0,0 +1,53 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * Documentation/RCU/ *.txt
+ *
+ */
+
+#ifndef _LINUX_SRCU_H
+#define _LINUX_SRCU_H
+
+struct srcu_struct_array {
+ int c[2];
+};
+
+struct srcu_struct {
+ int completed;
+ struct srcu_struct_array *per_cpu_ref;
+ struct mutex mutex;
+};
+
+#ifndef CONFIG_PREEMPT
+#define srcu_barrier() barrier()
+#else /* #ifndef CONFIG_PREEMPT */
+#define srcu_barrier()
+#endif /* #else #ifndef CONFIG_PREEMPT */
+
+int kvm_init_srcu_struct(struct srcu_struct *sp);
+void kvm_cleanup_srcu_struct(struct srcu_struct *sp);
+int kvm_srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
+void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
+void kvm_synchronize_srcu(struct srcu_struct *sp);
+long kvm_srcu_batches_completed(struct srcu_struct *sp);
+
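+/*
+ * Editor's sketch, not part of the original patch: the usual SRCU pattern,
+ * spelled with the kvm_-prefixed compat API; example_ptr is illustrative.
+ *
+ * reader:
+ *	idx = kvm_srcu_read_lock(&sp);
+ *	p = rcu_dereference(example_ptr);
+ *	...use p...
+ *	kvm_srcu_read_unlock(&sp, idx);
+ *
+ * updater:
+ *	old = example_ptr;
+ *	rcu_assign_pointer(example_ptr, new);
+ *	kvm_synchronize_srcu(&sp);	(waits for pre-existing readers)
+ *	kfree(old);
+ */
+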
+#endif
diff --git a/kernel/include-compat/linux/tracepoint.h b/kernel/include-compat/linux/tracepoint.h
new file mode 100644
index 00000000..f2e9a589
--- /dev/null
+++ b/kernel/include-compat/linux/tracepoint.h
@@ -0,0 +1 @@
+/* Dummy file to satisfy #include */
diff --git a/kernel/include-compat/trace/define_trace.h b/kernel/include-compat/trace/define_trace.h
new file mode 100644
index 00000000..222c9784
--- /dev/null
+++ b/kernel/include-compat/trace/define_trace.h
@@ -0,0 +1,2 @@
+/* Empty file to satisfy include */
+
diff --git a/kernel/kvm-kmod.spec b/kernel/kvm-kmod.spec
new file mode 100644
index 00000000..89b3d882
--- /dev/null
+++ b/kernel/kvm-kmod.spec
@@ -0,0 +1,52 @@
+%define kmod_name kvm
+
+Name: kvm-kmod
+Version: 0.0
+Release: 0
+Summary: %{kmod_name} kernel module
+
+Group: System Environment/Kernel
+License: GPL
+URL: http://www.qumranet.com
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}
+
+ExclusiveArch: i386 x86_64 ia64
+
+%description
+This kernel module provides support for virtual machines using hardware
+virtualization support (Intel VT-x & VT-i, or AMD SVM).
+
+%prep
+
+%build
+
+rm -rf %{buildroot}
+
+%install
+
+%define kverrel unknown
+%define moddir /lib/modules/%{kverrel}/extra
+mkdir -p %{buildroot}/%{moddir}
+cp %{objdir}/%{kmod_name}.ko %{objdir}/%{kmod_name}-*.ko %{buildroot}/%{moddir}
+chmod u+x %{buildroot}/%{moddir}/%{kmod_name}*.ko
+
+%post
+
+depmod %{kverrel}
+
+%postun
+
+depmod %{kverrel}
+
+%clean
+%{__rm} -rf %{buildroot}
+
+%files
+%{moddir}/%{kmod_name}.ko
+%ifarch i386 x86_64
+%{moddir}/%{kmod_name}-amd.ko
+%endif
+%{moddir}/%{kmod_name}-intel.ko
+
+
+%changelog
diff --git a/kernel/powerpc/Makefile.pre b/kernel/powerpc/Makefile.pre
new file mode 100644
index 00000000..e38baf13
--- /dev/null
+++ b/kernel/powerpc/Makefile.pre
@@ -0,0 +1 @@
+prerequisite:
diff --git a/kernel/request-irq-compat.c b/kernel/request-irq-compat.c
new file mode 100644
index 00000000..51193cb3
--- /dev/null
+++ b/kernel/request-irq-compat.c
@@ -0,0 +1,44 @@
+/*
+ * compat for request_irq
+ */
+
+#include <linux/interrupt.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+static kvm_irq_handler_t kvm_irq_handlers[NR_IRQS];
+static DEFINE_MUTEX(kvm_irq_handlers_mutex);
+
+static irqreturn_t kvm_irq_thunk(int irq, void *dev_id, struct pt_regs *regs)
+{
+ kvm_irq_handler_t handler = kvm_irq_handlers[irq];
+ return handler(irq, dev_id);
+}
+
+int kvm_request_irq(unsigned int a, kvm_irq_handler_t handler,
+ unsigned long c, const char *d, void *e)
+{
+ int rc = -EBUSY;
+ kvm_irq_handler_t old;
+
+ mutex_lock(&kvm_irq_handlers_mutex);
+ old = kvm_irq_handlers[a];
+ if (old)
+ goto out;
+ kvm_irq_handlers[a] = handler;
+ rc = request_irq(a, kvm_irq_thunk, c, d, e);
+ if (rc)
+ kvm_irq_handlers[a] = NULL;
+out:
+ mutex_unlock(&kvm_irq_handlers_mutex);
+ return rc;
+}
+
+void kvm_free_irq(unsigned int irq, void *dev_id)
+{
+ mutex_lock(&kvm_irq_handlers_mutex);
+ free_irq(irq, dev_id);
+ kvm_irq_handlers[irq] = NULL;
+ mutex_unlock(&kvm_irq_handlers_mutex);
+}
+
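+/*
+ * Editor's sketch, not part of the original patch: with the thunk above,
+ * callers keep the modern two-argument handler signature even on pre-2.6.19
+ * kernels; the handler name and device pointer below are illustrative.
+ *
+ *	static irqreturn_t example_intr(int irq, void *dev_id)
+ *	{
+ *		return IRQ_HANDLED;
+ *	}
+ *
+ *	r = kvm_request_irq(irq, example_intr, 0, "kvm-example", dev);
+ *	...
+ *	kvm_free_irq(irq, dev);
+ */
+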
+#endif
diff --git a/kernel/scripts/65-kvm.rules b/kernel/scripts/65-kvm.rules
new file mode 100644
index 00000000..857b08cf
--- /dev/null
+++ b/kernel/scripts/65-kvm.rules
@@ -0,0 +1,2 @@
+KERNEL=="kvm", MODE="0660", GROUP="kvm"
+ACTION=="add|change", SUBSYSTEM=="dmi", KERNEL=="id", RUN+="/bin/sh -c 'grep -q vmx /proc/cpuinfo && /sbin/modprobe kvm-intel; grep -q svm /proc/cpuinfo && /sbin/modprobe kvm-amd'"
diff --git a/kernel/scripts/make-release b/kernel/scripts/make-release
new file mode 100755
index 00000000..f9205e33
--- /dev/null
+++ b/kernel/scripts/make-release
@@ -0,0 +1,95 @@
+#!/bin/bash -e
+
+usage() {
+ echo "usage: $0 [--upload] [--formal] commit [name]"
+ exit 1
+}
+
+[[ -f ~/.kvmreleaserc ]] && . ~/.kvmreleaserc
+
+upload=
+formal=
+
+releasedir=~/sf-release
+[[ -z "$TMP" ]] && TMP="/tmp"
+tmpdir="$TMP/kvm-kmod-make-release.$$"
+while [[ "$1" = -* ]]; do
+ opt="$1"
+ shift
+ case "$opt" in
+ --upload)
+ upload="yes"
+ ;;
+ --formal)
+ formal="yes"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+commit="$1"
+name="$2"
+
+if [[ -z "$commit" ]]; then
+ usage
+fi
+
+if [[ -z "$name" ]]; then
+ name="$commit"
+fi
+
+tarball="$releasedir/$name.tar.bz2"
+
+cd "$(dirname "$0")"/..
+LINUX="$(readlink -f "linux-2.6")"
+
+kvm_git="$(readlink -f .git)"
+linux_git="$(readlink -f "$LINUX/.git")"
+
+mkdir -p "$tmpdir/$name"
+mkdir -p "$tmpdir/$name/linux-2.6"
+
+files=("arch/*/kvm/*" "virt/kvm" "include/linux/kvm*"
+ "arch/*/include/asm/kvm*" "arch/x86/include/asm/virtext.h"
+ "arch/x86/include/asm/vmx.h" "arch/x86/include/asm/svm.h"
+ "include/trace/events/kvm*"
+ )
+
+index="$tmpdir/index"
+
+rm -f "$index"
+GIT_INDEX_FILE="$index" git --git-dir="$kvm_git" read-tree "$commit"
+GIT_INDEX_FILE="$index" git --git-dir="$kvm_git" --work-tree="$tmpdir/$name" checkout "$commit" .
+lcommit=($(git --git-dir="$kvm_git" ls-tree "$commit" linux-2.6))
+lcommit="${lcommit[2]}"
+rm -f "$index"
+GIT_INDEX_FILE="$index" git --git-dir="$linux_git" read-tree "$lcommit"
+GIT_INDEX_FILE="$index" git --git-dir="$linux_git" \
+ --work-tree="$tmpdir/$name/linux-2.6" \
+ checkout "$lcommit" "${files[@]}"
+
+cd "$tmpdir/$name"
+
+if [[ -z "$formal" ]]; then
+ version="kvm-devel"
+else
+ version="$name"
+fi
+
+./sync "$name" -v "$version"
+
+rm -rf "$tmpdir/$name/linux-2.6"
+
+if [[ -n "$formal" ]]; then
+ echo "$name" > "$tmpdir/$name/KVM_VERSION"
+fi
+
+tar cjf "$tarball" -C "$tmpdir" "$name"
+
+rm -rf "$tmpdir"
+
+if [[ -n "$upload" ]]; then
+ rsync --progress -h "$tarball" kiszka@frs.sourceforge.net:uploads/
+fi
diff --git a/kernel/srcu.c b/kernel/srcu.c
new file mode 100644
index 00000000..e9734bc6
--- /dev/null
+++ b/kernel/srcu.c
@@ -0,0 +1,267 @@
+/*
+ * Sleepable Read-Copy Update mechanism for mutual exclusion.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2006
+ *
+ * Author: Paul McKenney <paulmck@us.ibm.com>
+ *
+ * For detailed explanation of Read-Copy Update mechanism see -
+ * Documentation/RCU/ *.txt
+ *
+ */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
+
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/preempt.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/smp.h>
+#include <linux/srcu.h>
+
+#undef kvm_init_srcu_struct
+#undef kvm_cleanup_srcu_struct
+#undef kvm_srcu_read_lock
+#undef kvm_srcu_read_unlock
+#undef kvm_synchronize_srcu
+#undef kvm_srcu_batches_completed
+/**
+ * init_srcu_struct - initialize a sleep-RCU structure
+ * @sp: structure to initialize.
+ *
+ * Must invoke this on a given srcu_struct before passing that srcu_struct
+ * to any other function. Each srcu_struct represents a separate domain
+ * of SRCU protection.
+ */
+int kvm_init_srcu_struct(struct srcu_struct *sp)
+{
+ sp->completed = 0;
+ mutex_init(&sp->mutex);
+ sp->per_cpu_ref = alloc_percpu(struct srcu_struct_array);
+ return (sp->per_cpu_ref ? 0 : -ENOMEM);
+}
+
+/*
+ * srcu_readers_active_idx -- returns approximate number of readers
+ * active on the specified rank of per-CPU counters.
+ */
+
+static int srcu_readers_active_idx(struct srcu_struct *sp, int idx)
+{
+ int cpu;
+ int sum;
+
+ sum = 0;
+ for_each_possible_cpu(cpu)
+ sum += per_cpu_ptr(sp->per_cpu_ref, cpu)->c[idx];
+ return sum;
+}
+
+/**
+ * srcu_readers_active - returns approximate number of readers.
+ * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
+ *
+ * Note that this is not an atomic primitive, and can therefore suffer
+ * severe errors when invoked on an active srcu_struct. That said, it
+ * can be useful as an error check at cleanup time.
+ */
+static int srcu_readers_active(struct srcu_struct *sp)
+{
+ return srcu_readers_active_idx(sp, 0) + srcu_readers_active_idx(sp, 1);
+}
+
+/**
+ * cleanup_srcu_struct - deconstruct a sleep-RCU structure
+ * @sp: structure to clean up.
+ *
+ * Must invoke this after you are finished using a given srcu_struct that
+ * was initialized via init_srcu_struct(), else you leak memory.
+ */
+void kvm_cleanup_srcu_struct(struct srcu_struct *sp)
+{
+ int sum;
+
+ sum = srcu_readers_active(sp);
+ WARN_ON(sum); /* Leakage unless caller handles error. */
+ if (sum != 0)
+ return;
+ free_percpu(sp->per_cpu_ref);
+ sp->per_cpu_ref = NULL;
+}
+
+/**
+ * srcu_read_lock - register a new reader for an SRCU-protected structure.
+ * @sp: srcu_struct in which to register the new reader.
+ *
+ * Counts the new reader in the appropriate per-CPU element of the
+ * srcu_struct. Must be called from process context.
+ * Returns an index that must be passed to the matching srcu_read_unlock().
+ */
+int kvm_srcu_read_lock(struct srcu_struct *sp)
+{
+ int idx;
+
+ preempt_disable();
+ idx = sp->completed & 0x1;
+ barrier(); /* ensure compiler looks -once- at sp->completed. */
+ per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]++;
+ srcu_barrier(); /* ensure compiler won't misorder critical section. */
+ preempt_enable();
+ return idx;
+}
+
+/**
+ * srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
+ * @sp: srcu_struct in which to unregister the old reader.
+ * @idx: return value from corresponding srcu_read_lock().
+ *
+ * Removes the count for the old reader from the appropriate per-CPU
+ * element of the srcu_struct. Note that this may well be a different
+ * CPU than that which was incremented by the corresponding srcu_read_lock().
+ * Must be called from process context.
+ */
+void kvm_srcu_read_unlock(struct srcu_struct *sp, int idx)
+{
+ preempt_disable();
+ srcu_barrier(); /* ensure compiler won't misorder critical section. */
+ per_cpu_ptr(sp->per_cpu_ref, smp_processor_id())->c[idx]--;
+ preempt_enable();
+}
+
+/**
+ * synchronize_srcu - wait for prior SRCU read-side critical-section completion
+ * @sp: srcu_struct with which to synchronize.
+ *
+ * Flip the completed counter, and wait for the old count to drain to zero.
+ * As with classic RCU, the updater must use some separate means of
+ * synchronizing concurrent updates. Can block; must be called from
+ * process context.
+ *
+ * Note that it is illegal to call synchronize_srcu() from the corresponding
+ * SRCU read-side critical section; doing so will result in deadlock.
+ * However, it is perfectly legal to call synchronize_srcu() on one
+ * srcu_struct from some other srcu_struct's read-side critical section.
+ */
+void kvm_synchronize_srcu(struct srcu_struct *sp)
+{
+ int idx;
+
+ idx = sp->completed;
+ mutex_lock(&sp->mutex);
+
+ /*
+ * Check to see if someone else did the work for us while we were
+ * waiting to acquire the lock. We need -two- advances of
+ * the counter, not just one. If there was but one, we might have
+ * shown up -after- our helper's first synchronize_sched(), thus
+ * having failed to prevent CPU-reordering races with concurrent
+ * srcu_read_unlock()s on other CPUs (see comment below). So we
+ * either (1) wait for two or (2) supply the second ourselves.
+ */
+
+ if ((sp->completed - idx) >= 2) {
+ mutex_unlock(&sp->mutex);
+ return;
+ }
+
+ synchronize_sched(); /* Force memory barrier on all CPUs. */
+
+ /*
+ * The preceding synchronize_sched() ensures that any CPU that
+ * sees the new value of sp->completed will also see any preceding
+ * changes to data structures made by this CPU. This prevents
+ * some other CPU from reordering the accesses in its SRCU
+ * read-side critical section to precede the corresponding
+ * srcu_read_lock() -- ensuring that such references will in
+ * fact be protected.
+ *
+ * So it is now safe to do the flip.
+ */
+
+ idx = sp->completed & 0x1;
+ sp->completed++;
+
+ synchronize_sched(); /* Force memory barrier on all CPUs. */
+
+ /*
+ * At this point, because of the preceding synchronize_sched(),
+ * all srcu_read_lock() calls using the old counters have completed.
+ * Their corresponding critical sections might well be still
+ * executing, but the srcu_read_lock() primitives themselves
+ * will have finished executing.
+ */
+
+ while (srcu_readers_active_idx(sp, idx))
+ schedule_timeout_interruptible(1);
+
+ synchronize_sched(); /* Force memory barrier on all CPUs. */
+
+ /*
+ * The preceding synchronize_sched() forces all srcu_read_unlock()
+ * primitives that were executing concurrently with the preceding
+ * for_each_possible_cpu() loop to have completed by this point.
+ * More importantly, it also forces the corresponding SRCU read-side
+ * critical sections to have also completed, and the corresponding
+ * references to SRCU-protected data items to be dropped.
+ *
+ * Note:
+ *
+ * Despite what you might think at first glance, the
+ * preceding synchronize_sched() -must- be within the
+ * critical section ended by the following mutex_unlock().
+ * Otherwise, a task taking the early exit can race
+ * with a srcu_read_unlock(), which might have executed
+ * just before the preceding srcu_readers_active() check,
+ * and whose CPU might have reordered the srcu_read_unlock()
+ * with the preceding critical section. In this case, there
+ * is nothing preventing the synchronize_sched() task that is
+ * taking the early exit from freeing a data structure that
+ * is still being referenced (out of order) by the task
+ * doing the srcu_read_unlock().
+ *
+ * Alternatively, the comparison with "2" on the early exit
+ * could be changed to "3", but this increases synchronize_srcu()
+ * latency for bulk loads. So the current code is preferred.
+ */
+
+ mutex_unlock(&sp->mutex);
+}
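+
+/*
+ * A minimal usage sketch for the compat API above; the structure, data
+ * pointer and callers are illustrative, not part of this module:
+ *
+ *	static struct srcu_struct example_srcu;
+ *	static struct foo *shared_data;
+ *
+ *	reader:
+ *		int idx = kvm_srcu_read_lock(&example_srcu);
+ *		use(rcu_dereference(shared_data));
+ *		kvm_srcu_read_unlock(&example_srcu, idx);
+ *
+ *	updater:
+ *		struct foo *old = shared_data;
+ *		rcu_assign_pointer(shared_data, new_data);
+ *		kvm_synchronize_srcu(&example_srcu);
+ *		kfree(old);	(no reader can still hold a reference to old)
+ */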
+
+/**
+ * srcu_batches_completed - return batches completed.
+ * @sp: srcu_struct on which to report batch completion.
+ *
+ * Report the number of batches, correlated with, but not necessarily
+ * precisely the same as, the number of grace periods that have elapsed.
+ */
+
+long kvm_srcu_batches_completed(struct srcu_struct *sp)
+{
+ return sp->completed;
+}
+
+EXPORT_SYMBOL_GPL(kvm_init_srcu_struct);
+EXPORT_SYMBOL_GPL(kvm_cleanup_srcu_struct);
+EXPORT_SYMBOL_GPL(kvm_srcu_read_lock);
+EXPORT_SYMBOL_GPL(kvm_srcu_read_unlock);
+EXPORT_SYMBOL_GPL(kvm_synchronize_srcu);
+EXPORT_SYMBOL_GPL(kvm_srcu_batches_completed);
+
+#endif
diff --git a/kernel/sync b/kernel/sync
new file mode 100755
index 00000000..da5c218a
--- /dev/null
+++ b/kernel/sync
@@ -0,0 +1,248 @@
+#!/usr/bin/python
+
+import sys, os, glob, os.path, shutil, re
+from optparse import OptionParser
+
+glob = glob.glob
+
+def cmd(c):
+ if os.system(c) != 0:
+ raise Exception('command execution failed: ' + c)
+
+parser = OptionParser(usage = 'usage: %prog [-v VERSION][-l LINUX]')
+parser.add_option('-v', action = 'store', type = 'string', dest = 'version', \
+ help = 'kvm-kmod release version', default = 'kvm-devel')
+parser.add_option('-l', action = 'store', type = 'string', dest = 'linux', \
+ help = 'Linux kernel tree to sync from', \
+ default = 'linux-2.6')
+parser.set_defaults()
+(options, args) = parser.parse_args()
+version = options.version
+linux = options.linux
+
+_re_cache = {}
+
+def re_cache(regexp):
+ global _re_cache
+ if regexp not in _re_cache:
+ _re_cache[regexp] = re.compile(regexp)
+ return _re_cache[regexp]
+
+def __hack(data):
+ compat_apis = str.split(
+ 'INIT_WORK desc_struct ldttss_desc64 desc_ptr '
+ 'hrtimer_add_expires_ns hrtimer_get_expires '
+ 'hrtimer_get_expires_ns hrtimer_start_expires '
+ 'hrtimer_expires_remaining smp_send_reschedule '
+ 'on_each_cpu relay_open request_irq free_irq '
+ 'init_srcu_struct cleanup_srcu_struct srcu_read_lock '
+ 'srcu_read_unlock synchronize_srcu srcu_batches_completed '
+ 'do_machine_check eventfd_signal get_desc_base get_desc_limit '
+ 'vma_kernel_pagesize '
+ )
+ anon_inodes = anon_inodes_exit = False
+ kvm_arch_init = False
+ mce = False
+ result = []
+ def sub(regexp, repl, str):
+ return re_cache(regexp).sub(repl, str)
+ for line in data.splitlines():
+ orig = line
+ def match(regexp):
+ return re_cache(regexp).search(line)
+ def w(line, result = result):
+ result.append(line)
+ f = line.split()
+ if match(r'^int kvm_init\('): anon_inodes = 1
+ if match(r'return 0;') and anon_inodes:
+ w('\tr = kvm_init_anon_inodes();')
+ w('\tif (r) {')
+ w('\t\t__free_page(bad_page);')
+ w('\t\tgoto out;')
+ w('\t}')
+ w('\tpreempt_notifier_sys_init();')
+ w('\tprintk("loaded kvm module (%s)\\n");\n' % (version,))
+ anon_inodes = False
+ if match(r'^void kvm_exit'): anon_inodes_exit = True
+ if match(r'\}') and anon_inodes_exit:
+ w('\tkvm_exit_anon_inodes();')
+ w('\tpreempt_notifier_sys_exit();')
+ anon_inodes_exit = False
+ if match(r'^int kvm_arch_init'): kvm_arch_init = True
+ if match(r'\btsc_khz\b') and kvm_arch_init:
+ line = sub(r'\btsc_khz\b', 'kvm_tsc_khz', line)
+ if match(r'^}'): kvm_arch_init = False
+ if match(r'MODULE_AUTHOR'):
+ w('MODULE_INFO(version, "%s");' % (version,))
+ line = sub(r'(\w+)->dev->msi_enabled',
+ r'kvm_pcidev_msi_enabled(\1->dev)', line)
+ if match(r'atomic_inc\(&kvm->mm->mm_count\);'):
+ line = 'mmget(&kvm->mm->mm_count);'
+ if match(r'^\t\.fault = '):
+ fcn = sub(r',', '', f[2])
+ line = '\t.VMA_OPS_FAULT(fault) = VMA_OPS_FAULT_FUNC(' + fcn + '),'
+ if match(r'^static int (.*_stat_get|lost_records_get)'):
+ line = line[0:11] + '__' + line[11:]
+ if match(r'DEFINE_SIMPLE_ATTRIBUTE.*(_stat_get|lost_records_get)'):
+ name = sub(r',', '', f[1])
+ w('MAKE_SIMPLE_ATTRIBUTE_GETTER(' + name + ')')
+ line = sub(r'linux/mm_types\.h', 'linux/mm.h', line)
+ line = sub(r'\b__user\b', ' ', line)
+ if match(r'^\t\.name = "kvm"'):
+ line = '\tset_kset_name("kvm"),'
+ if match(r'#include <linux/compiler.h>'):
+ line = ''
+ if match(r'#include <linux/clocksource.h>'):
+ line = ''
+ if match(r'#include <linux\/types.h>'):
+ line = '#include <asm/types.h>'
+ if match(r'\t\.change_pte.*kvm_mmu_notifier_change_pte,'):
+ line = '#ifdef MMU_NOTIFIER_HAS_CHANGE_PTE\n' + line + '\n#endif'
+ if match(r'static void kvm_mmu_notifier_change_pte'):
+ line = sub(r'static ', '', line)
+ line = '#ifdef MMU_NOTIFIER_HAS_CHANGE_PTE\n' + 'static\n' + '#endif\n' + line
+ line = sub(r'\bhrtimer_init\b', 'hrtimer_init_p', line)
+ line = sub(r'\bhrtimer_start\b', 'hrtimer_start_p', line)
+ line = sub(r'\bhrtimer_cancel\b', 'hrtimer_cancel_p', line)
+ if match(r'case KVM_CAP_SYNC_MMU'):
+ line = '#ifdef CONFIG_MMU_NOTIFIER\n' + line + '\n#endif'
+ for ident in compat_apis:
+ line = sub(r'\b' + ident + r'\b', 'kvm_' + ident, line)
+ if match(r'kvm_.*_fops\.owner = module;'):
+ line = 'IF_ANON_INODES_DOES_REFCOUNTS(' + line + ')'
+ if not match(r'#include'):
+ line = sub(r'\blapic\b', 'l_apic', line)
+ if match(r'struct pt_regs regs'):
+ mce = True
+ if mce and match(r'\.cs'):
+ line = sub(r'cs', r'kvm_pt_regs_cs', line)
+ if mce and match(r'\.flags'):
+ line = sub(r'flags', r'kvm_pt_regs_flags', line)
+ mce = False
+ line = sub(r'boot_cpu_data.x86_phys_bits', 'kvm_x86_phys_bits', line)
+ if match(r'^static const struct vm_operations_struct kvm_'):
+ line = sub(r' const ', ' ', line)
+ if line == 'static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, enum km_type idx)':
+ line = sub(r'\)', ', struct page **mapped_page)', line)
+ if line == '\treturn kmap_atomic(page, idx);':
+ line = '\t*mapped_page = page;\n' + line
+ if line == 'static void nested_svm_unmap(void *addr, enum km_type idx)':
+ line = sub(r'\)', ', struct page *mapped_page)', line)
+ if line == '\tpage = kmap_atomic_to_page(addr);':
+ line = '\tpage = mapped_page;'
+ if match(r'= nested_svm_map(.*);'):
+ line = '\t{ struct page *mapped_page;\n' + sub(r'\);', ', &mapped_page);', line)
+ if match('nested_svm_unmap(.*);'):
+ line = sub(r'\);', ', mapped_page); }', line)
+ if match(r'->thread.debugreg[0-7]'):
+ line = sub(r'->thread.debugreg([0-7])', r'->thread.kvm_compat_debugreg(\1)', line)
+ w(line)
+ if match(r'\tkvm_init_debug'):
+ w('\thrtimer_kallsyms_resolve();')
+ if match(r'apic->timer.dev.function ='):
+ w('\thrtimer_data_pointer(&apic->timer.dev);')
+ if match(r'pt->timer.function ='):
+ w('\thrtimer_data_pointer(&pt->timer);')
+ data = str.join('', [line + '\n' for line in result])
+ return data
+
+def _hack(fname, arch):
+ data = file(fname).read()
+ data = __hack(data)
+ file(fname, 'w').write(data)
+
+def unifdef(fname):
+ data = file('unifdef.h').read() + file(fname).read()
+ file(fname, 'w').write(data)
+
+def hack(T, arch, file):
+ _hack(T + '/' + file, arch)
+
+hack_files = {
+ 'x86': str.split('kvm_main.c mmu.c vmx.c svm.c x86.c irq.h lapic.c'
+ ' i8254.c timer.c eventfd.c'),
+ 'ia64': str.split('kvm_main.c kvm_fw.c kvm_lib.c kvm-ia64.c'),
+}
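+
+# Only the files listed above need the line-level rewriting; everything else
+# is copied verbatim (plus the unifdef.h prelude prepended by unifdef()).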
+
+def mkdir(dir):
+ if not os.path.exists(dir):
+ os.makedirs(dir)
+
+def cp(src, dst):
+ mkdir(os.path.dirname(dst))
+ file(dst, 'w').write(file(src).read())
+
+def copy_if_changed(src, dst):
+ for dir, subdirs, files in os.walk(src):
+ ndir = dst + '/' + dir[len(src)+1:]
+ mkdir(ndir)
+ for fname in files:
+ old = ndir + '/' + fname
+ new = dir + '/' + fname
+ try:
+ if file(old).read() != file(new).read():
+ raise Exception('different.')
+ except:
+ cp(new, old)
+
+def rmtree(path):
+ if os.path.exists(path):
+ shutil.rmtree(path)
+
+def header_sync(arch):
+ T = 'header'
+ rmtree(T)
+ for file in glob('%(linux)s/include/linux/kvm*.h' % { 'linux': linux }):
+ out = ('%(T)s/include/linux/%(name)s'
+ % { 'T': T, 'name': os.path.basename(file) })
+ cp(file, out)
+ unifdef(out)
+ for file in glob(('%(linux)s/include/trace/events/kvm*.h'
+ % { 'linux': linux })):
+ out = ('%(T)s/include/trace/events/%(name)s'
+ % { 'T': T, 'name': os.path.basename(file) })
+ cp(file, out)
+ unifdef(out)
+ arch_headers = (
+ [x
+ for dir in ['%(linux)s/arch/%(arch)s/include/asm/./kvm*.h',
+ '%(linux)s/arch/%(arch)s/include/asm/./vmx*.h',
+ '%(linux)s/arch/%(arch)s/include/asm/./svm*.h',
+ '%(linux)s/arch/%(arch)s/include/asm/./virtext*.h']
+ for x in glob(dir % { 'arch': arch, 'linux': linux })
+ ])
+ for file in arch_headers:
+ out = ('%(T)s/include/asm-%(arch)s/%(name)s'
+ % { 'T': T, 'name': os.path.basename(file), 'arch': arch })
+ cp(file, out)
+ unifdef(out)
+ hack(T, 'x86', 'include/linux/kvm.h')
+ hack(T, arch, 'include/asm-%(arch)s/kvm.h' % { 'arch': arch })
+ copy_if_changed(T, '.')
+ rmtree(T)
+
+def source_sync(arch):
+ T = 'source'
+ rmtree(T)
+ sources = [file
+ for pattern in ['%(linux)s/arch/%(arch)s/kvm/*.[cSh]',
+ '%(linux)s/virt/kvm/*.[cSh]']
+ for file in glob(pattern % { 'linux': linux, 'arch': arch })
+ if not file.endswith('.mod.c')
+ ]
+ for file in sources:
+ out = ('%(T)s/%(name)s'
+ % { 'T': T, 'name': os.path.basename(file) })
+ cp(file, out)
+
+ for i in glob(T + '/*.c'):
+ unifdef(i)
+
+ for i in hack_files[arch]:
+ hack(T, arch, i)
+
+ copy_if_changed(T, arch)
+ rmtree(T)
+
+for arch in ['x86', 'ia64']:
+ header_sync(arch)
+ source_sync(arch)
diff --git a/kernel/unifdef.h b/kernel/unifdef.h
new file mode 100644
index 00000000..6fc7be08
--- /dev/null
+++ b/kernel/unifdef.h
@@ -0,0 +1,40 @@
+#ifndef KVM_UNIFDEF_H
+#define KVM_UNIFDEF_H
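+
+/*
+ * The sync script prepends this header to the copied kernel sources so the
+ * arch CONFIG_* symbols follow from the compiler's own target macros,
+ * letting the files build outside a configured kernel tree.
+ */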
+
+#ifdef __i386__
+#ifndef CONFIG_X86_32
+#define CONFIG_X86_32 1
+#endif
+#endif
+
+#ifdef __x86_64__
+#ifndef CONFIG_X86_64
+#define CONFIG_X86_64 1
+#endif
+#endif
+
+#if defined(__i386__) || defined (__x86_64__)
+#ifndef CONFIG_X86
+#define CONFIG_X86 1
+#endif
+#endif
+
+#ifdef __ia64__
+#ifndef CONFIG_IA64
+#define CONFIG_IA64 1
+#endif
+#endif
+
+#ifdef __PPC__
+#ifndef CONFIG_PPC
+#define CONFIG_PPC 1
+#endif
+#endif
+
+#ifdef __s390__
+#ifndef CONFIG_S390
+#define CONFIG_S390 1
+#endif
+#endif
+
+#endif
diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild
new file mode 100644
index 00000000..af0824ce
--- /dev/null
+++ b/kernel/x86/Kbuild
@@ -0,0 +1,14 @@
+obj-m := kvm.o kvm-intel.o kvm-amd.o
+kvm-objs := kvm_main.o x86.o mmu.o emulate.o ../anon_inodes.o irq.o i8259.o \
+ lapic.o ioapic.o preempt.o i8254.o coalesced_mmio.o irq_comm.o \
+ timer.o eventfd.o \
+ ../external-module-compat.o ../request-irq-compat.o
+ifeq ($(CONFIG_IOMMU_API),y)
+kvm-objs += iommu.o
+endif
+kvm-intel-objs := vmx.o vmx-debug.o
+kvm-amd-objs := svm.o
+
+kvm-objs += ../srcu.o
+
+CFLAGS_kvm_main.o = -DKVM_MAIN
diff --git a/kernel/x86/Makefile.pre b/kernel/x86/Makefile.pre
new file mode 100644
index 00000000..e38baf13
--- /dev/null
+++ b/kernel/x86/Makefile.pre
@@ -0,0 +1 @@
+prerequisite:
diff --git a/kernel/x86/debug.h b/kernel/x86/debug.h
new file mode 100644
index 00000000..35793652
--- /dev/null
+++ b/kernel/x86/debug.h
@@ -0,0 +1,23 @@
+#ifndef __KVM_DEBUG_H
+#define __KVM_DEBUG_H
+
+#ifdef KVM_DEBUG
+
+void show_msrs(struct kvm_vcpu *vcpu);
+
+
+void show_irq(struct kvm_vcpu *vcpu, int irq);
+void show_page(struct kvm_vcpu *vcpu, gva_t addr);
+void show_u64(struct kvm_vcpu *vcpu, gva_t addr);
+void show_code(struct kvm_vcpu *vcpu);
+int vm_entry_test(struct kvm_vcpu *vcpu);
+
+void vmcs_dump(struct kvm_vcpu *vcpu);
+void regs_dump(struct kvm_vcpu *vcpu);
+void sregs_dump(struct kvm_vcpu *vcpu);
+void show_pending_interrupts(struct kvm_vcpu *vcpu);
+void vcpu_dump(struct kvm_vcpu *vcpu);
+
+#endif
+
+#endif
diff --git a/kernel/x86/external-module-compat.h b/kernel/x86/external-module-compat.h
new file mode 100644
index 00000000..b32e68ed
--- /dev/null
+++ b/kernel/x86/external-module-compat.h
@@ -0,0 +1,687 @@
+
+/*
+ * Compatibility header for building as an external module.
+ */
+
+#include <linux/compiler.h>
+#include <linux/version.h>
+
+#include <linux/types.h>
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+
+typedef u64 phys_addr_t;
+
+#endif
+
+#include "../external-module-compat-comm.h"
+
+#include <asm/msr.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_HAVE_KVM_EVENTFD
+#define CONFIG_HAVE_KVM_EVENTFD 1
+#endif
+
+#ifndef CONFIG_KVM_APIC_ARCHITECTURE
+#define CONFIG_KVM_APIC_ARCHITECTURE
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+
+#ifdef CONFIG_X86_64
+#define DECLARE_ARGS(val, low, high) unsigned low, high
+#define EAX_EDX_VAL(val, low, high) ((low) | ((u64)(high) << 32))
+#define EAX_EDX_ARGS(val, low, high) "a" (low), "d" (high)
+#define EAX_EDX_RET(val, low, high) "=a" (low), "=d" (high)
+#else
+#define DECLARE_ARGS(val, low, high) unsigned long long val
+#define EAX_EDX_VAL(val, low, high) (val)
+#define EAX_EDX_ARGS(val, low, high) "A" (val)
+#define EAX_EDX_RET(val, low, high) "=A" (val)
+#endif
+
+#ifndef __ASM_EX_SEC
+# define __ASM_EX_SEC " .section __ex_table,\"a\"\n"
+#endif
+
+#ifndef _ASM_EXTABLE
+# define _ASM_EXTABLE(from,to) \
+ __ASM_EX_SEC \
+ _ASM_ALIGN "\n" \
+ _ASM_PTR #from "," #to "\n" \
+ " .previous\n"
+#endif
+
+#ifndef __ASM_SEL
+#ifdef CONFIG_X86_32
+# define __ASM_SEL(a,b) __ASM_FORM(a)
+#else
+# define __ASM_SEL(a,b) __ASM_FORM(b)
+#endif
+#endif
+
+#ifndef __ASM_FORM
+# define __ASM_FORM(x) " " #x " "
+#endif
+
+#ifndef _ASM_PTR
+#define _ASM_PTR __ASM_SEL(.long, .quad)
+#endif
+
+#ifndef _ASM_ALIGN
+#define _ASM_ALIGN __ASM_SEL(.balign 4, .balign 8)
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) || defined(CONFIG_X86_64)
+
+static inline unsigned long long native_read_msr_safe(unsigned int msr,
+ int *err)
+{
+ DECLARE_ARGS(val, low, high);
+
+ asm volatile("2: rdmsr ; xor %[err],%[err]\n"
+ "1:\n\t"
+ ".section .fixup,\"ax\"\n\t"
+ "3: mov %[fault],%[err] ; jmp 1b\n\t"
+ ".previous\n\t"
+ _ASM_EXTABLE(2b, 3b)
+ : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
+ : "c" (msr), [fault] "i" (-EFAULT));
+ return EAX_EDX_VAL(val, low, high);
+}
+
+static inline unsigned long long native_read_tsc(void)
+{
+ unsigned long long val;
+ asm volatile("rdtsc" : "=A" (val));
+ return val;
+}
+
+#endif
+
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
+
+static inline int rdmsrl_safe(unsigned msr, unsigned long long *p)
+{
+ int err;
+
+ *p = native_read_msr_safe(msr, &err);
+ return err;
+}
+
+#endif
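+
+/*
+ * Usage sketch for the fallback above (the MSR chosen is just an example):
+ *
+ *	u64 val;
+ *
+ *	if (rdmsrl_safe(MSR_EFER, &val))
+ *		return;		(the rdmsr faulted and the fixup ran)
+ */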
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22)
+
+#ifndef _EFER_SCE
+#define _EFER_SCE 0 /* SYSCALL/SYSRET */
+#endif
+
+#ifndef EFER_SCE
+#define EFER_SCE (1<<_EFER_SCE)
+#endif
+
+#endif
+
+#ifndef MSR_KERNEL_GS_BASE
+#define MSR_KERNEL_GS_BASE 0xc0000102
+#endif
+
+#ifndef MSR_VM_CR
+#define MSR_VM_CR 0xc0010114
+#endif
+
+#ifndef MSR_VM_HSAVE_PA
+#define MSR_VM_HSAVE_PA 0xc0010117
+#endif
+
+#ifndef _EFER_SVME
+#define _EFER_SVME 12
+#define EFER_SVME (1<<_EFER_SVME)
+#endif
+
+#ifndef _EFER_FFXSR
+#define _EFER_FFXSR 14 /* Enable Fast FXSAVE/FXRSTOR */
+#define EFER_FFXSR (1<<_EFER_FFXSR)
+#endif
+
+#ifndef MSR_STAR
+#define MSR_STAR 0xc0000081
+#endif
+
+#ifndef MSR_K8_INT_PENDING_MSG
+#define MSR_K8_INT_PENDING_MSG 0xc0010055
+#endif
+
+#include <asm/cpufeature.h>
+
+#ifndef X86_FEATURE_SVM
+#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */
+#endif
+
+#ifndef X86_FEATURE_FXSR_OPT
+#define X86_FEATURE_FXSR_OPT (1*32+25)
+#endif
+
+#ifndef X86_FEATURE_GBPAGES
+#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */
+#endif
+
+#ifndef X86_FEATURE_SSSE3
+#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
+#endif
+
+#ifndef X86_FEATURE_XMM4_1
+#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
+#endif
+
+#ifndef X86_FEATURE_XMM4_2
+#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
+#endif
+
+#ifndef X86_FEATURE_MOVBE
+#define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */
+#endif
+
+#ifndef X86_FEATURE_POPCNT
+#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
+#endif
+
+#ifndef X86_FEATURE_CR8_LEGACY
+#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
+#endif
+
+#ifndef X86_FEATURE_ABM
+#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
+#endif
+
+#ifndef X86_FEATURE_SSE4A
+#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
+#endif
+
+#ifndef X86_FEATURE_MISALIGNSSE
+#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
+#endif
+
+#ifndef X86_FEATURE_3DNOWPREFETCH
+#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
+#endif
+
+#ifndef X86_FEATURE_SSE5
+#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
+#endif
+
+#ifndef X86_FEATURE_X2APIC
+#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
+#endif
+
+#ifndef MSR_AMD64_PATCH_LOADER
+#define MSR_AMD64_PATCH_LOADER 0xc0010020
+#endif
+
+#include <linux/smp.h>
+
+#ifndef X86_CR0_PE
+#define X86_CR0_PE 0x00000001
+#endif
+
+#ifndef X86_CR0_MP
+#define X86_CR0_MP 0x00000002
+#endif
+
+#ifndef X86_CR0_EM
+#define X86_CR0_EM 0x00000004
+#endif
+
+#ifndef X86_CR0_TS
+#define X86_CR0_TS 0x00000008
+#endif
+
+#ifndef X86_CR0_ET
+#define X86_CR0_ET 0x00000010
+#endif
+
+#ifndef X86_CR0_NE
+#define X86_CR0_NE 0x00000020
+#endif
+
+#ifndef X86_CR0_WP
+#define X86_CR0_WP 0x00010000
+#endif
+
+#ifndef X86_CR0_AM
+#define X86_CR0_AM 0x00040000
+#endif
+
+#ifndef X86_CR0_NW
+#define X86_CR0_NW 0x20000000
+#endif
+
+#ifndef X86_CR0_CD
+#define X86_CR0_CD 0x40000000
+#endif
+
+#ifndef X86_CR0_PG
+#define X86_CR0_PG 0x80000000
+#endif
+
+#ifndef X86_CR3_PWT
+#define X86_CR3_PWT 0x00000008
+#endif
+
+#ifndef X86_CR3_PCD
+#define X86_CR3_PCD 0x00000010
+#endif
+
+#ifndef X86_CR4_VMXE
+#define X86_CR4_VMXE 0x00002000
+#endif
+
+#undef X86_CR8_TPR
+#define X86_CR8_TPR 0x0f
+
+/*
+ * 2.6.22 does not define set_64bit() for non-PAE builds
+ */
+#ifdef CONFIG_X86_32
+
+#include <asm/cmpxchg.h>
+
+static inline void __kvm_set_64bit(u64 *ptr, u64 val)
+{
+ unsigned int low = val;
+ unsigned int high = val >> 32;
+
+ __asm__ __volatile__ (
+ "\n1:\t"
+ "movl (%0), %%eax\n\t"
+ "movl 4(%0), %%edx\n\t"
+ "lock cmpxchg8b (%0)\n\t"
+ "jnz 1b"
+ : /* no outputs */
+ : "D"(ptr),
+ "b"(low),
+ "c"(high)
+ : "ax","dx","memory");
+}
+
+#undef set_64bit
+#define set_64bit __kvm_set_64bit
+
+static inline unsigned long long __kvm_cmpxchg64(volatile void *ptr,
+ unsigned long long old,
+ unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__("lock cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+ "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+}
+
+#define kvm_cmpxchg64(ptr,o,n)\
+ ((__typeof__(*(ptr)))__kvm_cmpxchg64((ptr),(unsigned long long)(o),\
+ (unsigned long long)(n)))
+
+#undef cmpxchg64
+#define cmpxchg64(ptr, o, n) kvm_cmpxchg64(ptr, o, n)
+
+#endif
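+
+/*
+ * Why the loop above: on 32-bit hosts two plain 32-bit stores could be
+ * observed torn by another CPU, while cmpxchg8b publishes all eight bytes
+ * at once. Sketch (the variables are illustrative):
+ *
+ *	set_64bit(&spte, new_spte);	never observed half-written
+ */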
+
+#ifndef CONFIG_PREEMPT_NOTIFIERS
+/*
+ * Include sched|preempt.h before defining CONFIG_PREEMPT_NOTIFIERS to avoid
+ * a miscompile.
+ */
+#include <linux/sched.h>
+#include <linux/preempt.h>
+#define CONFIG_PREEMPT_NOTIFIERS
+#define CONFIG_PREEMPT_NOTIFIERS_COMPAT
+
+struct preempt_notifier;
+
+struct preempt_ops {
+ void (*sched_in)(struct preempt_notifier *notifier, int cpu);
+ void (*sched_out)(struct preempt_notifier *notifier,
+ struct task_struct *next);
+};
+
+struct preempt_notifier {
+ struct list_head link;
+ struct task_struct *tsk;
+ struct preempt_ops *ops;
+};
+
+void preempt_notifier_register(struct preempt_notifier *notifier);
+void preempt_notifier_unregister(struct preempt_notifier *notifier);
+
+static inline void preempt_notifier_init(struct preempt_notifier *notifier,
+ struct preempt_ops *ops)
+{
+ notifier->ops = ops;
+}
+
+void start_special_insn(void);
+void end_special_insn(void);
+void in_special_section(void);
+
+void preempt_notifier_sys_init(void);
+void preempt_notifier_sys_exit(void);
+
+#else
+
+static inline void start_special_insn(void) {}
+static inline void end_special_insn(void) {}
+static inline void in_special_section(void) {}
+
+static inline void preempt_notifier_sys_init(void) {}
+static inline void preempt_notifier_sys_exit(void) {}
+
+#endif
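+
+/*
+ * Typical use of the notifier API above (the callbacks are illustrative):
+ *
+ *	static void my_sched_in(struct preempt_notifier *pn, int cpu);
+ *	static void my_sched_out(struct preempt_notifier *pn,
+ *				 struct task_struct *next);
+ *	static struct preempt_ops my_ops = {
+ *		.sched_in = my_sched_in,
+ *		.sched_out = my_sched_out,
+ *	};
+ *
+ *	preempt_notifier_init(&notifier, &my_ops);
+ *	preempt_notifier_register(&notifier);
+ *	...
+ *	preempt_notifier_unregister(&notifier);
+ */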
+
+/* CONFIG_HAS_IOMEM is apparently fairly new too (2.6.21 for x86_64). */
+#ifndef CONFIG_HAS_IOMEM
+#define CONFIG_HAS_IOMEM 1
+#endif
+
+/* X86_FEATURE_NX is missing in some x86_64 kernels */
+
+#include <asm/cpufeature.h>
+
+#ifndef X86_FEATURE_NX
+#define X86_FEATURE_NX (1*32+20)
+#endif
+
+/* EFER_LMA and EFER_LME are missing in pre 2.6.24 i386 kernels */
+#ifndef EFER_LME
+#define _EFER_LME 8 /* Long mode enable */
+#define _EFER_LMA 10 /* Long mode active (read-only) */
+#define EFER_LME (1<<_EFER_LME)
+#define EFER_LMA (1<<_EFER_LMA)
+#endif
+
+struct kvm_desc_struct {
+ union {
+ struct { unsigned int a, b; };
+ struct {
+ u16 limit0;
+ u16 base0;
+ unsigned base1: 8, type: 4, s: 1, dpl: 2, p: 1;
+ unsigned limit: 4, avl: 1, l: 1, d: 1, g: 1, base2: 8;
+ };
+
+ };
+} __attribute__((packed));
+
+struct kvm_ldttss_desc64 {
+ u16 limit0;
+ u16 base0;
+ unsigned base1 : 8, type : 5, dpl : 2, p : 1;
+ unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
+ u32 base3;
+ u32 zero1;
+} __attribute__((packed));
+
+struct kvm_desc_ptr {
+ unsigned short size;
+ unsigned long address;
+} __attribute__((packed));
+
+static inline unsigned long kvm_get_desc_base(const struct kvm_desc_struct *desc)
+{
+ return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+}
+
+static inline unsigned long kvm_get_desc_limit(const struct kvm_desc_struct *desc)
+{
+ return desc->limit0 | (desc->limit << 16);
+}
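+
+/*
+ * Example: base0=0x2000, base1=0x03, base2=0x00 yields base 0x032000;
+ * limit0=0xffff with limit=0xf gives limit 0xfffff, i.e. the usual flat
+ * 4GB segment when g=1.
+ */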
+
+#include <asm/msr.h>
+#ifndef MSR_FS_BASE
+#define MSR_FS_BASE 0xc0000100
+#endif
+#ifndef MSR_GS_BASE
+#define MSR_GS_BASE 0xc0000101
+#endif
+
+/* undefine lapic */
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18)
+
+#undef lapic
+
+#endif
+
+#include <asm/hw_irq.h>
+#ifndef NMI_VECTOR
+#define NMI_VECTOR 2
+#endif
+
+#ifndef MSR_MTRRcap
+#define MSR_MTRRcap 0x0fe
+#define MSR_MTRRfix64K_00000 0x250
+#define MSR_MTRRfix16K_80000 0x258
+#define MSR_MTRRfix16K_A0000 0x259
+#define MSR_MTRRfix4K_C0000 0x268
+#define MSR_MTRRfix4K_C8000 0x269
+#define MSR_MTRRfix4K_D0000 0x26a
+#define MSR_MTRRfix4K_D8000 0x26b
+#define MSR_MTRRfix4K_E0000 0x26c
+#define MSR_MTRRfix4K_E8000 0x26d
+#define MSR_MTRRfix4K_F0000 0x26e
+#define MSR_MTRRfix4K_F8000 0x26f
+#define MSR_MTRRdefType 0x2ff
+#endif
+
+#ifndef MSR_IA32_CR_PAT
+#define MSR_IA32_CR_PAT 0x00000277
+#endif
+
+#ifndef MSR_VM_IGNNE
+#define MSR_VM_IGNNE 0xc0010115
+#endif
+
+/* Define DEBUGCTLMSR bits */
+#ifndef DEBUGCTLMSR_LBR
+
+#define _DEBUGCTLMSR_LBR 0 /* last branch recording */
+#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */
+
+#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR)
+#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF)
+
+#endif
+
+#ifndef MSR_FAM10H_MMIO_CONF_BASE
+#define MSR_FAM10H_MMIO_CONF_BASE 0xc0010058
+#endif
+
+#ifndef MSR_AMD64_NB_CFG
+#define MSR_AMD64_NB_CFG 0xc001001f
+#endif
+
+#include <asm/asm.h>
+
+#ifndef __ASM_SIZE
+# define ____ASM_FORM(x) " " #x " "
+# ifdef CONFIG_X86_64
+# define __ASM_SIZE(inst) ____ASM_FORM(inst##q)
+# else
+# define __ASM_SIZE(inst) ____ASM_FORM(inst##l)
+# endif
+#endif
+
+#ifndef _ASM_PTR
+# ifdef CONFIG_X86_64
+# define _ASM_PTR ".quad"
+# else
+# define _ASM_PTR ".long"
+# endif
+#endif
+
+/* Intel VT MSRs */
+#ifndef MSR_IA32_VMX_BASIC
+#define MSR_IA32_VMX_BASIC 0x00000480
+#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
+#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
+#define MSR_IA32_VMX_MISC 0x00000485
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
+#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
+#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
+#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
+#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
+#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
+#endif
+
+#ifndef MSR_IA32_FEATURE_CONTROL
+#define MSR_IA32_FEATURE_CONTROL 0x0000003a
+
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED (1<<2)
+#endif
+
+#ifndef MSR_IA32_TSC
+#define MSR_IA32_TSC 0x00000010
+#endif
+
+#ifndef MSR_K7_HWCR
+#define MSR_K7_HWCR 0xc0010015
+#endif
+
+#ifndef MSR_K8_SYSCFG
+#define MSR_K8_SYSCFG 0xc0010010
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) && defined(__x86_64__)
+
+#undef set_debugreg
+#define set_debugreg(value, register) \
+ __asm__("movq %0,%%db" #register \
+ : /* no output */ \
+ :"r" ((unsigned long)value))
+
+#endif
+
+#if !defined(CONFIG_X86_64) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+#define kvm_compat_debugreg(x) debugreg[x]
+#else
+#define kvm_compat_debugreg(x) debugreg##x
+#endif
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29)
+
+struct mtrr_var_range {
+ u32 base_lo;
+ u32 base_hi;
+ u32 mask_lo;
+ u32 mask_hi;
+};
+
+/* In the Intel processor's MTRR interface, the MTRR type is always held in
+ an 8 bit field: */
+typedef u8 mtrr_type;
+
+#define MTRR_NUM_FIXED_RANGES 88
+#define MTRR_MAX_VAR_RANGES 256
+
+struct mtrr_state_type {
+ struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES];
+ mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES];
+ unsigned char enabled;
+ unsigned char have_fixed;
+ mtrr_type def_type;
+};
+
+#endif
+
+#ifndef CONFIG_HAVE_KVM_IRQCHIP
+#define CONFIG_HAVE_KVM_IRQCHIP 1
+#endif
+
+#include <asm/mce.h>
+
+#ifndef MCG_CTL_P
+#define MCG_CTL_P (1ULL<<8)
+#define MCG_STATUS_MCIP (1ULL<<2)
+#define MCI_STATUS_VAL (1ULL<<63)
+#define MCI_STATUS_OVER (1ULL<<62)
+#define MCI_STATUS_UC (1ULL<<61)
+#endif
+
+/* do_machine_check() exported in 2.6.31 */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
+
+static inline void kvm_do_machine_check(struct pt_regs *regs, long error_code)
+{
+ panic("kvm machine check!\n");
+}
+
+#else
+
+#define kvm_do_machine_check do_machine_check
+
+#endif
+
+/* pt_regs.flags was once pt_regs.eflags */
+
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
+
+#define kvm_pt_regs_flags eflags
+
+# ifdef CONFIG_X86_64
+# define kvm_pt_regs_cs cs
+# else
+# define kvm_pt_regs_cs xcs
+# endif
+
+#else
+
+#define kvm_pt_regs_flags flags
+#define kvm_pt_regs_cs cs
+
+#endif
+
+/* boot_cpu_data.x86_phys_bits only appeared for i386 in 2.6.30 */
+
+#if !defined(CONFIG_X86_64) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
+
+#define kvm_x86_phys_bits 40
+
+#else
+
+#define kvm_x86_phys_bits (boot_cpu_data.x86_phys_bits)
+
+#endif
+
+#include <asm/apicdef.h>
+
+#ifndef APIC_BASE_MSR
+#define APIC_BASE_MSR 0x800
+#endif
+
+#ifndef APIC_SPIV_DIRECTED_EOI
+#define APIC_SPIV_DIRECTED_EOI (1 << 12)
+#endif
+
+#ifndef APIC_LVR_DIRECTED_EOI
+#define APIC_LVR_DIRECTED_EOI (1 << 24)
+#endif
+
+#ifndef APIC_SELF_IPI
+#define APIC_SELF_IPI 0x3F0
+#endif
+
+#ifndef X2APIC_ENABLE
+#define X2APIC_ENABLE (1UL << 10)
+#endif
+
diff --git a/kernel/x86/preempt.c b/kernel/x86/preempt.c
new file mode 100644
index 00000000..440060b2
--- /dev/null
+++ b/kernel/x86/preempt.c
@@ -0,0 +1,247 @@
+
+#ifdef CONFIG_PREEMPT_NOTIFIERS_COMPAT
+
+#include <linux/sched.h>
+#include <linux/percpu.h>
+
+static DEFINE_SPINLOCK(pn_lock);
+static LIST_HEAD(pn_list);
+
+#define dprintk(fmt) do { \
+ if (0) \
+ printk("%s (%d/%d): " fmt, __FUNCTION__, \
+ current->pid, raw_smp_processor_id()); \
+ } while (0)
+
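+/*
+ * The compat notifiers are emulated with the hardware debug registers:
+ * db0 is armed as an execution breakpoint (db7 = 0x701) either on
+ * schedule(), to catch this task being switched out, or on the return
+ * address inside schedule(), to catch it being switched back in. The #DB
+ * handler installed below then runs the registered callbacks.
+ */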
+static void preempt_enable_sched_out_notifiers(void)
+{
+ asm volatile ("mov %0, %%db0" : : "r"(schedule));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+ current->thread.kvm_compat_debugreg(7) = 0ul;
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void preempt_enable_sched_in_notifiers(void * addr)
+{
+ asm volatile ("mov %0, %%db0" : : "r"(addr));
+ asm volatile ("mov %0, %%db7" : : "r"(0x701ul));
+ current->thread.kvm_compat_debugreg(0) = (unsigned long) addr;
+ current->thread.kvm_compat_debugreg(7) = 0x701ul;
+#ifdef TIF_DEBUG
+ set_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void __preempt_disable_notifiers(void)
+{
+ asm volatile ("mov %0, %%db7" : : "r"(0ul));
+}
+
+static void preempt_disable_notifiers(void)
+{
+ __preempt_disable_notifiers();
+ current->thread.kvm_compat_debugreg(7) = 0ul;
+#ifdef TIF_DEBUG
+ clear_tsk_thread_flag(current, TIF_DEBUG);
+#endif
+}
+
+static void fastcall __attribute__((used)) preempt_notifier_trigger(void *** ip)
+{
+ struct preempt_notifier *pn;
+ int cpu = raw_smp_processor_id();
+ int found = 0;
+
+ dprintk(" - in\n");
+ //dump_stack();
+ spin_lock(&pn_lock);
+ list_for_each_entry(pn, &pn_list, link)
+ if (pn->tsk == current) {
+ found = 1;
+ break;
+ }
+ spin_unlock(&pn_lock);
+
+ if (found) {
+ if ((void *) *ip != schedule) {
+ dprintk("sched_in\n");
+ preempt_enable_sched_out_notifiers();
+
+ preempt_disable();
+ local_irq_enable();
+ pn->ops->sched_in(pn, cpu);
+ local_irq_disable();
+ preempt_enable_no_resched();
+ } else {
+ void * sched_in_addr;
+ dprintk("sched_out\n");
+#ifdef CONFIG_X86_64
+ sched_in_addr = **(ip+3);
+#else
+ /* no special debug stack switch on x86 */
+ sched_in_addr = (void *) *(ip+3);
+#endif
+ preempt_enable_sched_in_notifiers(sched_in_addr);
+
+ preempt_disable();
+ local_irq_enable();
+ pn->ops->sched_out(pn, NULL);
+ local_irq_disable();
+ preempt_enable_no_resched();
+ }
+ } else
+ __preempt_disable_notifiers();
+ dprintk(" - out\n");
+}
+
+unsigned long orig_int1_handler;
+
+#ifdef CONFIG_X86_64
+
+#define SAVE_REGS \
+ "push %rax; push %rbx; push %rcx; push %rdx; " \
+ "push %rsi; push %rdi; push %rbp; " \
+ "push %r8; push %r9; push %r10; push %r11; " \
+ "push %r12; push %r13; push %r14; push %r15"
+
+#define RESTORE_REGS \
+ "pop %r15; pop %r14; pop %r13; pop %r12; " \
+ "pop %r11; pop %r10; pop %r9; pop %r8; " \
+ "pop %rbp; pop %rdi; pop %rsi; " \
+ "pop %rdx; pop %rcx; pop %rbx; pop %rax "
+
+#define TMP "%rax"
+
+#else
+
+#define SAVE_REGS "pusha"
+#define RESTORE_REGS "popa"
+#define TMP "%eax"
+
+#endif
+
+asm ("pn_int1_handler: \n\t"
+ "push " TMP " \n\t"
+ "mov %db7, " TMP " \n\t"
+ "cmp $0x701, " TMP " \n\t"
+ "pop " TMP " \n\t"
+ "jnz .Lnotme \n\t"
+ "push " TMP " \n\t"
+ "mov %db6, " TMP " \n\t"
+ "test $0x1, " TMP " \n\t"
+ "pop " TMP " \n\t"
+ "jz .Lnotme \n\t"
+ SAVE_REGS "\n\t"
+#ifdef CONFIG_X86_64
+ "leaq 120(%rsp),%rdi\n\t"
+#else
+ "leal 32(%esp),%eax\n\t"
+#endif
+ "call preempt_notifier_trigger \n\t"
+ RESTORE_REGS "\n\t"
+#ifdef CONFIG_X86_64
+ "orq $0x10000, 16(%rsp) \n\t"
+ "iretq \n\t"
+#else
+ "orl $0x10000, 8(%esp) \n\t"
+ "iret \n\t"
+#endif
+ ".Lnotme: \n\t"
+#ifdef CONFIG_X86_64
+ "jmpq *orig_int1_handler\n\t"
+#else
+ "jmpl *orig_int1_handler\n\t"
+#endif
+ );
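+
+/*
+ * pn_int1_handler above: if db7 carries the 0x701 signature and db6 says
+ * breakpoint 0 fired, preempt_notifier_trigger() is called with a pointer
+ * into the saved frame, RF (bit 16) is set in the saved flags so the
+ * interrupted instruction resumes without refaulting, and we iret; any
+ * other #DB is forwarded to the original handler.
+ */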
+
+void preempt_notifier_register(struct preempt_notifier *notifier)
+{
+ unsigned long flags;
+
+ dprintk(" - in\n");
+ spin_lock_irqsave(&pn_lock, flags);
+ preempt_enable_sched_out_notifiers();
+ notifier->tsk = current;
+ list_add(&notifier->link, &pn_list);
+ spin_unlock_irqrestore(&pn_lock, flags);
+ dprintk(" - out\n");
+}
+
+void preempt_notifier_unregister(struct preempt_notifier *notifier)
+{
+ unsigned long flags;
+
+ dprintk(" - in\n");
+ spin_lock_irqsave(&pn_lock, flags);
+ list_del(&notifier->link);
+ spin_unlock_irqrestore(&pn_lock, flags);
+ preempt_disable_notifiers();
+ dprintk(" - out\n");
+}
+
+struct intr_gate {
+ u16 offset0;
+ u16 segment;
+ u16 junk;
+ u16 offset1;
+#ifdef CONFIG_X86_64
+ u32 offset2;
+ u32 blah;
+#endif
+} __attribute__((packed));
+
+struct idt_desc {
+ u16 limit;
+ struct intr_gate *gates;
+} __attribute__((packed));
+
+static struct intr_gate orig_int1_gate;
+
+void pn_int1_handler(void);
+
+void preempt_notifier_sys_init(void)
+{
+ struct idt_desc idt_desc;
+ struct intr_gate *int1_gate;
+
+ printk("kvm: emulating preempt notifiers;"
+ " do not benchmark on this machine\n");
+ dprintk("\n");
+ asm ("sidt %0" : "=m"(idt_desc));
+ int1_gate = &idt_desc.gates[1];
+ orig_int1_gate = *int1_gate;
+ orig_int1_handler = int1_gate->offset0
+ | ((u32)int1_gate->offset1 << 16);
+#ifdef CONFIG_X86_64
+ orig_int1_handler |= (u64)int1_gate->offset2 << 32;
+#endif
+ int1_gate->offset0 = (unsigned long)pn_int1_handler;
+ int1_gate->offset1 = (unsigned long)pn_int1_handler >> 16;
+#ifdef CONFIG_X86_64
+ int1_gate->offset2 = (unsigned long)pn_int1_handler >> 32;
+#endif
+}
+
+static void do_disable(void *blah)
+{
+#ifdef TIF_DEBUG
+ if (!test_tsk_thread_flag(current, TIF_DEBUG))
+#else
+ if (!current->thread.kvm_compat_debugreg(7))
+#endif
+ __preempt_disable_notifiers();
+}
+
+void preempt_notifier_sys_exit(void)
+{
+ struct idt_desc idt_desc;
+
+ dprintk("\n");
+ kvm_on_each_cpu(do_disable, NULL, 1);
+ asm ("sidt %0" : "=m"(idt_desc));
+ idt_desc.gates[1] = orig_int1_gate;
+}
+
+#endif
diff --git a/kernel/x86/vmx-debug.c b/kernel/x86/vmx-debug.c
new file mode 100644
index 00000000..d466f03f
--- /dev/null
+++ b/kernel/x86/vmx-debug.c
@@ -0,0 +1,1112 @@
+/*
+ * Kernel-based Virtual Machine driver for Linux
+ *
+ * This module enables machines with Intel VT-x extensions to run virtual
+ * machines without emulation or binary translation.
+ *
+ * Debug support
+ *
+ * Copyright (C) 2006 Qumranet, Inc.
+ *
+ * Authors:
+ * Yaniv Kamay <yaniv@qumranet.com>
+ * Avi Kivity <avi@qumranet.com>
+ *
+ */
+
+#include <linux/highmem.h>
+
+#include <linux/kvm_host.h>
+#include <asm/vmx.h>
+#include <asm/kvm_host.h>
+#include "mmu.h"
+#include "lapic.h"
+#include "debug.h"
+
+#ifdef KVM_DEBUG
+
+static unsigned long vmcs_readl(unsigned long field)
+{
+ unsigned long value;
+
+ asm volatile (ASM_VMX_VMREAD_RDX_RAX
+ : "=a"(value) : "d"(field) : "cc");
+ return value;
+}
+
+static u16 vmcs_read16(unsigned long field)
+{
+ return vmcs_readl(field);
+}
+
+static u32 vmcs_read32(unsigned long field)
+{
+ return vmcs_readl(field);
+}
+
+static u64 vmcs_read64(unsigned long field)
+{
+#ifdef CONFIG_X86_64
+ return vmcs_readl(field);
+#else
+ return vmcs_readl(field) | ((u64)vmcs_readl(field+1) << 32);
+#endif
+}
+
+void show_code(struct kvm_vcpu *vcpu)
+{
+ gva_t rip = vmcs_readl(GUEST_RIP);
+ u8 code[50];
+ char buf[30 + 3 * sizeof code];
+ int i;
+ gpa_t gpa;
+
+ if (!is_long_mode(vcpu))
+ rip += vmcs_readl(GUEST_CS_BASE);
+
+ gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, rip);
+ if (gpa == UNMAPPED_GVA)
+ return;
+ if (kvm_read_guest(vcpu->kvm, gpa, code, sizeof code))
+ return;
+ for (i = 0; i < sizeof code; ++i)
+ sprintf(buf + i * 3, " %02x", code[i]);
+ vcpu_printf(vcpu, "code: %lx%s\n", rip, buf);
+}
+
+struct gate_struct {
+ u16 offset_low;
+ u16 segment;
+ unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
+ u16 offset_middle;
+ u32 offset_high;
+ u32 zero1;
+} __attribute__((packed));
+
+void show_irq(struct kvm_vcpu *vcpu, int irq)
+{
+ unsigned long idt_base = vmcs_readl(GUEST_IDTR_BASE);
+ unsigned long idt_limit = vmcs_readl(GUEST_IDTR_LIMIT);
+ struct gate_struct gate;
+ gpa_t gpa;
+
+ if (!is_long_mode(vcpu))
+ vcpu_printf(vcpu, "%s: not in long mode\n", __FUNCTION__);
+
+ if (!is_long_mode(vcpu) || idt_limit < irq * sizeof(gate)) {
+ vcpu_printf(vcpu, "%s: 0x%x read_guest err\n",
+ __FUNCTION__,
+ irq);
+ return;
+ }
+
+ gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, idt_base + irq * sizeof(gate));
+ if (gpa == UNMAPPED_GVA)
+ return;
+
+ /* kvm_read_guest() returns 0 on success */
+ if (kvm_read_guest(vcpu->kvm, gpa, &gate, sizeof(gate))) {
+ vcpu_printf(vcpu, "%s: 0x%x read_guest err\n",
+ __FUNCTION__,
+ irq);
+ return;
+ }
+ vcpu_printf(vcpu, "%s: 0x%x handler 0x%llx\n",
+ __FUNCTION__,
+ irq,
+ ((u64)gate.offset_high << 32) |
+ ((u64)gate.offset_middle << 16) |
+ gate.offset_low);
+}
+
+void show_page(struct kvm_vcpu *vcpu,
+ gva_t addr)
+{
+ u64 *buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ gpa_t gpa;
+
+ if (!buf)
+ return;
+
+ addr &= PAGE_MASK;
+ gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ /* fall through to kfree() on error; kvm_read_guest() returns 0 on success */
+ if (gpa != UNMAPPED_GVA &&
+ !kvm_read_guest(vcpu->kvm, gpa, buf, PAGE_SIZE)) {
+ int i;
+ for (i = 0; i < PAGE_SIZE / sizeof(u64) ; i++) {
+ u8 *ptr = (u8*)&buf[i];
+ int j;
+ vcpu_printf(vcpu, " 0x%16.16lx:",
+ addr + i * sizeof(u64));
+ for (j = 0; j < sizeof(u64) ; j++)
+ vcpu_printf(vcpu, " 0x%2.2x", ptr[j]);
+ vcpu_printf(vcpu, "\n");
+ }
+ }
+ kfree(buf);
+}
+
+void show_u64(struct kvm_vcpu *vcpu, gva_t addr)
+{
+ u64 buf;
+ gpa_t gpa;
+
+ gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ if (gpa == UNMAPPED_GVA)
+ return;
+ if (!kvm_read_guest(vcpu->kvm, gpa, &buf, sizeof(u64))) {
+ u8 *ptr = (u8*)&buf;
+ int j;
+ vcpu_printf(vcpu, " 0x%16.16lx:", addr);
+ for (j = 0; j < sizeof(u64) ; j++)
+ vcpu_printf(vcpu, " 0x%2.2x", ptr[j]);
+ vcpu_printf(vcpu, "\n");
+ }
+}
+
+#define IA32_DEBUGCTL_RESERVED_BITS 0xfffffffffffffe3cULL
+
+static int is_canonical(unsigned long addr)
+{
+ return addr == ((long)addr << 16) >> 16;
+}
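+
+/*
+ * Example: with 48 implemented bits, 0xffff800000000000 sign-extends bit 47
+ * correctly and passes; 0x0000800000000000 does not and fails the check.
+ */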
+
+int vm_entry_test_guest(struct kvm_vcpu *vcpu)
+{
+ unsigned long cr0;
+ unsigned long cr4;
+ unsigned long cr3;
+ unsigned long dr7;
+ u64 ia32_debugctl;
+ unsigned long sysenter_esp;
+ unsigned long sysenter_eip;
+ unsigned long rflags;
+ unsigned long cpu_exec_ctrl, cpu_secondary_exec_ctrl;
+ unsigned long tpr_threshold;
+
+ int long_mode;
+ int virtual8086;
+
+ #define RFLAGS_VM (1 << 17)
+ #define RFLAGS_RF (1 << 16)
+
+
+ #define VIR8086_SEG_BASE_TEST(seg)\
+ if (vmcs_readl(GUEST_##seg##_BASE) != \
+ (unsigned long)vmcs_read16(GUEST_##seg##_SELECTOR) << 4) {\
+ vcpu_printf(vcpu, "%s: "#seg" base 0x%lx in "\
+ "virtual8086 is not "#seg" selector 0x%x"\
+ " shifted right 4 bits\n",\
+ __FUNCTION__,\
+ vmcs_readl(GUEST_##seg##_BASE),\
+ vmcs_read16(GUEST_##seg##_SELECTOR));\
+ return 0;\
+ }
+
+ #define VIR8086_SEG_LIMIT_TEST(seg)\
+ if (vmcs_readl(GUEST_##seg##_LIMIT) != 0x0ffff) { \
+ vcpu_printf(vcpu, "%s: "#seg" limit 0x%lx in "\
+ "virtual8086 is not 0xffff\n",\
+ __FUNCTION__,\
+ vmcs_readl(GUEST_##seg##_LIMIT));\
+ return 0;\
+ }
+
+ #define VIR8086_SEG_AR_TEST(seg)\
+ if (vmcs_read32(GUEST_##seg##_AR_BYTES) != 0x0f3) { \
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x in "\
+ "virtual8086 is not 0xf3\n",\
+ __FUNCTION__,\
+ vmcs_read32(GUEST_##seg##_AR_BYTES));\
+ return 0;\
+ }
+
+
+ cr0 = vmcs_readl(GUEST_CR0);
+
+ if (!(cr0 & X86_CR0_PG)) {
+ vcpu_printf(vcpu, "%s: cr0 0x%lx, PG is not set\n",
+ __FUNCTION__, cr0);
+ return 0;
+ }
+
+ if (!(cr0 & X86_CR0_PE)) {
+ vcpu_printf(vcpu, "%s: cr0 0x%lx, PE is not set\n",
+ __FUNCTION__, cr0);
+ return 0;
+ }
+
+ if (!(cr0 & X86_CR0_NE)) {
+ vcpu_printf(vcpu, "%s: cr0 0x%lx, NE is not set\n",
+ __FUNCTION__, cr0);
+ return 0;
+ }
+
+ if (!(cr0 & X86_CR0_WP)) {
+ vcpu_printf(vcpu, "%s: cr0 0x%lx, WP is not set\n",
+ __FUNCTION__, cr0);
+ }
+
+ cr4 = vmcs_readl(GUEST_CR4);
+
+ if (!(cr4 & X86_CR4_VMXE)) {
+ vcpu_printf(vcpu, "%s: cr4 0x%lx, VMXE is not set\n",
+ __FUNCTION__, cr4);
+ return 0;
+ }
+
+ if (!(cr4 & X86_CR4_PAE)) {
+ vcpu_printf(vcpu, "%s: cr4 0x%lx, PAE is not set\n",
+ __FUNCTION__, cr4);
+ }
+
+ ia32_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+
+ if (ia32_debugctl & IA32_DEBUGCTL_RESERVED_BITS) {
+ vcpu_printf(vcpu, "%s: ia32_debugctl 0x%llx, reserved bits\n",
+ __FUNCTION__, ia32_debugctl);
+ return 0;
+ }
+
+ long_mode = is_long_mode(vcpu);
+
+ if (long_mode && !(cr4 & X86_CR4_PAE)) {
+ vcpu_printf(vcpu, "%s: long mode and not PAE\n",
+ __FUNCTION__);
+ return 0;
+ }
+
+ cr3 = vmcs_readl(GUEST_CR3);
+
+ if (cr3 & CR3_L_MODE_RESERVED_BITS) {
+ vcpu_printf(vcpu, "%s: cr3 0x%lx, reserved bits\n",
+ __FUNCTION__, cr3);
+ return 0;
+ }
+
+ if (!long_mode && (cr4 & X86_CR4_PAE)) {
+ /* check the 4 PDPTEs for reserved bits */
+ unsigned long pdpt_pfn = cr3 >> PAGE_SHIFT;
+ int i;
+ u64 pdpte;
+ unsigned offset = (cr3 & (PAGE_SIZE-1)) >> 5;
+ u64 *pdpt = kmap_atomic(pfn_to_page(pdpt_pfn), KM_USER0);
+
+ for (i = 0; i < 4; ++i) {
+ pdpte = pdpt[offset + i];
+ if ((pdpte & 1) && (pdpte & 0xfffffff0000001e6ull))
+ break;
+ }
+
+ kunmap_atomic(pdpt, KM_USER0);
+
+ if (i != 4) {
+ vcpu_printf(vcpu, "%s: pae cr3[%d] 0x%llx, reserved bits\n",
+ __FUNCTION__, i, pdpte);
+ return 0;
+ }
+ }
+
+ dr7 = vmcs_readl(GUEST_DR7);
+
+ if (dr7 & ~((1ULL << 32) - 1)) {
+ vcpu_printf(vcpu, "%s: dr7 0x%lx, reserved bits\n",
+ __FUNCTION__, dr7);
+ return 0;
+ }
+
+ sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
+
+ if (!is_canonical(sysenter_esp)) {
+ vcpu_printf(vcpu, "%s: sysenter_esp 0x%lx, not canonical\n",
+ __FUNCTION__, sysenter_esp);
+ return 0;
+ }
+
+ sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+
+ if (!is_canonical(sysenter_eip)) {
+ vcpu_printf(vcpu, "%s: sysenter_eip 0x%lx, not canonical\n",
+ __FUNCTION__, sysenter_eip);
+ return 0;
+ }
+
+ rflags = vmcs_readl(GUEST_RFLAGS);
+ virtual8086 = rflags & RFLAGS_VM;
+
+
+ if (vmcs_read16(GUEST_TR_SELECTOR) & SELECTOR_TI_MASK) {
+ vcpu_printf(vcpu, "%s: tr selctor 0x%x, TI is set\n",
+ __FUNCTION__, vmcs_read16(GUEST_TR_SELECTOR));
+ return 0;
+ }
+
+ if (!(vmcs_read32(GUEST_LDTR_AR_BYTES) & AR_UNUSABLE_MASK) &&
+ vmcs_read16(GUEST_LDTR_SELECTOR) & SELECTOR_TI_MASK) {
+ vcpu_printf(vcpu, "%s: ldtr selctor 0x%x,"
+ " is usable and TI is set\n",
+ __FUNCTION__, vmcs_read16(GUEST_LDTR_SELECTOR));
+ return 0;
+ }
+
+ if (!virtual8086 &&
+ (vmcs_read16(GUEST_SS_SELECTOR) & SELECTOR_RPL_MASK) !=
+ (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK)) {
+ vcpu_printf(vcpu, "%s: ss selctor 0x%x cs selctor 0x%x,"
+ " not same RPL\n",
+ __FUNCTION__,
+ vmcs_read16(GUEST_SS_SELECTOR),
+ vmcs_read16(GUEST_CS_SELECTOR));
+ return 0;
+ }
+
+ if (virtual8086) {
+ VIR8086_SEG_BASE_TEST(CS);
+ VIR8086_SEG_BASE_TEST(SS);
+ VIR8086_SEG_BASE_TEST(DS);
+ VIR8086_SEG_BASE_TEST(ES);
+ VIR8086_SEG_BASE_TEST(FS);
+ VIR8086_SEG_BASE_TEST(GS);
+ }
+
+ if (!is_canonical(vmcs_readl(GUEST_TR_BASE)) ||
+ !is_canonical(vmcs_readl(GUEST_FS_BASE)) ||
+ !is_canonical(vmcs_readl(GUEST_GS_BASE)) ) {
+ vcpu_printf(vcpu, "%s: TR 0x%lx FS 0x%lx or GS 0x%lx base"
+ " is not canonical\n",
+ __FUNCTION__,
+ vmcs_readl(GUEST_TR_BASE),
+ vmcs_readl(GUEST_FS_BASE),
+ vmcs_readl(GUEST_GS_BASE));
+ return 0;
+
+ }
+
+ if (!(vmcs_read32(GUEST_LDTR_AR_BYTES) & AR_UNUSABLE_MASK) &&
+ !is_canonical(vmcs_readl(GUEST_LDTR_BASE))) {
+ vcpu_printf(vcpu, "%s: LDTR base 0x%lx, usable and is not"
+ " canonical\n",
+ __FUNCTION__,
+ vmcs_readl(GUEST_LDTR_BASE));
+ return 0;
+ }
+
+ if ((vmcs_readl(GUEST_CS_BASE) & ~((1ULL << 32) - 1))) {
+ vcpu_printf(vcpu, "%s: CS base 0x%lx, not all bits 63-32"
+ " are zero\n",
+ __FUNCTION__,
+ vmcs_readl(GUEST_CS_BASE));
+ return 0;
+ }
+
+ #define SEG_BASE_TEST(seg)\
+ if ( !(vmcs_read32(GUEST_##seg##_AR_BYTES) & AR_UNUSABLE_MASK) &&\
+ (vmcs_readl(GUEST_##seg##_BASE) & ~((1ULL << 32) - 1))) {\
+ vcpu_printf(vcpu, "%s: "#seg" base 0x%lx, is usable and not"\
+ " all bits 63-32 are zero\n",\
+ __FUNCTION__,\
+ vmcs_readl(GUEST_##seg##_BASE));\
+ return 0;\
+ }
+ SEG_BASE_TEST(SS);
+ SEG_BASE_TEST(DS);
+ SEG_BASE_TEST(ES);
+
+ if (virtual8086) {
+ VIR8086_SEG_LIMIT_TEST(CS);
+ VIR8086_SEG_LIMIT_TEST(SS);
+ VIR8086_SEG_LIMIT_TEST(DS);
+ VIR8086_SEG_LIMIT_TEST(ES);
+ VIR8086_SEG_LIMIT_TEST(FS);
+ VIR8086_SEG_LIMIT_TEST(GS);
+ }
+
+ if (virtual8086) {
+ VIR8086_SEG_AR_TEST(CS);
+ VIR8086_SEG_AR_TEST(SS);
+ VIR8086_SEG_AR_TEST(DS);
+ VIR8086_SEG_AR_TEST(ES);
+ VIR8086_SEG_AR_TEST(FS);
+ VIR8086_SEG_AR_TEST(GS);
+ } else {
+
+ u32 cs_ar = vmcs_read32(GUEST_CS_AR_BYTES);
+ u32 ss_ar = vmcs_read32(GUEST_SS_AR_BYTES);
+ u32 tr_ar = vmcs_read32(GUEST_TR_AR_BYTES);
+ u32 ldtr_ar = vmcs_read32(GUEST_LDTR_AR_BYTES);
+
+ #define SEG_G_TEST(seg) { \
+ u32 lim = vmcs_read32(GUEST_##seg##_LIMIT); \
+ u32 ar = vmcs_read32(GUEST_##seg##_AR_BYTES); \
+ int err = 0; \
+ if (((lim & ~PAGE_MASK) != ~PAGE_MASK) && (ar & AR_G_MASK)) \
+ err = 1; \
+ if ((lim & ~((1u << 20) - 1)) && !(ar & AR_G_MASK)) \
+ err = 1; \
+ if (err) { \
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, G err. lim" \
+ " is 0x%x\n", \
+ __FUNCTION__, \
+ ar, lim); \
+ return 0; \
+ } \
+ }
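+
+ /*
+ * SEG_G_TEST example: limit 0x0000f000 with G=1 fails (a page-granular
+ * limit must have its low 12 bits set), and limit 0x00100000 with G=0
+ * fails (a byte-granular limit cannot exceed 20 bits).
+ */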
+
+
+ if (!(cs_ar & AR_TYPE_ACCESSES_MASK)) {
+ vcpu_printf(vcpu, "%s: cs AR 0x%x, accesses is clear\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+ }
+
+ if (!(cs_ar & AR_TYPE_CODE_MASK)) {
+ vcpu_printf(vcpu, "%s: cs AR 0x%x, code is clear\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+ }
+
+ if (!(cs_ar & AR_S_MASK)) {
+ vcpu_printf(vcpu, "%s: cs AR 0x%x, type is sys\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+ }
+
+ if ((cs_ar & AR_TYPE_MASK) >= 8 && (cs_ar & AR_TYPE_MASK) < 12 &&
+ AR_DPL(cs_ar) !=
+ (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK) ) {
+ vcpu_printf(vcpu, "%s: cs AR 0x%x, "
+ "DPL(0x%x) not as RPL(0x%x)\n",
+ __FUNCTION__,
+ cs_ar, AR_DPL(cs_ar), vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK);
+ return 0;
+ }
+
+ if ((cs_ar & AR_TYPE_MASK) >= 13 && (cs_ar & AR_TYPE_MASK) < 16 &&
+ AR_DPL(cs_ar) >
+ (vmcs_read16(GUEST_CS_SELECTOR) & SELECTOR_RPL_MASK) ) {
+ vcpu_printf(vcpu, "%s: cs AR 0x%x, "
+ "DPL greater than RPL\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+ }
+
+ if (!(cs_ar & AR_P_MASK)) {
+ vcpu_printf(vcpu, "%s: CS AR 0x%x, not "
+ "present\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+ }
+
+ if ((cs_ar & AR_RESERVD_MASK)) {
+ vcpu_printf(vcpu, "%s: CS AR 0x%x, reseved"
+ " bits are set\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+ }
+
+ if (long_mode && (cs_ar & AR_L_MASK) && (cs_ar & AR_DB_MASK)) {
+ vcpu_printf(vcpu, "%s: CS AR 0x%x, DB and L are set"
+ " in long mode\n",
+ __FUNCTION__,
+ cs_ar);
+ return 0;
+
+ }
+
+ SEG_G_TEST(CS);
+
+ if (!(ss_ar & AR_UNUSABLE_MASK)) {
+ if ((ss_ar & AR_TYPE_MASK) != 3 &&
+ (ss_ar & AR_TYPE_MASK) != 7 ) {
+ vcpu_printf(vcpu, "%s: ss AR 0x%x, usable and type"
+ " is not 3 or 7\n",
+ __FUNCTION__,
+ ss_ar);
+ return 0;
+ }
+
+ if (!(ss_ar & AR_S_MASK)) {
+ vcpu_printf(vcpu, "%s: ss AR 0x%x, usable and"
+ " is sys\n",
+ __FUNCTION__,
+ ss_ar);
+ return 0;
+ }
+ if (!(ss_ar & AR_P_MASK)) {
+ vcpu_printf(vcpu, "%s: SS AR 0x%x, usable"
+ " and not present\n",
+ __FUNCTION__,
+ ss_ar);
+ return 0;
+ }
+
+ if ((ss_ar & AR_RESERVD_MASK)) {
+ vcpu_printf(vcpu, "%s: SS AR 0x%x, reseved"
+ " bits are set\n",
+ __FUNCTION__,
+ ss_ar);
+ return 0;
+ }
+
+ SEG_G_TEST(SS);
+
+ }
+
+ if (AR_DPL(ss_ar) !=
+ (vmcs_read16(GUEST_SS_SELECTOR) & SELECTOR_RPL_MASK) ) {
+ vcpu_printf(vcpu, "%s: SS AR 0x%x, "
+ "DPL not as RPL\n",
+ __FUNCTION__,
+ ss_ar);
+ return 0;
+ }
+
+ #define SEG_AR_TEST(seg) {\
+ u32 ar = vmcs_read32(GUEST_##seg##_AR_BYTES);\
+ if (!(ar & AR_UNUSABLE_MASK)) {\
+ if (!(ar & AR_TYPE_ACCESSES_MASK)) {\
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\
+ "usable and not accesses\n",\
+ __FUNCTION__,\
+ ar);\
+ return 0;\
+ }\
+ if ((ar & AR_TYPE_CODE_MASK) &&\
+ !(ar & AR_TYPE_READABLE_MASK)) {\
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\
+ "code and not readable\n",\
+ __FUNCTION__,\
+ ar);\
+ return 0;\
+ }\
+ if (!(ar & AR_S_MASK)) {\
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, usable and"\
+ " is sys\n",\
+ __FUNCTION__,\
+ ar);\
+ return 0;\
+ }\
+ if ((ar & AR_TYPE_MASK) >= 0 && \
+ (ar & AR_TYPE_MASK) < 12 && \
+ AR_DPL(ar) < (vmcs_read16(GUEST_##seg##_SELECTOR) & \
+ SELECTOR_RPL_MASK) ) {\
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, "\
+ "DPL less than RPL\n",\
+ __FUNCTION__,\
+ ar);\
+ return 0;\
+ }\
+ if (!(ar & AR_P_MASK)) {\
+ vcpu_printf(vcpu, "%s: "#seg" AR 0x%x, usable and"\
+ " not present\n",\
+ __FUNCTION__,\
+ ar);\
+ return 0;\
+ }\
+ if ((ar & AR_RESERVD_MASK)) {\
+ vcpu_printf(vcpu, "%s: "#seg" AR"\
+ " 0x%x, reseved"\
+ " bits are set\n",\
+ __FUNCTION__,\
+ ar);\
+ return 0;\
+ }\
+ SEG_G_TEST(seg)\
+ }\
+ }
+
+#undef DS
+#undef ES
+#undef FS
+#undef GS
+
+ SEG_AR_TEST(DS);
+ SEG_AR_TEST(ES);
+ SEG_AR_TEST(FS);
+ SEG_AR_TEST(GS);
+
+ // TR test
+ if (long_mode) {
+ if ((tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_64_TSS) {
+ vcpu_printf(vcpu, "%s: TR AR 0x%x, long"
+ " mode and not 64bit busy"
+ " tss\n",
+ __FUNCTION__,
+ tr_ar);
+ return 0;
+ }
+ } else {
+ if ((tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_32_TSS &&
+ (tr_ar & AR_TYPE_MASK) != AR_TYPE_BUSY_16_TSS) {
+ vcpu_printf(vcpu, "%s: TR AR 0x%x, legacy"
+ " mode and not 16/32bit "
+ "busy tss\n",
+ __FUNCTION__,
+ tr_ar);
+ return 0;
+ }
+
+ }
+ if ((tr_ar & AR_S_MASK)) {
+ vcpu_printf(vcpu, "%s: TR AR 0x%x, S is set\n",
+ __FUNCTION__,
+ tr_ar);
+ return 0;
+ }
+ if (!(tr_ar & AR_P_MASK)) {
+ vcpu_printf(vcpu, "%s: TR AR 0x%x, P is not set\n",
+ __FUNCTION__,
+ tr_ar);
+ return 0;
+ }
+
+ if ((tr_ar & (AR_RESERVD_MASK | AR_UNUSABLE_MASK))) {
+ vcpu_printf(vcpu, "%s: TR AR 0x%x, reserved bits are"
+ " set\n",
+ __FUNCTION__,
+ tr_ar);
+ return 0;
+ }
+ SEG_G_TEST(TR);
+
+ // LDTR test
+ if (!(ldtr_ar & AR_UNUSABLE_MASK)) {
+
+ if ((ldtr_ar & AR_TYPE_MASK) != AR_TYPE_LDT) {
+ vcpu_printf(vcpu, "%s: LDTR AR 0x%x,"
+ " bad type\n",
+ __FUNCTION__,
+ ldtr_ar);
+ return 0;
+ }
+
+ if ((ldtr_ar & AR_S_MASK)) {
+ vcpu_printf(vcpu, "%s: LDTR AR 0x%x,"
+ " S is set\n",
+ __FUNCTION__,
+ ldtr_ar);
+ return 0;
+ }
+
+ if (!(ldtr_ar & AR_P_MASK)) {
+ vcpu_printf(vcpu, "%s: LDTR AR 0x%x,"
+ " P is not set\n",
+ __FUNCTION__,
+ ldtr_ar);
+ return 0;
+ }
+ if ((ldtr_ar & AR_RESERVD_MASK)) {
+ vcpu_printf(vcpu, "%s: LDTR AR 0x%x,"
+ " reserved bit are set\n",
+ __FUNCTION__,
+ ldtr_ar);
+ return 0;
+ }
+ SEG_G_TEST(LDTR);
+ }
+ }
+
+ // GDTR and IDTR
+
+
+ #define IDT_GDT_TEST(reg)\
+ if (!is_canonical(vmcs_readl(GUEST_##reg##_BASE))) {\
+ vcpu_printf(vcpu, "%s: "#reg" BASE 0x%lx, not canonical\n",\
+ __FUNCTION__,\
+ vmcs_readl(GUEST_##reg##_BASE));\
+ return 0;\
+ }\
+ if (vmcs_read32(GUEST_##reg##_LIMIT) >> 16) {\
+ vcpu_printf(vcpu, "%s: "#reg" LIMIT 0x%x, size err\n",\
+ __FUNCTION__,\
+ vmcs_read32(GUEST_##reg##_LIMIT));\
+ return 0;\
+ }\
+
+ IDT_GDT_TEST(GDTR);
+ IDT_GDT_TEST(IDTR);
+
+
+ // RIP
+
+ if ((!long_mode || !(vmcs_read32(GUEST_CS_AR_BYTES) & AR_L_MASK)) &&
+ vmcs_readl(GUEST_RIP) & ~((1ULL << 32) - 1)) {
+ vcpu_printf(vcpu, "%s: RIP 0x%lx, size err\n",
+ __FUNCTION__,
+ vmcs_readl(GUEST_RIP));
+ return 0;
+ }
+
+ if (!is_canonical(vmcs_readl(GUEST_RIP))) {
+ vcpu_printf(vcpu, "%s: RIP 0x%lx, not canonical\n",
+ __FUNCTION__,
+ vmcs_readl(GUEST_RIP));
+ return 0;
+ }
+
+ // RFLAGS
+ #define RFLAGS_RESERVED_CLEAR_BITS\
+ (~((1ULL << 22) - 1) | (1ULL << 15) | (1ULL << 5) | (1ULL << 3))
+ #define RFLAGS_RESERVED_SET_BITS (1 << 1)
+
+ if ((rflags & RFLAGS_RESERVED_CLEAR_BITS) ||
+ !(rflags & RFLAGS_RESERVED_SET_BITS)) {
+ vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, reserved bits 0x%llx 0x%x\n",
+ __FUNCTION__,
+ rflags,
+ RFLAGS_RESERVED_CLEAR_BITS,
+ RFLAGS_RESERVED_SET_BITS);
+ return 0;
+ }
+
+ if (long_mode && virtual8086) {
+ vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, vm and long mode\n",
+ __FUNCTION__,
+ rflags);
+ return 0;
+ }
+
+
+ if (!(rflags & RFLAGS_RF)) {
+ u32 vm_entry_info = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
+ if ((vm_entry_info & INTR_INFO_VALID_MASK) &&
+ (vm_entry_info & INTR_INFO_INTR_TYPE_MASK) ==
+ INTR_TYPE_EXT_INTR) {
+ vcpu_printf(vcpu, "%s: RFLAGS 0x%lx, external"
+ " interrupt and RF is clear\n",
+ __FUNCTION__,
+ rflags);
+ return 0;
+ }
+
+ }
+
+ cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
+ cpu_secondary_exec_ctrl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
+ tpr_threshold = vmcs_read32(TPR_THRESHOLD);
+
+ if ((cpu_exec_ctrl & CPU_BASED_TPR_SHADOW)) {
+ if (tpr_threshold & ~0xf) {
+ vcpu_printf(vcpu, "%s: if TPR shadow execution control"
+ " is 1 bits 31:4 of TPR threshold must"
+ " be 0", __FUNCTION__);
+ return 0;
+ }
+ if (!(cpu_secondary_exec_ctrl &
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ u32 apic_tpr = *((u32 *)(vcpu->arch.apic->regs + 0x80));
+ apic_tpr >>= 4;
+ if (tpr_threshold > apic_tpr) {
+ vcpu_printf(vcpu, "%s: if TPR shadow execution control"
+ " is 1 and virtual apic accesses is 0"
+ " the value of bits 3:0 of the TPR "
+ "threshold VM-execution control field"
+ " should not be greater than the value"
+ " of bits 7:4 in byte 80H on the "
+ "virtual-APIC page", __FUNCTION__);
+ return 0;
+ }
+
+ }
+ }
+
+ // to be continued with Checks on Guest Non-Register State (22.3.1.5)
+ return 1;
+}
+
+static int check_fixed_bits(struct kvm_vcpu *vcpu, const char *reg,
+ unsigned long cr,
+ u32 msr_fixed_0, u32 msr_fixed_1)
+{
+ u64 fixed_bits_0, fixed_bits_1;
+
+ rdmsrl(msr_fixed_0, fixed_bits_0);
+ rdmsrl(msr_fixed_1, fixed_bits_1);
+ if ((cr & fixed_bits_0) != fixed_bits_0) {
+ vcpu_printf(vcpu, "%s: %s (%lx) has one of %llx unset\n",
+ __FUNCTION__, reg, cr, fixed_bits_0);
+ return 0;
+ }
+ if ((~cr & ~fixed_bits_1) != ~fixed_bits_1) {
+ vcpu_printf(vcpu, "%s: %s (%lx) has one of %llx set\n",
+ __FUNCTION__, reg, cr, ~fixed_bits_1);
+ return 0;
+ }
+ return 1;
+}
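+
+/*
+ * Example: a typical MSR_IA32_VMX_CR0_FIXED0 of 0x80000021 forces PE, NE
+ * and PG to 1 while in VMX operation, so a host cr0 with NE clear is
+ * rejected by the first test above.
+ */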
+
+static int phys_addr_width(void)
+{
+ unsigned eax, ebx, ecx, edx;
+
+ cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
+ return eax & 0xff;
+}
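+
+/*
+ * Example: CPUID.80000008H:EAX = 0x3028 reports 40 physical address bits
+ * (and 48 linear), so a cr3 with any bit above 39 set fails the width
+ * check in vm_entry_test_host().
+ */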
+
+static int check_canonical(struct kvm_vcpu *vcpu, const char *name,
+ unsigned long reg)
+{
+#ifdef CONFIG_X86_64
+ unsigned long x;
+
+ if (sizeof(reg) == 4)
+ return 1;
+ x = (long)reg >> 48;
+ if (!(x == 0 || x == ~0UL)) {
+ vcpu_printf(vcpu, "%s: %s (%lx) not canonical\n",
+ __FUNCTION__, name, reg);
+ return 0;
+ }
+#endif
+ return 1;
+}
+
+static int check_selector(struct kvm_vcpu *vcpu, const char *name,
+ int rpl_ti, int null,
+ u16 sel)
+{
+ if (rpl_ti && (sel & 7)) {
+ vcpu_printf(vcpu, "%s: %s (%x) nonzero rpl or ti\n",
+ __FUNCTION__, name, sel);
+ return 0;
+ }
+ if (null && !sel) {
+ vcpu_printf(vcpu, "%s: %s (%x) zero\n",
+ __FUNCTION__, name, sel);
+ return 0;
+ }
+ return 1;
+}
+
+/*
+ * MSR_IA32_VMX_CR0_FIXED0/FIXED1 (0x486/0x487) and
+ * MSR_IA32_VMX_CR4_FIXED0/FIXED1 (0x488/0x489) are already defined by the
+ * kernel headers.
+ */
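+/* Bit 9 of the VM-exit controls: "host address-space size" (64-bit host). */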
+#define VM_EXIT_HOST_ADD_SPACE_SIZE 0x00000200
+
+int vm_entry_test_host(struct kvm_vcpu *vcpu)
+{
+ int r = 1;
+ unsigned long cr0 = vmcs_readl(HOST_CR0);
+ unsigned long cr4 = vmcs_readl(HOST_CR4);
+ unsigned long cr3 = vmcs_readl(HOST_CR3);
+ int host_64;
+
+ host_64 = vmcs_read32(VM_EXIT_CONTROLS) & VM_EXIT_HOST_ADD_SPACE_SIZE;
+
+ /* 22.2.2 */
+ r &= check_fixed_bits(vcpu, "host cr0", cr0, MSR_IA32_VMX_CR0_FIXED0,
+ MSR_IA32_VMX_CR0_FIXED1);
+
+ r &= check_fixed_bits(vcpu, "host cr0", cr4, MSR_IA32_VMX_CR4_FIXED0,
+ MSR_IA32_VMX_CR4_FIXED1);
+	if ((u64)cr3 >> phys_addr_width()) {
+		vcpu_printf(vcpu, "%s: cr3 (%lx) sets bits beyond the"
+			    " physical-address width\n",
+			    __FUNCTION__, cr3);
+		r = 0;
+	}
+
+ r &= check_canonical(vcpu, "host ia32_sysenter_eip",
+ vmcs_readl(HOST_IA32_SYSENTER_EIP));
+ r &= check_canonical(vcpu, "host ia32_sysenter_esp",
+ vmcs_readl(HOST_IA32_SYSENTER_ESP));
+
+ /* 22.2.3 */
+ r &= check_selector(vcpu, "host cs", 1, 1,
+ vmcs_read16(HOST_CS_SELECTOR));
+ r &= check_selector(vcpu, "host ss", 1, !host_64,
+ vmcs_read16(HOST_SS_SELECTOR));
+ r &= check_selector(vcpu, "host ds", 1, 0,
+ vmcs_read16(HOST_DS_SELECTOR));
+ r &= check_selector(vcpu, "host es", 1, 0,
+ vmcs_read16(HOST_ES_SELECTOR));
+ r &= check_selector(vcpu, "host fs", 1, 0,
+ vmcs_read16(HOST_FS_SELECTOR));
+ r &= check_selector(vcpu, "host gs", 1, 0,
+ vmcs_read16(HOST_GS_SELECTOR));
+ r &= check_selector(vcpu, "host tr", 1, 1,
+ vmcs_read16(HOST_TR_SELECTOR));
+
+#ifdef CONFIG_X86_64
+ r &= check_canonical(vcpu, "host fs base",
+ vmcs_readl(HOST_FS_BASE));
+ r &= check_canonical(vcpu, "host gs base",
+ vmcs_readl(HOST_GS_BASE));
+ r &= check_canonical(vcpu, "host gdtr base",
+ vmcs_readl(HOST_GDTR_BASE));
+ r &= check_canonical(vcpu, "host idtr base",
+ vmcs_readl(HOST_IDTR_BASE));
+#endif
+
+ /* 22.2.4 */
+#ifdef CONFIG_X86_64
+ if (!host_64) {
+ vcpu_printf(vcpu, "%s: vm exit controls: !64 bit host\n",
+ __FUNCTION__);
+ r = 0;
+ }
+ if (!(cr4 & X86_CR4_PAE)) {
+ vcpu_printf(vcpu, "%s: cr4 (%lx): !pae\n",
+ __FUNCTION__, cr4);
+ r = 0;
+ }
+ r &= check_canonical(vcpu, "host rip", vmcs_readl(HOST_RIP));
+#endif
+
+ return r;
+}
+
+int vm_entry_test(struct kvm_vcpu *vcpu)
+{
+ int rg, rh;
+
+ rg = vm_entry_test_guest(vcpu);
+ rh = vm_entry_test_host(vcpu);
+ return rg && rh;
+}
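+
+/*
+ * Intended use (a sketch; the caller below is an assumption, not something
+ * this file wires up): run vm_entry_test() on the entry-failure path so the
+ * vcpu_printf() output narrows down which 22.2/22.3 check failed, e.g.:
+ *
+ *	if (exit_reason == EXIT_REASON_INVALID_STATE) {
+ *		if (vm_entry_test(vcpu))
+ *			vcpu_printf(vcpu, "entry checks pass; failure"
+ *				    " is elsewhere\n");
+ *		vcpu_dump(vcpu);
+ *	}
+ */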
+
+void vmcs_dump(struct kvm_vcpu *vcpu)
+{
+ vcpu_printf(vcpu, "************************ vmcs_dump ************************\n");
+ vcpu_printf(vcpu, "VM_ENTRY_CONTROLS 0x%x\n", vmcs_read32(VM_ENTRY_CONTROLS));
+
+ vcpu_printf(vcpu, "GUEST_CR0 0x%lx\n", vmcs_readl(GUEST_CR0));
+ vcpu_printf(vcpu, "GUEST_CR3 0x%lx\n", vmcs_readl(GUEST_CR3));
+ vcpu_printf(vcpu, "GUEST_CR4 0x%lx\n", vmcs_readl(GUEST_CR4));
+
+ vcpu_printf(vcpu, "GUEST_SYSENTER_ESP 0x%lx\n", vmcs_readl(GUEST_SYSENTER_ESP));
+ vcpu_printf(vcpu, "GUEST_SYSENTER_EIP 0x%lx\n", vmcs_readl(GUEST_SYSENTER_EIP));
+
+ vcpu_printf(vcpu, "GUEST_IA32_DEBUGCTL 0x%llx\n", vmcs_read64(GUEST_IA32_DEBUGCTL));
+ vcpu_printf(vcpu, "GUEST_DR7 0x%lx\n", vmcs_readl(GUEST_DR7));
+
+ vcpu_printf(vcpu, "GUEST_RFLAGS 0x%lx\n", vmcs_readl(GUEST_RFLAGS));
+ vcpu_printf(vcpu, "GUEST_RIP 0x%lx\n", vmcs_readl(GUEST_RIP));
+
+ vcpu_printf(vcpu, "GUEST_CS_SELECTOR 0x%x\n", vmcs_read16(GUEST_CS_SELECTOR));
+ vcpu_printf(vcpu, "GUEST_DS_SELECTOR 0x%x\n", vmcs_read16(GUEST_DS_SELECTOR));
+ vcpu_printf(vcpu, "GUEST_ES_SELECTOR 0x%x\n", vmcs_read16(GUEST_ES_SELECTOR));
+ vcpu_printf(vcpu, "GUEST_FS_SELECTOR 0x%x\n", vmcs_read16(GUEST_FS_SELECTOR));
+ vcpu_printf(vcpu, "GUEST_GS_SELECTOR 0x%x\n", vmcs_read16(GUEST_GS_SELECTOR));
+ vcpu_printf(vcpu, "GUEST_SS_SELECTOR 0x%x\n", vmcs_read16(GUEST_SS_SELECTOR));
+
+ vcpu_printf(vcpu, "GUEST_TR_SELECTOR 0x%x\n", vmcs_read16(GUEST_TR_SELECTOR));
+ vcpu_printf(vcpu, "GUEST_LDTR_SELECTOR 0x%x\n", vmcs_read16(GUEST_LDTR_SELECTOR));
+
+ vcpu_printf(vcpu, "GUEST_CS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_CS_AR_BYTES));
+ vcpu_printf(vcpu, "GUEST_DS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_DS_AR_BYTES));
+ vcpu_printf(vcpu, "GUEST_ES_AR_BYTES 0x%x\n", vmcs_read32(GUEST_ES_AR_BYTES));
+ vcpu_printf(vcpu, "GUEST_FS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_FS_AR_BYTES));
+ vcpu_printf(vcpu, "GUEST_GS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_GS_AR_BYTES));
+ vcpu_printf(vcpu, "GUEST_SS_AR_BYTES 0x%x\n", vmcs_read32(GUEST_SS_AR_BYTES));
+
+ vcpu_printf(vcpu, "GUEST_LDTR_AR_BYTES 0x%x\n", vmcs_read32(GUEST_LDTR_AR_BYTES));
+ vcpu_printf(vcpu, "GUEST_TR_AR_BYTES 0x%x\n", vmcs_read32(GUEST_TR_AR_BYTES));
+
+ vcpu_printf(vcpu, "GUEST_CS_BASE 0x%lx\n", vmcs_readl(GUEST_CS_BASE));
+ vcpu_printf(vcpu, "GUEST_DS_BASE 0x%lx\n", vmcs_readl(GUEST_DS_BASE));
+ vcpu_printf(vcpu, "GUEST_ES_BASE 0x%lx\n", vmcs_readl(GUEST_ES_BASE));
+ vcpu_printf(vcpu, "GUEST_FS_BASE 0x%lx\n", vmcs_readl(GUEST_FS_BASE));
+ vcpu_printf(vcpu, "GUEST_GS_BASE 0x%lx\n", vmcs_readl(GUEST_GS_BASE));
+ vcpu_printf(vcpu, "GUEST_SS_BASE 0x%lx\n", vmcs_readl(GUEST_SS_BASE));
+
+ vcpu_printf(vcpu, "GUEST_LDTR_BASE 0x%lx\n", vmcs_readl(GUEST_LDTR_BASE));
+ vcpu_printf(vcpu, "GUEST_TR_BASE 0x%lx\n", vmcs_readl(GUEST_TR_BASE));
+
+ vcpu_printf(vcpu, "GUEST_CS_LIMIT 0x%x\n", vmcs_read32(GUEST_CS_LIMIT));
+ vcpu_printf(vcpu, "GUEST_DS_LIMIT 0x%x\n", vmcs_read32(GUEST_DS_LIMIT));
+ vcpu_printf(vcpu, "GUEST_ES_LIMIT 0x%x\n", vmcs_read32(GUEST_ES_LIMIT));
+ vcpu_printf(vcpu, "GUEST_FS_LIMIT 0x%x\n", vmcs_read32(GUEST_FS_LIMIT));
+ vcpu_printf(vcpu, "GUEST_GS_LIMIT 0x%x\n", vmcs_read32(GUEST_GS_LIMIT));
+ vcpu_printf(vcpu, "GUEST_SS_LIMIT 0x%x\n", vmcs_read32(GUEST_SS_LIMIT));
+
+ vcpu_printf(vcpu, "GUEST_LDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_LDTR_LIMIT));
+ vcpu_printf(vcpu, "GUEST_TR_LIMIT 0x%x\n", vmcs_read32(GUEST_TR_LIMIT));
+
+ vcpu_printf(vcpu, "GUEST_GDTR_BASE 0x%lx\n", vmcs_readl(GUEST_GDTR_BASE));
+ vcpu_printf(vcpu, "GUEST_IDTR_BASE 0x%lx\n", vmcs_readl(GUEST_IDTR_BASE));
+
+ vcpu_printf(vcpu, "GUEST_GDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_GDTR_LIMIT));
+ vcpu_printf(vcpu, "GUEST_IDTR_LIMIT 0x%x\n", vmcs_read32(GUEST_IDTR_LIMIT));
+
+ vcpu_printf(vcpu, "EXCEPTION_BITMAP 0x%x\n", vmcs_read32(EXCEPTION_BITMAP));
+ vcpu_printf(vcpu, "CPU_BASED_VM_EXEC_CONTROL 0x%x\n", vmcs_read32(CPU_BASED_VM_EXEC_CONTROL));
+ vcpu_printf(vcpu, "SECONDARY_VM_EXEC_CONTROL 0x%x\n", vmcs_read32(SECONDARY_VM_EXEC_CONTROL));
+ vcpu_printf(vcpu, "TPR_THREASHOLD 0x%x\n", vmcs_read32(TPR_THRESHOLD));
+ vcpu_printf(vcpu, "TPR 0x%x\n", *((u32 *) (vcpu->arch.apic->regs + 0x80)));
+ vcpu_printf(vcpu, "***********************************************************\n");
+}
+
+void regs_dump(struct kvm_vcpu *vcpu)
+{
+ #define REG_DUMP(reg) \
+ vcpu_printf(vcpu, #reg" = 0x%lx(VCPU)\n", vcpu->arch.regs[VCPU_REGS_##reg])
+ #define VMCS_REG_DUMP(reg) \
+ vcpu_printf(vcpu, #reg" = 0x%lx(VMCS)\n", vmcs_readl(GUEST_##reg))
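+
+	/*
+	 * The GPRs are printed from the vcpu->arch.regs cache; RSP is also
+	 * read straight from the VMCS (a stale cache shows up as a mismatch
+	 * between the two views), while RIP and RFLAGS come from the VMCS
+	 * only.
+	 */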
+
+ vcpu_printf(vcpu, "************************ regs_dump ************************\n");
+ REG_DUMP(RAX);
+ REG_DUMP(RBX);
+ REG_DUMP(RCX);
+ REG_DUMP(RDX);
+ REG_DUMP(RSP);
+ REG_DUMP(RBP);
+ REG_DUMP(RSI);
+ REG_DUMP(RDI);
+ REG_DUMP(R8);
+ REG_DUMP(R9);
+ REG_DUMP(R10);
+ REG_DUMP(R11);
+ REG_DUMP(R12);
+ REG_DUMP(R13);
+ REG_DUMP(R14);
+ REG_DUMP(R15);
+
+ VMCS_REG_DUMP(RSP);
+ VMCS_REG_DUMP(RIP);
+ VMCS_REG_DUMP(RFLAGS);
+
+ vcpu_printf(vcpu, "***********************************************************\n");
+}
+
+void sregs_dump(struct kvm_vcpu *vcpu)
+{
+ vcpu_printf(vcpu, "************************ sregs_dump ************************\n");
+ vcpu_printf(vcpu, "cr0 = 0x%lx\n", vcpu->arch.cr0);
+ vcpu_printf(vcpu, "cr2 = 0x%lx\n", vcpu->arch.cr2);
+ vcpu_printf(vcpu, "cr3 = 0x%lx\n", vcpu->arch.cr3);
+ vcpu_printf(vcpu, "cr4 = 0x%lx\n", vcpu->arch.cr4);
+ vcpu_printf(vcpu, "cr8 = 0x%lx\n", vcpu->arch.cr8);
+ vcpu_printf(vcpu, "shadow_efer = 0x%llx\n", vcpu->arch.shadow_efer);
+ vcpu_printf(vcpu, "***********************************************************\n");
+}
+
+void show_pending_interrupts(struct kvm_vcpu *vcpu)
+{
+ vcpu_printf(vcpu, "************************ pending interrupts ****************\n");
+	if (vcpu->arch.interrupt.pending)
+		vcpu_printf(vcpu, "nr = %d%s\n", vcpu->arch.interrupt.nr,
+			    vcpu->arch.interrupt.soft ? "(soft)" : "");
+ vcpu_printf(vcpu, "************************************************************\n");
+}
+
+void vcpu_dump(struct kvm_vcpu *vcpu)
+{
+ regs_dump(vcpu);
+ sregs_dump(vcpu);
+ vmcs_dump(vcpu);
+ show_pending_interrupts(vcpu);
+ /* more ... */
+}
+#endif
+