diff --git a/[refs] b/[refs] index eea7d838365a..f041fbffed4e 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 0ed1507183adea174bc4b6611b50d90e044730c2 +refs/heads/master: 9d87dd97ffcd3b5eb2bbaf0d5d93f4bfcaed3f04 diff --git a/trunk/.gitignore b/trunk/.gitignore index fdcce40226d7..3016ed30526d 100644 --- a/trunk/.gitignore +++ b/trunk/.gitignore @@ -27,6 +27,7 @@ TAGS vmlinux* !vmlinux.lds.S System.map +Module.markers Module.symvers !.gitignore diff --git a/trunk/Documentation/00-INDEX b/trunk/Documentation/00-INDEX index a82a113b4a4b..1977fab38656 100644 --- a/trunk/Documentation/00-INDEX +++ b/trunk/Documentation/00-INDEX @@ -329,8 +329,6 @@ sgi-visws.txt - short blurb on the SGI Visual Workstations. sh/ - directory with info on porting Linux to a new architecture. -smart-config.txt - - description of the Smart Config makefile feature. sound/ - directory with info on sound card support. sparc/ diff --git a/trunk/Documentation/ABI/stable/sysfs-class-ubi b/trunk/Documentation/ABI/stable/sysfs-class-ubi new file mode 100644 index 000000000000..18d471d9faea --- /dev/null +++ b/trunk/Documentation/ABI/stable/sysfs-class-ubi @@ -0,0 +1,212 @@ +What: /sys/class/ubi/ +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The ubi/ class sub-directory belongs to the UBI subsystem and + provides general UBI information, per-UBI device information + and per-UBI volume information. + +What: /sys/class/ubi/version +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + This file contains version of the latest supported UBI on-media + format. Currently it is 1, and there is no plan to change this. + However, if in the future UBI needs on-flash format changes + which cannot be done in a compatible manner, a new format + version will be added. So this is a mechanism for possible + future backward-compatible (but forward-incompatible) + improvements. + +What: /sys/class/ubiX/ +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The /sys/class/ubi0, /sys/class/ubi1, etc directories describe + UBI devices (UBI device 0, 1, etc). They contain general UBI + device information and per UBI volume information (each UBI + device may have many UBI volumes) + +What: /sys/class/ubi/ubiX/avail_eraseblocks +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Amount of available logical eraseblock. For example, one may + create a new UBI volume which has this amount of logical + eraseblocks. + +What: /sys/class/ubi/ubiX/bad_peb_count +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Count of bad physical eraseblocks on the underlying MTD device. + +What: /sys/class/ubi/ubiX/bgt_enabled +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Contains ASCII "0\n" if the UBI background thread is disabled, + and ASCII "1\n" if it is enabled. + +What: /sys/class/ubi/ubiX/dev +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Major and minor numbers of the character device corresponding + to this UBI device (in : format). + +What: /sys/class/ubi/ubiX/eraseblock_size +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Maximum logical eraseblock size this UBI device may provide. UBI + volumes may have smaller logical eraseblock size because of their + alignment. + +What: /sys/class/ubi/ubiX/max_ec +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Maximum physical eraseblock erase counter value. + +What: /sys/class/ubi/ubiX/max_vol_count +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Maximum number of volumes which this UBI device may have. + +What: /sys/class/ubi/ubiX/min_io_size +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Minimum input/output unit size. All the I/O may only be done + in fractions of the contained number. + +What: /sys/class/ubi/ubiX/mtd_num +Date: January 2008 +KernelVersion: 2.6.25 +Contact: Artem Bityutskiy +Description: + Number of the underlying MTD device. + +What: /sys/class/ubi/ubiX/reserved_for_bad +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Number of physical eraseblocks reserved for bad block handling. + +What: /sys/class/ubi/ubiX/total_eraseblocks +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Total number of good (not marked as bad) physical eraseblocks on + the underlying MTD device. + +What: /sys/class/ubi/ubiX/volumes_count +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Count of volumes on this UBI device. + +What: /sys/class/ubi/ubiX/ubiX_Y/ +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The /sys/class/ubi/ubiX/ubiX_0/, /sys/class/ubi/ubiX/ubiX_1/, + etc directories describe UBI volumes on UBI device X (volumes + 0, 1, etc). + +What: /sys/class/ubi/ubiX/ubiX_Y/alignment +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Volume alignment - the value the logical eraseblock size of + this volume has to be aligned on. For example, 2048 means that + logical eraseblock size is multiple of 2048. In other words, + volume logical eraseblock size is UBI device logical eraseblock + size aligned to the alignment value. + +What: /sys/class/ubi/ubiX/ubiX_Y/corrupted +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Contains ASCII "0\n" if the UBI volume is OK, and ASCII "1\n" + if it is corrupted (e.g., due to an interrupted volume update). + +What: /sys/class/ubi/ubiX/ubiX_Y/data_bytes +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + The amount of data this volume contains. This value makes sense + only for static volumes, and for dynamic volume it equivalent + to the total volume size in bytes. + +What: /sys/class/ubi/ubiX/ubiX_Y/dev +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Major and minor numbers of the character device corresponding + to this UBI volume (in : format). + +What: /sys/class/ubi/ubiX/ubiX_Y/name +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Volume name. + +What: /sys/class/ubi/ubiX/ubiX_Y/reserved_ebs +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Count of physical eraseblock reserved for this volume. + Equivalent to the volume size in logical eraseblocks. + +What: /sys/class/ubi/ubiX/ubiX_Y/type +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Volume type. Contains ASCII "dynamic\n" for dynamic volumes and + "static\n" for static volumes. + +What: /sys/class/ubi/ubiX/ubiX_Y/upd_marker +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Contains ASCII "0\n" if the update marker is not set for this + volume, and "1\n" if it is set. The update marker is set when + volume update starts, and cleaned when it ends. So the presence + of the update marker indicates that the volume is being updated + at the moment of the update was interrupted. The later may be + checked using the "corrupted" sysfs file. + +What: /sys/class/ubi/ubiX/ubiX_Y/usable_eb_size +Date: July 2006 +KernelVersion: 2.6.22 +Contact: Artem Bityutskiy +Description: + Logical eraseblock size of this volume. Equivalent to logical + eraseblock size of the device aligned on the volume alignment + value. diff --git a/trunk/Documentation/DocBook/Makefile b/trunk/Documentation/DocBook/Makefile index b2b6366bba51..83966e94cc32 100644 --- a/trunk/Documentation/DocBook/Makefile +++ b/trunk/Documentation/DocBook/Makefile @@ -187,8 +187,11 @@ quiet_cmd_fig2png = FIG2PNG $@ ### # Rule to convert a .c file to inline XML documentation + gen_xml = : + quiet_gen_xml = echo ' GEN $@' +silent_gen_xml = : %.xml: %.c - @echo ' GEN $@' + @$($(quiet)gen_xml) @( \ echo ""; \ expand --tabs=8 < $< | \ diff --git a/trunk/Documentation/HOWTO b/trunk/Documentation/HOWTO index 54835610b3d6..0291ade44c17 100644 --- a/trunk/Documentation/HOWTO +++ b/trunk/Documentation/HOWTO @@ -249,9 +249,11 @@ process is as follows: release a new -rc kernel every week. - Process continues until the kernel is considered "ready", the process should last around 6 weeks. - - A list of known regressions present in each -rc release is - tracked at the following URI: - http://kernelnewbies.org/known_regressions + - Known regressions in each release are periodically posted to the + linux-kernel mailing list. The goal is to reduce the length of + that list to zero before declaring the kernel to be "ready," but, in + the real world, a small number of regressions often remain at + release time. It is worth mentioning what Andrew Morton wrote on the linux-kernel mailing list about kernel releases: @@ -261,7 +263,7 @@ mailing list about kernel releases: 2.6.x.y -stable kernel tree --------------------------- -Kernels with 4 digit versions are -stable kernels. They contain +Kernels with 4-part versions are -stable kernels. They contain relatively small and critical fixes for security problems or significant regressions discovered in a given 2.6.x kernel. @@ -273,7 +275,10 @@ If no 2.6.x.y kernel is available, then the highest numbered 2.6.x kernel is the current stable kernel. 2.6.x.y are maintained by the "stable" team , and are -released almost every other week. +released as needs dictate. The normal release period is approximately +two weeks, but it can be longer if there are no pressing problems. A +security-related problem, instead, can cause a release to happen almost +instantly. The file Documentation/stable_kernel_rules.txt in the kernel tree documents what kinds of changes are acceptable for the -stable tree, and @@ -298,7 +303,9 @@ a while Andrew or the subsystem maintainer pushes it on to Linus for inclusion in mainline. It is heavily encouraged that all new patches get tested in the -mm tree -before they are sent to Linus for inclusion in the main kernel tree. +before they are sent to Linus for inclusion in the main kernel tree. Code +which does not make an appearance in -mm before the opening of the merge +window will prove hard to merge into the mainline. These kernels are not appropriate for use on systems that are supposed to be stable and they are more risky to run than any of the other @@ -354,11 +361,12 @@ Here is a list of some of the different kernel trees available: - SCSI, James Bottomley git.kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git + - x86, Ingo Molnar + git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-x86.git + quilt trees: - - USB, PCI, Driver Core, and I2C, Greg Kroah-Hartman + - USB, Driver Core, and I2C, Greg Kroah-Hartman kernel.org/pub/linux/kernel/people/gregkh/gregkh-2.6/ - - x86-64, partly i386, Andi Kleen - ftp.firstfloor.org:/pub/ak/x86_64/quilt/ Other kernel trees can be found listed at http://git.kernel.org/ and in the MAINTAINERS file. @@ -392,8 +400,8 @@ If you want to be advised of the future bug reports, you can subscribe to the bugme-new mailing list (only new bug reports are mailed here) or to the bugme-janitor mailing list (every change in the bugzilla is mailed here) - http://lists.osdl.org/mailman/listinfo/bugme-new - http://lists.osdl.org/mailman/listinfo/bugme-janitors + http://lists.linux-foundation.org/mailman/listinfo/bugme-new + http://lists.linux-foundation.org/mailman/listinfo/bugme-janitors diff --git a/trunk/Documentation/arm/Samsung-S3C24XX/NAND.txt b/trunk/Documentation/arm/Samsung-S3C24XX/NAND.txt new file mode 100644 index 000000000000..bc478a3409b8 --- /dev/null +++ b/trunk/Documentation/arm/Samsung-S3C24XX/NAND.txt @@ -0,0 +1,30 @@ + S3C24XX NAND Support + ==================== + +Introduction +------------ + +Small Page NAND +--------------- + +The driver uses a 512 byte (1 page) ECC code for this setup. The +ECC code is not directly compatible with the default kernel ECC +code, so the driver enforces its own OOB layout and ECC parameters + +Large Page NAND +--------------- + +The driver is capable of handling NAND flash with a 2KiB page +size, with support for hardware ECC generation and correction. + +Unlike the 512byte page mode, the driver generates ECC data for +each 256 byte block in an 2KiB page. This means that more than +one error in a page can be rectified. It also means that the +OOB layout remains the default kernel layout for these flashes. + + +Document Author +--------------- + +Ben Dooks, Copyright 2007 Simtec Electronics + diff --git a/trunk/Documentation/arm/Samsung-S3C24XX/Overview.txt b/trunk/Documentation/arm/Samsung-S3C24XX/Overview.txt index c31b76fa66c4..d04e1e30c47f 100644 --- a/trunk/Documentation/arm/Samsung-S3C24XX/Overview.txt +++ b/trunk/Documentation/arm/Samsung-S3C24XX/Overview.txt @@ -156,6 +156,8 @@ NAND controller. If there are any problems the latest linux-mtd code can be found from http://www.linux-mtd.infradead.org/ + For more information see Documentation/arm/Samsung-S3C24XX/NAND.txt + Serial ------ diff --git a/trunk/Documentation/device-mapper/dm-crypt.txt b/trunk/Documentation/device-mapper/dm-crypt.txt new file mode 100644 index 000000000000..6680cab2c705 --- /dev/null +++ b/trunk/Documentation/device-mapper/dm-crypt.txt @@ -0,0 +1,52 @@ +dm-crypt +========= + +Device-Mapper's "crypt" target provides transparent encryption of block devices +using the kernel crypto API. + +Parameters: + + + Encryption cipher and an optional IV generation mode. + (In format cipher-chainmode-ivopts:ivmode). + Examples: + des + aes-cbc-essiv:sha256 + twofish-ecb + + /proc/crypto contains supported crypto modes + + + Key used for encryption. It is encoded as a hexadecimal number. + You can only use key sizes that are valid for the selected cipher. + + + The IV offset is a sector count that is added to the sector number + before creating the IV. + + + This is the device that is going to be used as backend and contains the + encrypted data. You can specify it as a path like /dev/xxx or a device + number :. + + + Starting sector within the device where the encrypted data begins. + +Example scripts +=============== +LUKS (Linux Unified Key Setup) is now the preferred way to set up disk +encryption with dm-crypt using the 'cryptsetup' utility, see +http://luks.endorphin.org/ + +[[ +#!/bin/sh +# Create a crypt device using dmsetup +dmsetup create crypt1 --table "0 `blockdev --getsize $1` crypt aes-cbc-essiv:sha256 babebabebabebabebabebabebabebabe 0 $1 0" +]] + +[[ +#!/bin/sh +# Create a crypt device using cryptsetup and LUKS header with default cipher +cryptsetup luksFormat $1 +cryptsetup luksOpen $1 crypt1 +]] diff --git a/trunk/Documentation/filesystems/nfs-rdma.txt b/trunk/Documentation/filesystems/nfs-rdma.txt index 1ae34879574b..d0ec45ae4e7d 100644 --- a/trunk/Documentation/filesystems/nfs-rdma.txt +++ b/trunk/Documentation/filesystems/nfs-rdma.txt @@ -5,7 +5,7 @@ ################################################################################ Author: NetApp and Open Grid Computing - Date: February 25, 2008 + Date: April 15, 2008 Table of Contents ~~~~~~~~~~~~~~~~~ @@ -197,12 +197,16 @@ NFS/RDMA Setup - On the server system, configure the /etc/exports file and start the NFS/RDMA server. - Exports entries with the following format have been tested: + Exports entries with the following formats have been tested: - /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash) + /vol0 192.168.0.47(fsid=0,rw,async,insecure,no_root_squash) + /vol0 192.168.0.0/255.255.255.0(fsid=0,rw,async,insecure,no_root_squash) - Here the first IP address is the client's Ethernet address and the second - IP address is the clients IPoIB address. + The IP address(es) is(are) the client's IPoIB address for an InfiniBand HCA or the + cleint's iWARP address(es) for an RNIC. + + NOTE: The "insecure" option must be used because the NFS/RDMA client does not + use a reserved port. Each time a machine boots: diff --git a/trunk/Documentation/filesystems/seq_file.txt b/trunk/Documentation/filesystems/seq_file.txt index 7fb8e6dc62bf..b843743aa0b5 100644 --- a/trunk/Documentation/filesystems/seq_file.txt +++ b/trunk/Documentation/filesystems/seq_file.txt @@ -122,8 +122,7 @@ stop() is the place to free it. } Finally, the show() function should format the object currently pointed to -by the iterator for output. It should return zero, or an error code if -something goes wrong. The example module's show() function is: +by the iterator for output. The example module's show() function is: static int ct_seq_show(struct seq_file *s, void *v) { @@ -132,6 +131,12 @@ something goes wrong. The example module's show() function is: return 0; } +If all is well, the show() function should return zero. A negative error +code in the usual manner indicates that something went wrong; it will be +passed back to user space. This function can also return SEQ_SKIP, which +causes the current item to be skipped; if the show() function has already +generated output before returning SEQ_SKIP, that output will be dropped. + We will look at seq_printf() in a moment. But first, the definition of the seq_file iterator is finished by creating a seq_operations structure with the four functions we have just defined: @@ -182,12 +187,18 @@ The first two output a single character and a string, just like one would expect. seq_escape() is like seq_puts(), except that any character in s which is in the string esc will be represented in octal form in the output. -There is also a function for printing filenames: +There is also a pair of functions for printing filenames: int seq_path(struct seq_file *m, struct path *path, char *esc); + int seq_path_root(struct seq_file *m, struct path *path, + struct path *root, char *esc) Here, path indicates the file of interest, and esc is a set of characters -which should be escaped in the output. +which should be escaped in the output. A call to seq_path() will output +the path relative to the current process's filesystem root. If a different +root is desired, it can be used with seq_path_root(). Note that, if it +turns out that path cannot be reached from root, the value of root will be +changed in seq_file_root() to a root which *does* work. Making it all work diff --git a/trunk/Documentation/i386/boot.txt b/trunk/Documentation/i386/boot.txt index 2eb16100bb3f..0fac3465f2e3 100644 --- a/trunk/Documentation/i386/boot.txt +++ b/trunk/Documentation/i386/boot.txt @@ -42,6 +42,8 @@ Protocol 2.05: (Kernel 2.6.20) Make protected mode kernel relocatable. Protocol 2.06: (Kernel 2.6.22) Added a field that contains the size of the boot command line +Protocol 2.09: (kernel 2.6.26) Added a field of 64-bit physical + pointer to single linked list of struct setup_data. **** MEMORY LAYOUT @@ -172,6 +174,8 @@ Offset Proto Name Meaning 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data 0248/4 2.08+ payload_offset Offset of kernel payload 024C/4 2.08+ payload_length Length of kernel payload +0250/8 2.09+ setup_data 64-bit physical pointer to linked list + of struct setup_data (1) For backwards compatibility, if the setup_sects field contains 0, the real value is 4. @@ -572,6 +576,28 @@ command line is entered using the following protocol: covered by setup_move_size, so you may need to adjust this field. +Field name: setup_data +Type: write (obligatory) +Offset/size: 0x250/8 +Protocol: 2.09+ + + The 64-bit physical pointer to NULL terminated single linked list of + struct setup_data. This is used to define a more extensible boot + parameters passing mechanism. The definition of struct setup_data is + as follow: + + struct setup_data { + u64 next; + u32 type; + u32 len; + u8 data[0]; + }; + + Where, the next is a 64-bit physical pointer to the next node of + linked list, the next field of the last node is 0; the type is used + to identify the contents of data; the len is the length of data + field; the data holds the real payload. + **** MEMORY LAYOUT OF THE REAL-MODE CODE diff --git a/trunk/Documentation/ia64/kvm.txt b/trunk/Documentation/ia64/kvm.txt new file mode 100644 index 000000000000..bec9d815da33 --- /dev/null +++ b/trunk/Documentation/ia64/kvm.txt @@ -0,0 +1,82 @@ +Currently, kvm module in EXPERIMENTAL stage on IA64. This means that +interfaces are not stable enough to use. So, plase had better don't run +critical applications in virtual machine. We will try our best to make it +strong in future versions! + Guide: How to boot up guests on kvm/ia64 + +This guide is to describe how to enable kvm support for IA-64 systems. + +1. Get the kvm source from git.kernel.org. + Userspace source: + git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git + Kernel Source: + git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git + +2. Compile the source code. + 2.1 Compile userspace code: + (1)cd ./kvm-userspace + (2)./configure + (3)cd kernel + (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.) + (5)cd .. + (6)make qemu + (7)cd qemu; make install + + 2.2 Compile kernel source code: + (1) cd ./$kernel_dir + (2) Make menuconfig + (3) Enter into virtualization option, and choose kvm. + (4) make + (5) Once (4) done, make modules_install + (6) Make initrd, and use new kernel to reboot up host machine. + (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm + (8) insmod kvm.ko; insmod kvm-intel.ko + +Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail. + +3. Get Guest Firmware named as Flash.fd, and put it under right place: + (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly. + + (2) If you have no firmware at hand, Please download its source from + hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg + you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries. + + (3) Rename the firware you owned to Flash.fd, and copy it to /usr/local/share/qemu + +4. Boot up Linux or Windows guests: + 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy. + + 4.2 Boot up guests use the following command. + /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image + (xx is the number of virtual processors for the guest, now the maximum value is 4) + +5. Known possibile issue on some platforms with old Firmware. + +If meet strange host crashe issues, try to solve it through either of the following ways: + +(1): Upgrade your Firmware to the latest one. + +(2): Applying the below patch to kernel source. +diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S +index 0b53344..f02b0f7 100644 +--- a/arch/ia64/kernel/pal.S ++++ b/arch/ia64/kernel/pal.S +@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static) + mov ar.pfs = loc1 + mov rp = loc0 + ;; +- srlz.d // seralize restoration of psr.l ++ srlz.i // seralize restoration of psr.l ++ ;; + br.ret.sptk.many b0 + END(ia64_pal_call_static) + +6. Bug report: + If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list. + https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/ + +Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger! + + + Xiantao Zhang + 2008.3.10 diff --git a/trunk/Documentation/ide/ide-tape.txt b/trunk/Documentation/ide/ide-tape.txt index 658f271a373f..3f348a0b21d8 100644 --- a/trunk/Documentation/ide/ide-tape.txt +++ b/trunk/Documentation/ide/ide-tape.txt @@ -1,146 +1,65 @@ -/* - * IDE ATAPI streaming tape driver. - * - * This driver is a part of the Linux ide driver. - * - * The driver, in co-operation with ide.c, basically traverses the - * request-list for the block device interface. The character device - * interface, on the other hand, creates new requests, adds them - * to the request-list of the block device, and waits for their completion. - * - * Pipelined operation mode is now supported on both reads and writes. - * - * The block device major and minor numbers are determined from the - * tape's relative position in the ide interfaces, as explained in ide.c. - * - * The character device interface consists of the following devices: - * - * ht0 major 37, minor 0 first IDE tape, rewind on close. - * ht1 major 37, minor 1 second IDE tape, rewind on close. - * ... - * nht0 major 37, minor 128 first IDE tape, no rewind on close. - * nht1 major 37, minor 129 second IDE tape, no rewind on close. - * ... - * - * The general magnetic tape commands compatible interface, as defined by - * include/linux/mtio.h, is accessible through the character device. - * - * General ide driver configuration options, such as the interrupt-unmask - * flag, can be configured by issuing an ioctl to the block device interface, - * as any other ide device. - * - * Our own ide-tape ioctl's can be issued to either the block device or - * the character device interface. - * - * Maximal throughput with minimal bus load will usually be achieved in the - * following scenario: - * - * 1. ide-tape is operating in the pipelined operation mode. - * 2. No buffering is performed by the user backup program. - * - * Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive. - * - * Here are some words from the first releases of hd.c, which are quoted - * in ide.c and apply here as well: - * - * | Special care is recommended. Have Fun! - * - * - * An overview of the pipelined operation mode. - * - * In the pipelined write mode, we will usually just add requests to our - * pipeline and return immediately, before we even start to service them. The - * user program will then have enough time to prepare the next request while - * we are still busy servicing previous requests. In the pipelined read mode, - * the situation is similar - we add read-ahead requests into the pipeline, - * before the user even requested them. - * - * The pipeline can be viewed as a "safety net" which will be activated when - * the system load is high and prevents the user backup program from keeping up - * with the current tape speed. At this point, the pipeline will get - * shorter and shorter but the tape will still be streaming at the same speed. - * Assuming we have enough pipeline stages, the system load will hopefully - * decrease before the pipeline is completely empty, and the backup program - * will be able to "catch up" and refill the pipeline again. - * - * When using the pipelined mode, it would be best to disable any type of - * buffering done by the user program, as ide-tape already provides all the - * benefits in the kernel, where it can be done in a more efficient way. - * As we will usually not block the user program on a request, the most - * efficient user code will then be a simple read-write-read-... cycle. - * Any additional logic will usually just slow down the backup process. - * - * Using the pipelined mode, I get a constant over 400 KBps throughput, - * which seems to be the maximum throughput supported by my tape. - * - * However, there are some downfalls: - * - * 1. We use memory (for data buffers) in proportional to the number - * of pipeline stages (each stage is about 26 KB with my tape). - * 2. In the pipelined write mode, we cheat and postpone error codes - * to the user task. In read mode, the actual tape position - * will be a bit further than the last requested block. - * - * Concerning (1): - * - * 1. We allocate stages dynamically only when we need them. When - * we don't need them, we don't consume additional memory. In - * case we can't allocate stages, we just manage without them - * (at the expense of decreased throughput) so when Linux is - * tight in memory, we will not pose additional difficulties. - * - * 2. The maximum number of stages (which is, in fact, the maximum - * amount of memory) which we allocate is limited by the compile - * time parameter IDETAPE_MAX_PIPELINE_STAGES. - * - * 3. The maximum number of stages is a controlled parameter - We - * don't start from the user defined maximum number of stages - * but from the lower IDETAPE_MIN_PIPELINE_STAGES (again, we - * will not even allocate this amount of stages if the user - * program can't handle the speed). We then implement a feedback - * loop which checks if the pipeline is empty, and if it is, we - * increase the maximum number of stages as necessary until we - * reach the optimum value which just manages to keep the tape - * busy with minimum allocated memory or until we reach - * IDETAPE_MAX_PIPELINE_STAGES. - * - * Concerning (2): - * - * In pipelined write mode, ide-tape can not return accurate error codes - * to the user program since we usually just add the request to the - * pipeline without waiting for it to be serviced. In case an error - * occurs, I will report it on the next user request. - * - * In the pipelined read mode, subsequent read requests or forward - * filemark spacing will perform correctly, as we preserve all blocks - * and filemarks which we encountered during our excess read-ahead. - * - * For accurate tape positioning and error reporting, disabling - * pipelined mode might be the best option. - * - * You can enable/disable/tune the pipelined operation mode by adjusting - * the compile time parameters below. - * - * - * Possible improvements. - * - * 1. Support for the ATAPI overlap protocol. - * - * In order to maximize bus throughput, we currently use the DSC - * overlap method which enables ide.c to service requests from the - * other device while the tape is busy executing a command. The - * DSC overlap method involves polling the tape's status register - * for the DSC bit, and servicing the other device while the tape - * isn't ready. - * - * In the current QIC development standard (December 1995), - * it is recommended that new tape drives will *in addition* - * implement the ATAPI overlap protocol, which is used for the - * same purpose - efficient use of the IDE bus, but is interrupt - * driven and thus has much less CPU overhead. - * - * ATAPI overlap is likely to be supported in most new ATAPI - * devices, including new ATAPI cdroms, and thus provides us - * a method by which we can achieve higher throughput when - * sharing a (fast) ATA-2 disk with any (slow) new ATAPI device. - */ +IDE ATAPI streaming tape driver. + +This driver is a part of the Linux ide driver. + +The driver, in co-operation with ide.c, basically traverses the +request-list for the block device interface. The character device +interface, on the other hand, creates new requests, adds them +to the request-list of the block device, and waits for their completion. + +The block device major and minor numbers are determined from the +tape's relative position in the ide interfaces, as explained in ide.c. + +The character device interface consists of the following devices: + +ht0 major 37, minor 0 first IDE tape, rewind on close. +ht1 major 37, minor 1 second IDE tape, rewind on close. +... +nht0 major 37, minor 128 first IDE tape, no rewind on close. +nht1 major 37, minor 129 second IDE tape, no rewind on close. +... + +The general magnetic tape commands compatible interface, as defined by +include/linux/mtio.h, is accessible through the character device. + +General ide driver configuration options, such as the interrupt-unmask +flag, can be configured by issuing an ioctl to the block device interface, +as any other ide device. + +Our own ide-tape ioctl's can be issued to either the block device or +the character device interface. + +Maximal throughput with minimal bus load will usually be achieved in the +following scenario: + + 1. ide-tape is operating in the pipelined operation mode. + 2. No buffering is performed by the user backup program. + +Testing was done with a 2 GB CONNER CTMA 4000 IDE ATAPI Streaming Tape Drive. + +Here are some words from the first releases of hd.c, which are quoted +in ide.c and apply here as well: + +| Special care is recommended. Have Fun! + +Possible improvements: + +1. Support for the ATAPI overlap protocol. + +In order to maximize bus throughput, we currently use the DSC +overlap method which enables ide.c to service requests from the +other device while the tape is busy executing a command. The +DSC overlap method involves polling the tape's status register +for the DSC bit, and servicing the other device while the tape +isn't ready. + +In the current QIC development standard (December 1995), +it is recommended that new tape drives will *in addition* +implement the ATAPI overlap protocol, which is used for the +same purpose - efficient use of the IDE bus, but is interrupt +driven and thus has much less CPU overhead. + +ATAPI overlap is likely to be supported in most new ATAPI +devices, including new ATAPI cdroms, and thus provides us +a method by which we can achieve higher throughput when +sharing a (fast) ATA-2 disk with any (slow) new ATAPI device. diff --git a/trunk/Documentation/ide/ide.txt b/trunk/Documentation/ide/ide.txt index 486c699f4aea..0c78f4b1d9d9 100644 --- a/trunk/Documentation/ide/ide.txt +++ b/trunk/Documentation/ide/ide.txt @@ -82,27 +82,26 @@ Drives are normally found by auto-probing and/or examining the CMOS/BIOS data. For really weird situations, the apparent (fdisk) geometry can also be specified on the kernel "command line" using LILO. The format of such lines is: - hdx=cyls,heads,sects -or hdx=cdrom + ide_core.chs=[interface_number.device_number]:cyls,heads,sects +or ide_core.cdrom=[interface_number.device_number] -where hdx can be any of hda through hdh, Three values are required -(cyls,heads,sects). For example: +For example: - hdc=1050,32,64 hdd=cdrom + ide_core.chs=1.0:1050,32,64 ide_core.cdrom=1.1 -either {hda,hdb} or {hdc,hdd}. The results of successful auto-probing may -override the physical geometry/irq specified, though the "original" geometry -may be retained as the "logical" geometry for partitioning purposes (fdisk). +The results of successful auto-probing may override the physical geometry/irq +specified, though the "original" geometry may be retained as the "logical" +geometry for partitioning purposes (fdisk). If the auto-probing during boot time confuses a drive (ie. the drive works with hd.c but not with ide.c), then an command line option may be specified for each drive for which you'd like the drive to skip the hardware probe/identification sequence. For example: - hdb=noprobe + ide_core.noprobe=0.1 or - hdc=768,16,32 - hdc=noprobe + ide_core.chs=1.0:768,16,32 + ide_core.noprobe=1.0 Note that when only one IDE device is attached to an interface, it should be jumpered as "single" or "master", *not* "slave". Many folks have had @@ -118,9 +117,9 @@ If for some reason your cdrom drive is *not* found at boot time, you can force the probe to look harder by supplying a kernel command line parameter via LILO, such as: - hdc=cdrom /* hdc = "master" on second interface */ + ide_core.cdrom=1.0 /* "master" on second interface (hdc) */ or - hdd=cdrom /* hdd = "slave" on second interface */ + ide_core.cdrom=1.1 /* "slave" on second interface (hdd) */ For example, a GW2000 system might have a hard drive on the primary interface (/dev/hda) and an IDE cdrom drive on the secondary interface @@ -174,9 +173,7 @@ to /etc/modprobe.conf. When ide.c is used as a module, you can pass command line parameters to the driver using the "options=" keyword to insmod, while replacing any ',' with -';'. For example: - - insmod ide.o options="hda=nodma hdb=nodma" +';'. ================================================================================ @@ -184,57 +181,6 @@ driver using the "options=" keyword to insmod, while replacing any ',' with Summary of ide driver parameters for kernel command line -------------------------------------------------------- - "hdx=" is recognized for all "x" from "a" to "u", such as "hdc". - - "idex=" is recognized for all "x" from "0" to "9", such as "ide1". - - "hdx=noprobe" : drive may be present, but do not probe for it - - "hdx=none" : drive is NOT present, ignore cmos and do not probe - - "hdx=nowerr" : ignore the WRERR_STAT bit on this drive - - "hdx=cdrom" : drive is present, and is a cdrom drive - - "hdx=cyl,head,sect" : disk drive is present, with specified geometry - - "hdx=autotune" : driver will attempt to tune interface speed - to the fastest PIO mode supported, - if possible for this drive only. - Not fully supported by all chipset types, - and quite likely to cause trouble with - older/odd IDE drives. - - "hdx=nodma" : disallow DMA - - "idebus=xx" : inform IDE driver of VESA/PCI bus speed in MHz, - where "xx" is between 20 and 66 inclusive, - used when tuning chipset PIO modes. - For PCI bus, 25 is correct for a P75 system, - 30 is correct for P90,P120,P180 systems, - and 33 is used for P100,P133,P166 systems. - If in doubt, use idebus=33 for PCI. - As for VLB, it is safest to not specify it. - Bigger values are safer than smaller ones. - - "idex=serialize" : do not overlap operations on idex. Please note - that you will have to specify this option for - both the respective primary and secondary channel - to take effect. - - "idex=reset" : reset interface after probe - - "idex=ata66" : informs the interface that it has an 80c cable - for chipsets that are ATA-66 capable, but the - ability to bit test for detection is currently - unknown. - - "ide=doubler" : probe/support IDE doublers on Amiga - -There may be more options than shown -- use the source, Luke! - -Everything else is rejected with a "BAD OPTION" message. - For legacy IDE VLB host drivers (ali14xx/dtc2278/ht6560b/qd65xx/umc8672) you need to explicitly enable probing by using "probe" kernel parameter, i.e. to enable probing for ALI M14xx chipsets (ali14xx host driver) use: @@ -251,6 +197,33 @@ are detected automatically). You also need to use "probe" kernel parameter for ide-4drives driver (support for IDE generic chipset with four drives on one port). +To enable support for IDE doublers on Amiga use "doubler" kernel parameter +for gayle host driver (i.e. "gayle.doubler" if the driver is built-in). + +To force ignoring cable detection (this should be needed only if you're using +short 40-wires cable which cannot be automatically detected - if this is not +a case please report it as a bug instead) use "ignore_cable" kernel parameter: + +* "ide_core.ignore_cable=[interface_number]" boot option if IDE is built-in + (i.e. "ide_core.ignore_cable=1" to force ignoring cable for "ide1") + +* "ignore_cable=[interface_number]" module parameter (for ide_core module) + if IDE is compiled as module + +Other kernel parameters for ide_core are: + +* "nodma=[interface_number.device_number]" to disallow DMA for a device + +* "noflush=[interface_number.device_number]" to disable flush requests + +* "noprobe=[interface_number.device_number]" to skip probing + +* "nowerr=[interface_number.device_number]" to ignore the WRERR_STAT bit + +* "cdrom=[interface_number.device_number]" to force device as a CD-ROM + +* "chs=[interface_number.device_number]" to force device as a disk (using CHS) + ================================================================================ Some Terminology diff --git a/trunk/Documentation/ioctl-number.txt b/trunk/Documentation/ioctl-number.txt index c18363bd8d11..240ce7a56c40 100644 --- a/trunk/Documentation/ioctl-number.txt +++ b/trunk/Documentation/ioctl-number.txt @@ -183,6 +183,8 @@ Code Seq# Include File Comments 0xAC 00-1F linux/raw.h 0xAD 00 Netfilter device in development: +0xAE all linux/kvm.h Kernel-based Virtual Machine + 0xB0 all RATIO devices in development: 0xB1 00-1F PPPoX diff --git a/trunk/Documentation/kbuild/modules.txt b/trunk/Documentation/kbuild/modules.txt index 1d247d59ad56..1821c077b435 100644 --- a/trunk/Documentation/kbuild/modules.txt +++ b/trunk/Documentation/kbuild/modules.txt @@ -486,7 +486,7 @@ Module.symvers contains a list of all exported symbols from a kernel build. Sometimes, an external module uses exported symbols from another external module. Kbuild needs to have full knowledge on all symbols to avoid spitting out warnings about undefined symbols. - Two solutions exist to let kbuild know all symbols of more than + Three solutions exist to let kbuild know all symbols of more than one external module. The method with a top-level kbuild file is recommended but may be impractical in certain situations. @@ -523,6 +523,13 @@ Module.symvers contains a list of all exported symbols from a kernel build. containing the sum of all symbols defined and not part of the kernel. + Use make variable KBUILD_EXTRA_SYMBOLS in the Makefile + If it is impractical to copy Module.symvers from another + module, you can assign a space separated list of files to + KBUILD_EXTRA_SYMBOLS in your Makfile. These files will be + loaded by modpost during the initialisation of its symbol + tables. + === 8. Tips & Tricks --- 8.1 Testing for CONFIG_FOO_BAR diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index bf6303ec0bde..e5f3d918316f 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -772,10 +772,6 @@ and is between 256 and 4096 characters. It is defined in the file Format: ide=nodma or ide=doubler See Documentation/ide/ide.txt. - ide?= [HW] (E)IDE subsystem - Format: ide?=ata66 or chipset specific parameters. - See Documentation/ide/ide.txt. - idebus= [HW] (E)IDE subsystem - VLB/PCI bus speed See Documentation/ide/ide.txt. diff --git a/trunk/Documentation/leds-class.txt b/trunk/Documentation/leds-class.txt index 56757c751d6f..18860ad9935a 100644 --- a/trunk/Documentation/leds-class.txt +++ b/trunk/Documentation/leds-class.txt @@ -19,6 +19,12 @@ optimises away. Complex triggers whilst available to all LEDs have LED specific parameters and work on a per LED basis. The timer trigger is an example. +The timer trigger will periodically change the LED brightness between +LED_OFF and the current brightness setting. The "on" and "off" time can +be specified via /sys/class/leds//delay_{on,off} in milliseconds. +You can change the brightness value of a LED independently of the timer +trigger. However, if you set the brightness value to LED_OFF it will +also disable the timer trigger. You can change triggers in a similar manner to the way an IO scheduler is chosen (via /sys/class/leds//trigger). Trigger specific @@ -63,9 +69,9 @@ value if it is called with *delay_on==0 && *delay_off==0 parameters. In this case the driver should give back the chosen value through delay_on and delay_off parameters to the leds subsystem. -Any call to the brightness_set() callback function should cancel the -previously programmed hardware blinking function so setting the brightness -to 0 can also cancel the blinking of the LED. +Setting the brightness to zero with brightness_set() callback function +should completely turn off the LED and cancel the previously programmed +hardware blinking function, if any. Known Issues diff --git a/trunk/Documentation/mips/AU1xxx_IDE.README b/trunk/Documentation/mips/AU1xxx_IDE.README index 5c8334123f4f..25a6ed1aaa5b 100644 --- a/trunk/Documentation/mips/AU1xxx_IDE.README +++ b/trunk/Documentation/mips/AU1xxx_IDE.README @@ -46,8 +46,6 @@ Two files are introduced: a) 'include/asm-mips/mach-au1x00/au1xxx_ide.h' containes : struct _auide_hwif - struct drive_list_entry dma_white_list - struct drive_list_entry dma_black_list timing parameters for PIO mode 0/1/2/3/4 timing parameters for MWDMA 0/1/2 @@ -63,12 +61,6 @@ Four configs variables are introduced: CONFIG_BLK_DEV_IDE_AU1XXX_SEQTS_PER_RQ - maximum transfer size per descriptor -If MWDMA is enabled and the connected hard disc is not on the white list, the -kernel switches to a "safe mwdma mode" at boot time. In this mode the IDE -performance is substantial slower then in full speed mwdma. In this case -please add your hard disc to the white list (follow instruction from 'ADD NEW -HARD DISC TO WHITE OR BLACK LIST' section). - SUPPORTED IDE MODES ------------------- @@ -120,44 +112,6 @@ CONFIG_IDEDMA_AUTO=y Also undefine 'IDE_AU1XXX_BURSTMODE' in 'drivers/ide/mips/au1xxx-ide.c' to disable the burst support on DBDMA controller. -ADD NEW HARD DISC TO WHITE OR BLACK LIST ----------------------------------------- - -Step 1 : detect the model name of your hard disc - - a) connect your hard disc to the AU1XXX - - b) boot your kernel and get the hard disc model. - - Example boot log: - - --snipped-- - Uniform Multi-Platform E-IDE driver Revision: 7.00alpha2 - ide: Assuming 50MHz system bus speed for PIO modes; override with idebus=xx - Au1xxx IDE(builtin) configured for MWDMA2 - Probing IDE interface ide0... - hda: Maxtor 6E040L0, ATA DISK drive - ide0 at 0xac800000-0xac800007,0xac8001c0 on irq 64 - hda: max request size: 64KiB - hda: 80293248 sectors (41110 MB) w/2048KiB Cache, CHS=65535/16/63, (U)DMA - --snipped-- - - In this example 'Maxtor 6E040L0'. - -Step 2 : edit 'include/asm-mips/mach-au1x00/au1xxx_ide.h' - - Add your hard disc to the dma_white_list or dma_black_list structur. - -Step 3 : Recompile the kernel - - Enable MWDMA support in the kernel configuration. Recompile the kernel and - reboot. - -Step 4 : Tests - - If you have add a hard disc to the white list, please run some stress tests - for verification. - ACKNOWLEDGMENTS --------------- diff --git a/trunk/Documentation/networking/phy.txt b/trunk/Documentation/networking/phy.txt index 0bc95eab1512..8df6a7b0e66c 100644 --- a/trunk/Documentation/networking/phy.txt +++ b/trunk/Documentation/networking/phy.txt @@ -1,7 +1,7 @@ ------- PHY Abstraction Layer -(Updated 2006-11-30) +(Updated 2008-04-08) Purpose @@ -291,3 +291,39 @@ Writing a PHY driver Feel free to look at the Marvell, Cicada, and Davicom drivers in drivers/net/phy/ for examples (the lxt and qsemi drivers have not been tested as of this writing) + +Board Fixups + + Sometimes the specific interaction between the platform and the PHY requires + special handling. For instance, to change where the PHY's clock input is, + or to add a delay to account for latency issues in the data path. In order + to support such contingencies, the PHY Layer allows platform code to register + fixups to be run when the PHY is brought up (or subsequently reset). + + When the PHY Layer brings up a PHY it checks to see if there are any fixups + registered for it, matching based on UID (contained in the PHY device's phy_id + field) and the bus identifier (contained in phydev->dev.bus_id). Both must + match, however two constants, PHY_ANY_ID and PHY_ANY_UID, are provided as + wildcards for the bus ID and UID, respectively. + + When a match is found, the PHY layer will invoke the run function associated + with the fixup. This function is passed a pointer to the phy_device of + interest. It should therefore only operate on that PHY. + + The platform code can either register the fixup using phy_register_fixup(): + + int phy_register_fixup(const char *phy_id, + u32 phy_uid, u32 phy_uid_mask, + int (*run)(struct phy_device *)); + + Or using one of the two stubs, phy_register_fixup_for_uid() and + phy_register_fixup_for_id(): + + int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask, + int (*run)(struct phy_device *)); + int phy_register_fixup_for_id(const char *phy_id, + int (*run)(struct phy_device *)); + + The stubs set one of the two matching criteria, and set the other one to + match anything. + diff --git a/trunk/Documentation/powerpc/booting-without-of.txt b/trunk/Documentation/powerpc/booting-without-of.txt index 4cc780024e6c..cf89e8cfd5bf 100644 --- a/trunk/Documentation/powerpc/booting-without-of.txt +++ b/trunk/Documentation/powerpc/booting-without-of.txt @@ -2601,6 +2601,17 @@ platforms are moved over to use the flattened-device-tree model. differ between different families. May be 'virtex2p', 'virtex4', or 'virtex5'. + vi) Xilinx Uart 16550 + + Xilinx UART 16550 devices are very similar to the NS16550 but with + different register spacing and an offset from the base address. + + Requred properties: + - clock-frequency : Frequency of the clock input + - reg-offset : A value of 3 is required + - reg-shift : A value of 2 is required + + p) Freescale Synchronous Serial Interface The SSI is a serial device that communicates with audio codecs. It can diff --git a/trunk/Documentation/powerpc/kvm_440.txt b/trunk/Documentation/powerpc/kvm_440.txt new file mode 100644 index 000000000000..c02a003fa03a --- /dev/null +++ b/trunk/Documentation/powerpc/kvm_440.txt @@ -0,0 +1,41 @@ +Hollis Blanchard +15 Apr 2008 + +Various notes on the implementation of KVM for PowerPC 440: + +To enforce isolation, host userspace, guest kernel, and guest userspace all +run at user privilege level. Only the host kernel runs in supervisor mode. +Executing privileged instructions in the guest traps into KVM (in the host +kernel), where we decode and emulate them. Through this technique, unmodified +440 Linux kernels can be run (slowly) as guests. Future performance work will +focus on reducing the overhead and frequency of these traps. + +The usual code flow is started from userspace invoking an "run" ioctl, which +causes KVM to switch into guest context. We use IVPR to hijack the host +interrupt vectors while running the guest, which allows us to direct all +interrupts to kvmppc_handle_interrupt(). At this point, we could either +- handle the interrupt completely (e.g. emulate "mtspr SPRG0"), or +- let the host interrupt handler run (e.g. when the decrementer fires), or +- return to host userspace (e.g. when the guest performs device MMIO) + +Address spaces: We take advantage of the fact that Linux doesn't use the AS=1 +address space (in host or guest), which gives us virtual address space to use +for guest mappings. While the guest is running, the host kernel remains mapped +in AS=0, but the guest can only use AS=1 mappings. + +TLB entries: The TLB entries covering the host linear mapping remain +present while running the guest. This reduces the overhead of lightweight +exits, which are handled by KVM running in the host kernel. We keep three +copies of the TLB: + - guest TLB: contents of the TLB as the guest sees it + - shadow TLB: the TLB that is actually in hardware while guest is running + - host TLB: to restore TLB state when context switching guest -> host +When a TLB miss occurs because a mapping was not present in the shadow TLB, +but was present in the guest TLB, KVM handles the fault without invoking the +guest. Large guest pages are backed by multiple 4KB shadow pages through this +mechanism. + +IO: MMIO and DCR accesses are emulated by userspace. We use virtio for network +and block IO, so those drivers must be enabled in the guest. It's possible +that some qemu device emulation (e.g. e1000 or rtl8139) may also work with +little effort. diff --git a/trunk/Documentation/s390/kvm.txt b/trunk/Documentation/s390/kvm.txt new file mode 100644 index 000000000000..6f5ceb0f09fc --- /dev/null +++ b/trunk/Documentation/s390/kvm.txt @@ -0,0 +1,125 @@ +*** BIG FAT WARNING *** +The kvm module is currently in EXPERIMENTAL state for s390. This means that +the interface to the module is not yet considered to remain stable. Thus, be +prepared that we keep breaking your userspace application and guest +compatibility over and over again until we feel happy with the result. Make sure +your guest kernel, your host kernel, and your userspace launcher are in a +consistent state. + +This Documentation describes the unique ioctl calls to /dev/kvm, the resulting +kvm-vm file descriptors, and the kvm-vcpu file descriptors that differ from x86. + +1. ioctl calls to /dev/kvm +KVM does support the following ioctls on s390 that are common with other +architectures and do behave the same: +KVM_GET_API_VERSION +KVM_CREATE_VM (*) see note +KVM_CHECK_EXTENSION +KVM_GET_VCPU_MMAP_SIZE + +Notes: +* KVM_CREATE_VM may fail on s390, if the calling process has multiple +threads and has not called KVM_S390_ENABLE_SIE before. + +In addition, on s390 the following architecture specific ioctls are supported: +ioctl: KVM_S390_ENABLE_SIE +args: none +see also: include/linux/kvm.h +This call causes the kernel to switch on PGSTE in the user page table. This +operation is needed in order to run a virtual machine, and it requires the +calling process to be single-threaded. Note that the first call to KVM_CREATE_VM +will implicitly try to switch on PGSTE if the user process has not called +KVM_S390_ENABLE_SIE before. User processes that want to launch multiple threads +before creating a virtual machine have to call KVM_S390_ENABLE_SIE, or will +observe an error calling KVM_CREATE_VM. Switching on PGSTE is a one-time +operation, is not reversible, and will persist over the entire lifetime of +the calling process. It does not have any user-visible effect other than a small +performance penalty. + +2. ioctl calls to the kvm-vm file descriptor +KVM does support the following ioctls on s390 that are common with other +architectures and do behave the same: +KVM_CREATE_VCPU +KVM_SET_USER_MEMORY_REGION (*) see note +KVM_GET_DIRTY_LOG (**) see note + +Notes: +* kvm does only allow exactly one memory slot on s390, which has to start + at guest absolute address zero and at a user address that is aligned on any + page boundary. This hardware "limitation" allows us to have a few unique + optimizations. The memory slot doesn't have to be filled + with memory actually, it may contain sparse holes. That said, with different + user memory layout this does still allow a large flexibility when + doing the guest memory setup. +** KVM_GET_DIRTY_LOG doesn't work properly yet. The user will receive an empty +log. This ioctl call is only needed for guest migration, and we intend to +implement this one in the future. + +In addition, on s390 the following architecture specific ioctls for the kvm-vm +file descriptor are supported: +ioctl: KVM_S390_INTERRUPT +args: struct kvm_s390_interrupt * +see also: include/linux/kvm.h +This ioctl is used to submit a floating interrupt for a virtual machine. +Floating interrupts may be delivered to any virtual cpu in the configuration. +Only some interrupt types defined in include/linux/kvm.h make sense when +submitted as floating interrupts. The following interrupts are not considered +to be useful as floating interrupts, and a call to inject them will result in +-EINVAL error code: program interrupts and interprocessor signals. Valid +floating interrupts are: +KVM_S390_INT_VIRTIO +KVM_S390_INT_SERVICE + +3. ioctl calls to the kvm-vcpu file descriptor +KVM does support the following ioctls on s390 that are common with other +architectures and do behave the same: +KVM_RUN +KVM_GET_REGS +KVM_SET_REGS +KVM_GET_SREGS +KVM_SET_SREGS +KVM_GET_FPU +KVM_SET_FPU + +In addition, on s390 the following architecture specific ioctls for the +kvm-vcpu file descriptor are supported: +ioctl: KVM_S390_INTERRUPT +args: struct kvm_s390_interrupt * +see also: include/linux/kvm.h +This ioctl is used to submit an interrupt for a specific virtual cpu. +Only some interrupt types defined in include/linux/kvm.h make sense when +submitted for a specific cpu. The following interrupts are not considered +to be useful, and a call to inject them will result in -EINVAL error code: +service processor calls and virtio interrupts. Valid interrupt types are: +KVM_S390_PROGRAM_INT +KVM_S390_SIGP_STOP +KVM_S390_RESTART +KVM_S390_SIGP_SET_PREFIX +KVM_S390_INT_EMERGENCY + +ioctl: KVM_S390_STORE_STATUS +args: unsigned long +see also: include/linux/kvm.h +This ioctl stores the state of the cpu at the guest real address given as +argument, unless one of the following values defined in include/linux/kvm.h +is given as arguement: +KVM_S390_STORE_STATUS_NOADDR - the CPU stores its status to the save area in +absolute lowcore as defined by the principles of operation +KVM_S390_STORE_STATUS_PREFIXED - the CPU stores its status to the save area in +its prefix page just like the dump tool that comes with zipl. This is useful +to create a system dump for use with lkcdutils or crash. + +ioctl: KVM_S390_SET_INITIAL_PSW +args: struct kvm_s390_psw * +see also: include/linux/kvm.h +This ioctl can be used to set the processor status word (psw) of a stopped cpu +prior to running it with KVM_RUN. Note that this call is not required to modify +the psw during sie intercepts that fall back to userspace because struct kvm_run +does contain the psw, and this value is evaluated during reentry of KVM_RUN +after the intercept exit was recognized. + +ioctl: KVM_S390_INITIAL_RESET +args: none +see also: include/linux/kvm.h +This ioctl can be used to perform an initial cpu reset as defined by the +principles of operation. The target cpu has to be in stopped state. diff --git a/trunk/Documentation/smart-config.txt b/trunk/Documentation/smart-config.txt deleted file mode 100644 index 8467447b5a87..000000000000 --- a/trunk/Documentation/smart-config.txt +++ /dev/null @@ -1,98 +0,0 @@ -Smart CONFIG_* Dependencies -1 August 1999 - -Michael Chastain -Werner Almesberger -Martin von Loewis - -Here is the problem: - - Suppose that drivers/net/foo.c has the following lines: - - #include - - ... - - #ifdef CONFIG_FOO_AUTOFROB - /* Code for auto-frobbing */ - #else - /* Manual frobbing only */ - #endif - - ... - - #ifdef CONFIG_FOO_MODEL_TWO - /* Code for model two */ - #endif - - Now suppose the user (the person building kernels) reconfigures the - kernel to change some unrelated setting. This will regenerate the - file include/linux/autoconf.h, which will cause include/linux/config.h - to be out of date, which will cause drivers/net/foo.c to be recompiled. - - Most kernel sources, perhaps 80% of them, have at least one CONFIG_* - dependency somewhere. So changing _any_ CONFIG_* setting requires - almost _all_ of the kernel to be recompiled. - -Here is the solution: - - We've made the dependency generator, mkdep.c, smarter. Instead of - generating this dependency: - - drivers/net/foo.c: include/linux/config.h - - It now generates these dependencies: - - drivers/net/foo.c: \ - include/config/foo/autofrob.h \ - include/config/foo/model/two.h - - So drivers/net/foo.c depends only on the CONFIG_* lines that - it actually uses. - - A new program, split-include.c, runs at the beginning of - compilation (make bzImage or make zImage). split-include reads - include/linux/autoconf.h and updates the include/config/ tree, - writing one file per option. It updates only the files for options - that have changed. - -Flag Dependencies - - Martin Von Loewis contributed another feature to this patch: - 'flag dependencies'. The idea is that a .o file depends on - the compilation flags used to build it. The file foo.o has - its flags stored in .flags.foo.o. - - Suppose the user changes the foo driver from resident to modular. - 'make' will notice that the current foo.o was not compiled with - -DMODULE and will recompile foo.c. - - All .o files made from C source have flag dependencies. So do .o - files made with ld, and .a files made with ar. However, .o files - made from assembly source do not have flag dependencies (nobody - needs this yet, but it would be good to fix). - -Per-source-file Flags - - Flag dependencies also work with per-source-file flags. - You can specify compilation flags for individual source files - like this: - - CFLAGS_foo.o = -DSPECIAL_FOO_DEFINE - - This helps clean up drivers/net/Makefile, drivers/scsi/Makefile, - and several other Makefiles. - -Credit - - Werner Almesberger had the original idea and wrote the first - version of this patch. - - Michael Chastain picked it up and continued development. He is - now the principal author and maintainer. Please report any bugs - to him. - - Martin von Loewis wrote flag dependencies, with some modifications - by Michael Chastain. - - Thanks to all of the beta testers. diff --git a/trunk/Documentation/usb/anchors.txt b/trunk/Documentation/usb/anchors.txt new file mode 100644 index 000000000000..7304bcf5a306 --- /dev/null +++ b/trunk/Documentation/usb/anchors.txt @@ -0,0 +1,50 @@ +What is anchor? +=============== + +A USB driver needs to support some callbacks requiring +a driver to cease all IO to an interface. To do so, a +driver has to keep track of the URBs it has submitted +to know they've all completed or to call usb_kill_urb +for them. The anchor is a data structure takes care of +keeping track of URBs and provides methods to deal with +multiple URBs. + +Allocation and Initialisation +============================= + +There's no API to allocate an anchor. It is simply declared +as struct usb_anchor. init_usb_anchor() must be called to +initialise the data structure. + +Deallocation +============ + +Once it has no more URBs associated with it, the anchor can be +freed with normal memory management operations. + +Association and disassociation of URBs with anchors +=================================================== + +An association of URBs to an anchor is made by an explicit +call to usb_anchor_urb(). The association is maintained until +an URB is finished by (successfull) completion. Thus disassociation +is automatic. A function is provided to forcibly finish (kill) +all URBs associated with an anchor. +Furthermore, disassociation can be made with usb_unanchor_urb() + +Operations on multitudes of URBs +================================ + +usb_kill_anchored_urbs() +------------------------ + +This function kills all URBs associated with an anchor. The URBs +are called in the reverse temporal order they were submitted. +This way no data can be reordered. + +usb_wait_anchor_empty_timeout() +------------------------------- + +This function waits for all URBs associated with an anchor to finish +or a timeout, whichever comes first. Its return value will tell you +whether the timeout was reached. diff --git a/trunk/Documentation/usb/callbacks.txt b/trunk/Documentation/usb/callbacks.txt new file mode 100644 index 000000000000..7c812411945b --- /dev/null +++ b/trunk/Documentation/usb/callbacks.txt @@ -0,0 +1,132 @@ +What callbacks will usbcore do? +=============================== + +Usbcore will call into a driver through callbacks defined in the driver +structure and through the completion handler of URBs a driver submits. +Only the former are in the scope of this document. These two kinds of +callbacks are completely independent of each other. Information on the +completion callback can be found in Documentation/usb/URB.txt. + +The callbacks defined in the driver structure are: + +1. Hotplugging callbacks: + + * @probe: Called to see if the driver is willing to manage a particular + * interface on a device. + * @disconnect: Called when the interface is no longer accessible, usually + * because its device has been (or is being) disconnected or the + * driver module is being unloaded. + +2. Odd backdoor through usbfs: + + * @ioctl: Used for drivers that want to talk to userspace through + * the "usbfs" filesystem. This lets devices provide ways to + * expose information to user space regardless of where they + * do (or don't) show up otherwise in the filesystem. + +3. Power management (PM) callbacks: + + * @suspend: Called when the device is going to be suspended. + * @resume: Called when the device is being resumed. + * @reset_resume: Called when the suspended device has been reset instead + * of being resumed. + +4. Device level operations: + + * @pre_reset: Called when the device is about to be reset. + * @post_reset: Called after the device has been reset + +The ioctl interface (2) should be used only if you have a very good +reason. Sysfs is preferred these days. The PM callbacks are covered +separately in Documentation/usb/power-management.txt. + +Calling conventions +=================== + +All callbacks are mutually exclusive. There's no need for locking +against other USB callbacks. All callbacks are called from a task +context. You may sleep. However, it is important that all sleeps have a +small fixed upper limit in time. In particular you must not call out to +user space and await results. + +Hotplugging callbacks +===================== + +These callbacks are intended to associate and disassociate a driver with +an interface. A driver's bond to an interface is exclusive. + +The probe() callback +-------------------- + +int (*probe) (struct usb_interface *intf, + const struct usb_device_id *id); + +Accept or decline an interface. If you accept the device return 0, +otherwise -ENODEV or -ENXIO. Other error codes should be used only if a +genuine error occurred during initialisation which prevented a driver +from accepting a device that would else have been accepted. +You are strongly encouraged to use usbcore'sfacility, +usb_set_intfdata(), to associate a data structure with an interface, so +that you know which internal state and identity you associate with a +particular interface. The device will not be suspended and you may do IO +to the interface you are called for and endpoint 0 of the device. Device +initialisation that doesn't take too long is a good idea here. + +The disconnect() callback +------------------------- + +void (*disconnect) (struct usb_interface *intf); + +This callback is a signal to break any connection with an interface. +You are not allowed any IO to a device after returning from this +callback. You also may not do any other operation that may interfere +with another driver bound the interface, eg. a power management +operation. +If you are called due to a physical disconnection, all your URBs will be +killed by usbcore. Note that in this case disconnect will be called some +time after the physical disconnection. Thus your driver must be prepared +to deal with failing IO even prior to the callback. + +Device level callbacks +====================== + +pre_reset +--------- + +int (*pre_reset)(struct usb_interface *intf); + +Another driver or user space is triggering a reset on the device which +contains the interface passed as an argument. Cease IO and save any +device state you need to restore. + +If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you +are in atomic context. + +post_reset +---------- + +int (*post_reset)(struct usb_interface *intf); + +The reset has completed. Restore any saved device state and begin +using the device again. + +If you need to allocate memory here, use GFP_NOIO or GFP_ATOMIC, if you +are in atomic context. + +Call sequences +============== + +No callbacks other than probe will be invoked for an interface +that isn't bound to your driver. + +Probe will never be called for an interface bound to a driver. +Hence following a successful probe, disconnect will be called +before there is another probe for the same interface. + +Once your driver is bound to an interface, disconnect can be +called at any time except in between pre_reset and post_reset. +pre_reset is always followed by post_reset, even if the reset +failed or the device has been unplugged. + +suspend is always followed by one of: resume, reset_resume, or +disconnect. diff --git a/trunk/Documentation/usb/persist.txt b/trunk/Documentation/usb/persist.txt index df54d645cbb5..d56cb1a11550 100644 --- a/trunk/Documentation/usb/persist.txt +++ b/trunk/Documentation/usb/persist.txt @@ -2,7 +2,7 @@ Alan Stern - September 2, 2006 (Updated May 29, 2007) + September 2, 2006 (Updated February 25, 2008) What is the problem? @@ -65,9 +65,10 @@ much better.) What is the solution? -Setting CONFIG_USB_PERSIST will cause the kernel to work around these -issues. It enables a mode in which the core USB device data -structures are allowed to persist across a power-session disruption. +The kernel includes a feature called USB-persist. It tries to work +around these issues by allowing the core USB device data structures to +persist across a power-session disruption. + It works like this. If the kernel sees that a USB host controller is not in the expected state during resume (i.e., if the controller was reset or otherwise had lost power) then it applies a persistence check @@ -80,28 +81,30 @@ re-enumeration shows that the device now attached to that port has the same descriptors as before, including the Vendor and Product IDs, then the kernel continues to use the same device structure. In effect, the kernel treats the device as though it had merely been reset instead of -unplugged. +unplugged. The same thing happens if the host controller is in the +expected state but a USB device was unplugged and then replugged. If no device is now attached to the port, or if the descriptors are different from what the kernel remembers, then the treatment is what you would expect. The kernel destroys the old device structure and behaves as though the old device had been unplugged and a new device -plugged in, just as it would without the CONFIG_USB_PERSIST option. +plugged in. The end result is that the USB device remains available and usable. Filesystem mounts and memory mappings are unaffected, and the world is now a good and happy place. -Note that even when CONFIG_USB_PERSIST is set, the "persist" feature -will be applied only to those devices for which it is enabled. You -can enable the feature by doing (as root): +Note that the "USB-persist" feature will be applied only to those +devices for which it is enabled. You can enable the feature by doing +(as root): echo 1 >/sys/bus/usb/devices/.../power/persist where the "..." should be filled in the with the device's ID. Disable the feature by writing 0 instead of 1. For hubs the feature is -automatically and permanently enabled, so you only have to worry about -setting it for devices where it really matters. +automatically and permanently enabled and the power/persist file +doesn't even exist, so you only have to worry about setting it for +devices where it really matters. Is this the best solution? @@ -112,19 +115,19 @@ centralized Logical Volume Manager. Such a solution would allow you to plug in a USB flash device, create a persistent volume associated with it, unplug the flash device, plug it back in later, and still have the same persistent volume associated with the device. As such -it would be more far-reaching than CONFIG_USB_PERSIST. +it would be more far-reaching than USB-persist. On the other hand, writing a persistent volume manager would be a big job and using it would require significant input from the user. This solution is much quicker and easier -- and it exists now, a giant point in its favor! -Furthermore, the USB_PERSIST option applies to _all_ USB devices, not +Furthermore, the USB-persist feature applies to _all_ USB devices, not just mass-storage devices. It might turn out to be equally useful for other device types, such as network interfaces. - WARNING: Using CONFIG_USB_PERSIST can be dangerous!! + WARNING: USB-persist can be dangerous!! When recovering an interrupted power session the kernel does its best to make sure the USB device hasn't been changed; that is, the same @@ -133,10 +136,10 @@ aren't guaranteed to be 100% accurate. If you replace one USB device with another of the same type (same manufacturer, same IDs, and so on) there's an excellent chance the -kernel won't detect the change. Serial numbers and other strings are -not compared. In many cases it wouldn't help if they were, because -manufacturers frequently omit serial numbers entirely in their -devices. +kernel won't detect the change. The serial number string and other +descriptors are compared with the kernel's stored values, but this +might not help since manufacturers frequently omit serial numbers +entirely in their devices. Furthermore it's quite possible to leave a USB device exactly the same while changing its media. If you replace the flash memory card in a @@ -152,5 +155,5 @@ but yourself. YOU HAVE BEEN WARNED! USE AT YOUR OWN RISK! That having been said, most of the time there shouldn't be any trouble -at all. The "persist" feature can be extremely useful. Make the most -of it. +at all. The USB-persist feature can be extremely useful. Make the +most of it. diff --git a/trunk/Documentation/usb/usb-serial.txt b/trunk/Documentation/usb/usb-serial.txt index 8b077e43eee7..ff2c1ff57ba2 100644 --- a/trunk/Documentation/usb/usb-serial.txt +++ b/trunk/Documentation/usb/usb-serial.txt @@ -192,12 +192,9 @@ Keyspan USA-series Serial Adapters FTDI Single Port Serial Driver - This is a single port DB-25 serial adapter. More information about this - device and the Linux driver can be found at: - http://reality.sgi.com/bryder_wellington/ftdi_sio/ + This is a single port DB-25 serial adapter. - For any questions or problems with this driver, please contact Bill Ryder - at bryder@sgi.com + For any questions or problems with this driver, please contact Bill Ryder. ZyXEL omni.net lcd plus ISDN TA diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index f50e927a1189..c1dd1ae7b133 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1106,6 +1106,12 @@ M: kernel@wantstofly.org L: linux-usb@vger.kernel.org S: Maintained +CIRRUS LOGIC CS4270 SOUND DRIVER +P: Timur Tabi +M: timur@freescale.com +L: alsa-devel@alsa-project.org +S: Supported + CIRRUS LOGIC CS4280/CS461x SOUNDDRIVER P: Cirrus Logic Corporation (kernel 2.2 driver) M: Cirrus Logic Corporation, Thomas Woller @@ -1628,6 +1634,12 @@ L: linuxppc-dev@ozlabs.org L: netdev@vger.kernel.org S: Maintained +FREESCALE QUICC ENGINE LIBRARY +P: Timur Tabi +M: timur@freescale.com +L: linuxppc-dev@ozlabs.org +S: Supported + FREESCALE HIGHSPEED USB DEVICE DRIVER P: Li Yang M: leoli@freescale.com @@ -1642,6 +1654,19 @@ L: netdev@vger.kernel.org L: linuxppc-dev@ozlabs.org S: Maintained +FREESCALE QUICC ENGINE UCC UART DRIVER +P: Timur Tabi +M: timur@freescale.com +L: linuxppc-dev@ozlabs.org +S: Supported + +FREESCALE SOC SOUND DRIVERS +P: Timur Tabi +M: timur@freescale.com +L: alsa-devel@alsa-project.org +L: linuxppc-dev@ozlabs.org +S: Supported + FILE LOCKING (flock() and fcntl()/lockf()) P: Matthew Wilcox M: matthew@wil.cx @@ -2304,6 +2329,13 @@ L: kvm-devel@lists.sourceforge.net W: kvm.sourceforge.net S: Supported +KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC +P: Hollis Blanchard +M: hollisb@us.ibm.com +L: kvm-ppc-devel@lists.sourceforge.net +W: kvm.sourceforge.net +S: Supported + KERNEL VIRTUAL MACHINE For Itanium(KVM/IA64) P: Anthony Xu M: anthony.xu@intel.com @@ -2313,6 +2345,16 @@ L: kvm-ia64-devel@lists.sourceforge.net W: kvm.sourceforge.net S: Supported +KERNEL VIRTUAL MACHINE for s390 (KVM/s390) +P: Carsten Otte +M: cotte@de.ibm.com +P: Christian Borntraeger +M: borntraeger@de.ibm.com +M: linux390@de.ibm.com +L: linux-s390@vger.kernel.org +W: http://www.ibm.com/developerworks/linux/linux390/ +S: Supported + KEXEC P: Eric Biederman M: ebiederm@xmission.com @@ -4356,6 +4398,16 @@ L: linux-wireless@vger.kernel.org W: http://oops.ghostprotocols.net:81/blog S: Maintained +WM97XX TOUCHSCREEN DRIVERS +P: Mark Brown +M: broonie@opensource.wolfsonmicro.com +P: Liam Girdwood +M: liam.girdwood@wolfsonmicro.com +L: linux-input@vger.kernel.org +T: git git://opensource.wolfsonmicro.com/linux-2.6-touch +W: http://opensource.wolfsonmicro.com/node/7 +S: Supported + X.25 NETWORK LAYER P: Henner Eisen M: eis@baty.hanse.de diff --git a/trunk/Makefile b/trunk/Makefile index d35c5246fce5..fc3411e6f071 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 25 -EXTRAVERSION = -numa +EXTRAVERSION = NAME = Funky Weasel is Jiggy wit it # *DOCUMENTATION* @@ -507,6 +507,10 @@ else KBUILD_CFLAGS += -O2 endif +ifneq (CONFIG_FRAME_WARN,0) +KBUILD_CFLAGS += $(call cc-option,-Wframe-larger-than=${CONFIG_FRAME_WARN}) +endif + # Force gcc to behave correct even for buggy distributions # Arch Makefiles may override this setting KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) @@ -1396,7 +1400,7 @@ define xtags $(all-kconfigs) | xargs $1 -a \ --langdef=kconfig \ --language-force=kconfig \ - --regex-kconfig='/^[[:blank:]]*config[[:blank:]]+([[:alnum:]_]+)/\1/'; \ + --regex-kconfig='/^[[:blank:]]*(menu|)config[[:blank:]]+([[:alnum:]_]+)/\2/'; \ $(all-defconfigs) | xargs -r $1 -a \ --langdef=dotconfig \ --language-force=dotconfig \ @@ -1404,7 +1408,7 @@ define xtags elif $1 --version 2>&1 | grep -iq emacs; then \ $(all-sources) | xargs $1 -a; \ $(all-kconfigs) | xargs $1 -a \ - --regex='/^[ \t]*config[ \t]+\([a-zA-Z0-9_]+\)/\1/'; \ + --regex='/^[ \t]*(menu|)config[ \t]+\([a-zA-Z0-9_]+\)/\2/'; \ $(all-defconfigs) | xargs -r $1 -a \ --regex='/^#?[ \t]?\(CONFIG_[a-zA-Z0-9_]+\)/\1/'; \ else \ @@ -1539,7 +1543,6 @@ quiet_cmd_rmfiles = $(if $(wildcard $(rm-files)),CLEAN $(wildcard $(rm-files)) cmd_rmfiles = rm -f $(rm-files) # Run depmod only if we have System.map and depmod is executable -# and we build for the host arch quiet_cmd_depmod = DEPMOD $(KERNELRELEASE) cmd_depmod = \ if [ -r System.map -a -x $(DEPMOD) ]; then \ diff --git a/trunk/arch/arm/configs/am200epdkit_defconfig b/trunk/arch/arm/configs/am200epdkit_defconfig index dc030cfe5009..5e68420f4680 100644 --- a/trunk/arch/arm/configs/am200epdkit_defconfig +++ b/trunk/arch/arm/configs/am200epdkit_defconfig @@ -1,7 +1,7 @@ # # Automatically generated make config: don't edit -# Linux kernel version: 2.6.25-rc3 -# Sun Mar 9 06:33:33 2008 +# Linux kernel version: 2.6.25 +# Sun Apr 20 00:29:49 2008 # CONFIG_ARM=y CONFIG_SYS_SUPPORTS_APM_EMULATION=y @@ -51,7 +51,8 @@ CONFIG_FAIR_GROUP_SCHED=y # CONFIG_RT_GROUP_SCHED is not set CONFIG_USER_SCHED=y # CONFIG_CGROUP_SCHED is not set -# CONFIG_SYSFS_DEPRECATED is not set +CONFIG_SYSFS_DEPRECATED=y +CONFIG_SYSFS_DEPRECATED_V2=y # CONFIG_RELAY is not set # CONFIG_NAMESPACES is not set # CONFIG_BLK_DEV_INITRD is not set @@ -85,6 +86,7 @@ CONFIG_SLAB=y CONFIG_HAVE_OPROFILE=y # CONFIG_KPROBES is not set CONFIG_HAVE_KPROBES=y +CONFIG_HAVE_KRETPROBES=y CONFIG_PROC_PAGE_MONITOR=y CONFIG_SLABINFO=y CONFIG_RT_MUTEXES=y @@ -115,7 +117,6 @@ CONFIG_IOSCHED_NOOP=y CONFIG_DEFAULT_NOOP=y CONFIG_DEFAULT_IOSCHED="noop" CONFIG_CLASSIC_RCU=y -# CONFIG_PREEMPT_RCU is not set # # System Type @@ -320,8 +321,6 @@ CONFIG_TCP_CONG_CUBIC=y CONFIG_DEFAULT_TCP_CONG="cubic" # CONFIG_TCP_MD5SIG is not set # CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set # CONFIG_NETWORK_SECMARK is not set # CONFIG_NETFILTER is not set # CONFIG_IP_DCCP is not set @@ -383,7 +382,6 @@ CONFIG_IEEE80211=m CONFIG_IEEE80211_CRYPT_WEP=m # CONFIG_IEEE80211_CRYPT_CCMP is not set # CONFIG_IEEE80211_CRYPT_TKIP is not set -# CONFIG_IEEE80211_SOFTMAC is not set # CONFIG_RFKILL is not set # CONFIG_NET_9P is not set @@ -503,7 +501,7 @@ CONFIG_IDE_MAX_HWIFS=2 CONFIG_BLK_DEV_IDE=m # -# Please see Documentation/ide.txt for help/info on IDE drives +# Please see Documentation/ide/ide.txt for help/info on IDE drives # # CONFIG_BLK_DEV_IDE_SATA is not set CONFIG_BLK_DEV_IDEDISK=m @@ -518,10 +516,9 @@ CONFIG_IDE_PROC_FS=y # # IDE chipset support/bugfixes # -CONFIG_IDE_GENERIC=m # CONFIG_BLK_DEV_PLATFORM is not set # CONFIG_BLK_DEV_IDEDMA is not set -CONFIG_IDE_ARCH_OBSOLETE_INIT=y +# CONFIG_BLK_DEV_HD_ONLY is not set # CONFIG_BLK_DEV_HD is not set # @@ -562,6 +559,7 @@ CONFIG_NETDEV_10000=y # # CONFIG_WLAN_PRE80211 is not set # CONFIG_WLAN_80211 is not set +# CONFIG_IWLWIFI_LEDS is not set # CONFIG_NET_PCMCIA is not set # CONFIG_WAN is not set # CONFIG_PPP is not set @@ -707,6 +705,8 @@ CONFIG_SSB_POSSIBLE=y # # CONFIG_MFD_SM501 is not set # CONFIG_MFD_ASIC3 is not set +# CONFIG_HTC_EGPIO is not set +# CONFIG_HTC_PASIC3 is not set # # Multimedia devices @@ -745,6 +745,7 @@ CONFIG_FB_TILEBLITTING=y CONFIG_FB_PXA=y CONFIG_FB_PXA_PARAMETERS=y CONFIG_FB_MBX=m +# CONFIG_FB_METRONOME is not set CONFIG_FB_VIRTUAL=m # CONFIG_BACKLIGHT_LCD_SUPPORT is not set @@ -891,7 +892,6 @@ CONFIG_RTC_LIB=y # CONFIG_JFS_FS is not set # CONFIG_FS_POSIX_ACL is not set # CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set # CONFIG_OCFS2_FS is not set # CONFIG_DNOTIFY is not set CONFIG_INOTIFY=y diff --git a/trunk/arch/arm/mach-at91/at91cap9_devices.c b/trunk/arch/arm/mach-at91/at91cap9_devices.c index f1a80d74a4b6..be526746e01e 100644 --- a/trunk/arch/arm/mach-at91/at91cap9_devices.c +++ b/trunk/arch/arm/mach-at91/at91cap9_devices.c @@ -246,7 +246,7 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) } mmc0_data = *data; - at91_clock_associate("mci0_clk", &at91cap9_mmc1_device.dev, "mci_clk"); + at91_clock_associate("mci0_clk", &at91cap9_mmc0_device.dev, "mci_clk"); platform_device_register(&at91cap9_mmc0_device); } else { /* MCI1 */ /* CLK */ diff --git a/trunk/arch/arm/mach-at91/at91sam9263_devices.c b/trunk/arch/arm/mach-at91/at91sam9263_devices.c index b6454c525962..719667e25c98 100644 --- a/trunk/arch/arm/mach-at91/at91sam9263_devices.c +++ b/trunk/arch/arm/mach-at91/at91sam9263_devices.c @@ -308,7 +308,7 @@ void __init at91_add_device_mmc(short mmc_id, struct at91_mmc_data *data) } mmc0_data = *data; - at91_clock_associate("mci0_clk", &at91sam9263_mmc1_device.dev, "mci_clk"); + at91_clock_associate("mci0_clk", &at91sam9263_mmc0_device.dev, "mci_clk"); platform_device_register(&at91sam9263_mmc0_device); } else { /* MCI1 */ /* CLK */ diff --git a/trunk/arch/arm/mach-at91/pm.c b/trunk/arch/arm/mach-at91/pm.c index 39733b6992aa..aa863c157708 100644 --- a/trunk/arch/arm/mach-at91/pm.c +++ b/trunk/arch/arm/mach-at91/pm.c @@ -61,6 +61,15 @@ static inline void sdram_selfrefresh_enable(void) #else #include +#ifdef CONFIG_ARCH_AT91SAM9263 +/* + * FIXME either or both the SDRAM controllers (EB0, EB1) might be in use; + * handle those cases both here and in the Suspend-To-RAM support. + */ +#define AT91_SDRAMC AT91_SDRAMC0 +#warning Assuming EB1 SDRAM controller is *NOT* used +#endif + static u32 saved_lpr; static inline void sdram_selfrefresh_enable(void) @@ -75,11 +84,6 @@ static inline void sdram_selfrefresh_enable(void) #define sdram_selfrefresh_disable() at91_sys_write(AT91_SDRAMC_LPR, saved_lpr) -/* - * FIXME: The AT91SAM9263 has a second EBI controller which may have - * additional SDRAM. pm_slowclock.S will require a similar fix. - */ - #endif diff --git a/trunk/arch/arm/mach-pxa/Makefile b/trunk/arch/arm/mach-pxa/Makefile index 7cdcb459ea9d..6a830853aa6a 100644 --- a/trunk/arch/arm/mach-pxa/Makefile +++ b/trunk/arch/arm/mach-pxa/Makefile @@ -5,9 +5,9 @@ # Common support (must be linked before board specific support) obj-y += clock.o devices.o generic.o irq.o dma.o \ time.o gpio.o -obj-$(CONFIG_PXA25x) += pxa25x.o mfp-pxa2xx.o -obj-$(CONFIG_PXA27x) += pxa27x.o mfp-pxa2xx.o -obj-$(CONFIG_PXA3xx) += pxa3xx.o mfp-pxa3xx.o smemc.o +obj-$(CONFIG_PXA25x) += mfp-pxa2xx.o pxa25x.o +obj-$(CONFIG_PXA27x) += mfp-pxa2xx.o pxa27x.o +obj-$(CONFIG_PXA3xx) += mfp-pxa3xx.o pxa3xx.o smemc.o obj-$(CONFIG_CPU_PXA300) += pxa300.o obj-$(CONFIG_CPU_PXA320) += pxa320.o diff --git a/trunk/arch/arm/mach-pxa/gumstix.c b/trunk/arch/arm/mach-pxa/gumstix.c index f01d18544133..bdf239754037 100644 --- a/trunk/arch/arm/mach-pxa/gumstix.c +++ b/trunk/arch/arm/mach-pxa/gumstix.c @@ -40,6 +40,7 @@ #include #include +#include #include "generic.h" diff --git a/trunk/arch/arm/mach-pxa/magician.c b/trunk/arch/arm/mach-pxa/magician.c index d70be75bd199..badba064dc04 100644 --- a/trunk/arch/arm/mach-pxa/magician.c +++ b/trunk/arch/arm/mach-pxa/magician.c @@ -114,6 +114,14 @@ static unsigned long magician_pin_config[] = { GPIO82_CIF_DD_5, GPIO84_CIF_FV, GPIO85_CIF_LV, + + /* Magician specific input GPIOs */ + GPIO9_GPIO, /* unknown */ + GPIO10_GPIO, /* GSM_IRQ */ + GPIO13_GPIO, /* CPLD_IRQ */ + GPIO107_GPIO, /* DS1WM_IRQ */ + GPIO108_GPIO, /* GSM_READY */ + GPIO115_GPIO, /* nPEN_IRQ */ }; /* @@ -438,7 +446,7 @@ static struct pasic3_led pasic3_leds[] = { static struct platform_device pasic3; -static struct pasic3_leds_machinfo __devinit pasic3_leds_info = { +static struct pasic3_leds_machinfo pasic3_leds_info = { .num_leds = ARRAY_SIZE(pasic3_leds), .power_gpio = EGPIO_MAGICIAN_LED_POWER, .leds = pasic3_leds, @@ -543,9 +551,28 @@ static struct platform_device power_supply = { static int magician_mci_init(struct device *dev, irq_handler_t detect_irq, void *data) { - return request_irq(IRQ_MAGICIAN_SD, detect_irq, + int err; + + err = request_irq(IRQ_MAGICIAN_SD, detect_irq, IRQF_DISABLED | IRQF_SAMPLE_RANDOM, "MMC card detect", data); + if (err) + goto err_request_irq; + err = gpio_request(EGPIO_MAGICIAN_SD_POWER, "SD_POWER"); + if (err) + goto err_request_power; + err = gpio_request(EGPIO_MAGICIAN_nSD_READONLY, "nSD_READONLY"); + if (err) + goto err_request_readonly; + + return 0; + +err_request_readonly: + gpio_free(EGPIO_MAGICIAN_SD_POWER); +err_request_power: + free_irq(IRQ_MAGICIAN_SD, data); +err_request_irq: + return err; } static void magician_mci_setpower(struct device *dev, unsigned int vdd) @@ -562,6 +589,8 @@ static int magician_mci_get_ro(struct device *dev) static void magician_mci_exit(struct device *dev, void *data) { + gpio_free(EGPIO_MAGICIAN_nSD_READONLY); + gpio_free(EGPIO_MAGICIAN_SD_POWER); free_irq(IRQ_MAGICIAN_SD, data); } @@ -643,28 +672,42 @@ static void __init magician_init(void) { void __iomem *cpld; int lcd_select; + int err; + + gpio_request(GPIO13_MAGICIAN_CPLD_IRQ, "CPLD_IRQ"); + gpio_request(GPIO107_MAGICIAN_DS1WM_IRQ, "DS1WM_IRQ"); pxa2xx_mfp_config(ARRAY_AND_SIZE(magician_pin_config)); platform_add_devices(devices, ARRAY_SIZE(devices)); + + err = gpio_request(GPIO83_MAGICIAN_nIR_EN, "nIR_EN"); + if (!err) { + gpio_direction_output(GPIO83_MAGICIAN_nIR_EN, 1); + pxa_set_ficp_info(&magician_ficp_info); + } pxa_set_i2c_info(NULL); pxa_set_mci_info(&magician_mci_info); pxa_set_ohci_info(&magician_ohci_info); - pxa_set_ficp_info(&magician_ficp_info); /* Check LCD type we have */ cpld = ioremap_nocache(PXA_CS3_PHYS, 0x1000); if (cpld) { u8 board_id = __raw_readb(cpld+0x14); + iounmap(cpld); system_rev = board_id & 0x7; lcd_select = board_id & 0x8; - iounmap(cpld); pr_info("LCD type: %s\n", lcd_select ? "Samsung" : "Toppoly"); - if (lcd_select && (system_rev < 3)) - pxa_gpio_mode(GPIO75_MAGICIAN_SAMSUNG_POWER_MD); - pxa_gpio_mode(GPIO104_MAGICIAN_LCD_POWER_1_MD); - pxa_gpio_mode(GPIO105_MAGICIAN_LCD_POWER_2_MD); - pxa_gpio_mode(GPIO106_MAGICIAN_LCD_POWER_3_MD); + if (lcd_select && (system_rev < 3)) { + gpio_request(GPIO75_MAGICIAN_SAMSUNG_POWER, "SAMSUNG_POWER"); + gpio_direction_output(GPIO75_MAGICIAN_SAMSUNG_POWER, 0); + } + gpio_request(GPIO104_MAGICIAN_LCD_POWER_1, "LCD_POWER_1"); + gpio_request(GPIO105_MAGICIAN_LCD_POWER_2, "LCD_POWER_2"); + gpio_request(GPIO106_MAGICIAN_LCD_POWER_3, "LCD_POWER_3"); + gpio_direction_output(GPIO104_MAGICIAN_LCD_POWER_1, 0); + gpio_direction_output(GPIO105_MAGICIAN_LCD_POWER_2, 0); + gpio_direction_output(GPIO106_MAGICIAN_LCD_POWER_3, 0); set_pxa_fb_info(lcd_select ? &samsung_info : &toppoly_info); } else pr_err("LCD detection: CPLD mapping failed\n"); diff --git a/trunk/arch/arm/mach-pxa/pm.c b/trunk/arch/arm/mach-pxa/pm.c index 039194cbe477..ec1bbf333a3a 100644 --- a/trunk/arch/arm/mach-pxa/pm.c +++ b/trunk/arch/arm/mach-pxa/pm.c @@ -46,8 +46,8 @@ int pxa_pm_enter(suspend_state_t state) sleep_save_checksum += sleep_save[i]; } - /* Clear sleep reset status */ - RCSR = RCSR_SMR; + /* Clear reset status */ + RCSR = RCSR_HWR | RCSR_WDR | RCSR_SMR | RCSR_GPR; /* *** go zzz *** */ pxa_cpu_pm_fns->enter(state); diff --git a/trunk/arch/arm/mach-pxa/pxa3xx.c b/trunk/arch/arm/mach-pxa/pxa3xx.c index dde355e88fa1..b6a6f5fcc77a 100644 --- a/trunk/arch/arm/mach-pxa/pxa3xx.c +++ b/trunk/arch/arm/mach-pxa/pxa3xx.c @@ -486,6 +486,8 @@ static int pxa3xx_set_wake(unsigned int irq, unsigned int on) case IRQ_MMC3: mask = ADXER_MFP_GEN12; break; + default: + return -EINVAL; } local_irq_save(flags); diff --git a/trunk/arch/arm/oprofile/op_model_mpcore.c b/trunk/arch/arm/oprofile/op_model_mpcore.c index 75bae067922d..74fae6045650 100644 --- a/trunk/arch/arm/oprofile/op_model_mpcore.c +++ b/trunk/arch/arm/oprofile/op_model_mpcore.c @@ -51,7 +51,7 @@ /* * MPCore SCU event monitor support */ -#define SCU_EVENTMONITORS_VA_BASE __io_address(REALVIEW_MPCORE_SCU_BASE + 0x10) +#define SCU_EVENTMONITORS_VA_BASE __io_address(REALVIEW_EB11MP_SCU_BASE + 0x10) /* * Bitmask of used SCU counters @@ -80,7 +80,7 @@ static irqreturn_t scu_em_interrupt(int irq, void *arg) struct eventmonitor __iomem *emc = SCU_EVENTMONITORS_VA_BASE; unsigned int cnt; - cnt = irq - IRQ_PMU_SCU0; + cnt = irq - IRQ_EB11MP_PMU_SCU0; oprofile_add_sample(get_irq_regs(), SCU_COUNTER(cnt)); scu_reset_counter(emc, cnt); @@ -119,10 +119,10 @@ static int scu_start(void) */ for (i = 0; i < NUM_SCU_COUNTERS; i++) { if (scu_em_used & (1 << i)) { - ret = request_irq(IRQ_PMU_SCU0 + i, scu_em_interrupt, IRQF_DISABLED, "SCU PMU", NULL); + ret = request_irq(IRQ_EB11MP_PMU_SCU0 + i, scu_em_interrupt, IRQF_DISABLED, "SCU PMU", NULL); if (ret) { printk(KERN_ERR "oprofile: unable to request IRQ%u for SCU Event Monitor\n", - IRQ_PMU_SCU0 + i); + IRQ_EB11MP_PMU_SCU0 + i); goto err_free_scu; } } @@ -153,7 +153,7 @@ static int scu_start(void) err_free_scu: while (i--) - free_irq(IRQ_PMU_SCU0 + i, NULL); + free_irq(IRQ_EB11MP_PMU_SCU0 + i, NULL); return ret; } @@ -175,7 +175,7 @@ static void scu_stop(void) for (i = 0; i < NUM_SCU_COUNTERS; i++) { if (scu_em_used & (1 << i)) { scu_reset_counter(emc, i); - free_irq(IRQ_PMU_SCU0 + i, NULL); + free_irq(IRQ_EB11MP_PMU_SCU0 + i, NULL); } } } @@ -225,10 +225,10 @@ static int em_setup_ctrs(void) } static int arm11_irqs[] = { - [0] = IRQ_PMU_CPU0, - [1] = IRQ_PMU_CPU1, - [2] = IRQ_PMU_CPU2, - [3] = IRQ_PMU_CPU3 + [0] = IRQ_EB11MP_PMU_CPU0, + [1] = IRQ_EB11MP_PMU_CPU1, + [2] = IRQ_EB11MP_PMU_CPU2, + [3] = IRQ_EB11MP_PMU_CPU3 }; static int em_start(void) @@ -273,22 +273,22 @@ static int em_setup(void) /* * Send SCU PMU interrupts to the "owner" CPU. */ - em_route_irq(IRQ_PMU_SCU0, 0); - em_route_irq(IRQ_PMU_SCU1, 0); - em_route_irq(IRQ_PMU_SCU2, 1); - em_route_irq(IRQ_PMU_SCU3, 1); - em_route_irq(IRQ_PMU_SCU4, 2); - em_route_irq(IRQ_PMU_SCU5, 2); - em_route_irq(IRQ_PMU_SCU6, 3); - em_route_irq(IRQ_PMU_SCU7, 3); + em_route_irq(IRQ_EB11MP_PMU_SCU0, 0); + em_route_irq(IRQ_EB11MP_PMU_SCU1, 0); + em_route_irq(IRQ_EB11MP_PMU_SCU2, 1); + em_route_irq(IRQ_EB11MP_PMU_SCU3, 1); + em_route_irq(IRQ_EB11MP_PMU_SCU4, 2); + em_route_irq(IRQ_EB11MP_PMU_SCU5, 2); + em_route_irq(IRQ_EB11MP_PMU_SCU6, 3); + em_route_irq(IRQ_EB11MP_PMU_SCU7, 3); /* * Send CP15 PMU interrupts to the owner CPU. */ - em_route_irq(IRQ_PMU_CPU0, 0); - em_route_irq(IRQ_PMU_CPU1, 1); - em_route_irq(IRQ_PMU_CPU2, 2); - em_route_irq(IRQ_PMU_CPU3, 3); + em_route_irq(IRQ_EB11MP_PMU_CPU0, 0); + em_route_irq(IRQ_EB11MP_PMU_CPU1, 1); + em_route_irq(IRQ_EB11MP_PMU_CPU2, 2); + em_route_irq(IRQ_EB11MP_PMU_CPU3, 3); return 0; } diff --git a/trunk/arch/ia64/Kconfig b/trunk/arch/ia64/Kconfig index cd13e138bd03..3aa6c821449a 100644 --- a/trunk/arch/ia64/Kconfig +++ b/trunk/arch/ia64/Kconfig @@ -19,6 +19,7 @@ config IA64 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -589,6 +590,8 @@ config MSPEC source "fs/Kconfig" +source "arch/ia64/kvm/Kconfig" + source "lib/Kconfig" # diff --git a/trunk/arch/ia64/Makefile b/trunk/arch/ia64/Makefile index f1645c4f7039..ec4cca477f49 100644 --- a/trunk/arch/ia64/Makefile +++ b/trunk/arch/ia64/Makefile @@ -57,6 +57,7 @@ core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/ core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/ core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/ +core-$(CONFIG_KVM) += arch/ia64/kvm/ drivers-$(CONFIG_PCI) += arch/ia64/pci/ drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/ diff --git a/trunk/arch/ia64/kvm/Kconfig b/trunk/arch/ia64/kvm/Kconfig new file mode 100644 index 000000000000..7914e4828504 --- /dev/null +++ b/trunk/arch/ia64/kvm/Kconfig @@ -0,0 +1,49 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + depends on HAVE_KVM || IA64 + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + ---help--- + Support hosting fully virtualized guest machines using hardware + virtualization extensions. You will need a fairly recent + processor equipped with virtualization extensions. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_INTEL + tristate "KVM for Intel Itanium 2 processors support" + depends on KVM && m + ---help--- + Provides support for KVM on Itanium 2 processors equipped with the VT + extensions. + +config KVM_TRACE + bool + +endif # VIRTUALIZATION diff --git a/trunk/arch/ia64/kvm/Makefile b/trunk/arch/ia64/kvm/Makefile new file mode 100644 index 000000000000..41b034ffa73b --- /dev/null +++ b/trunk/arch/ia64/kvm/Makefile @@ -0,0 +1,61 @@ +#This Make file is to generate asm-offsets.h and build source. +# + +#Generate asm-offsets.h for vmm module build +offsets-file := asm-offsets.h + +always := $(offsets-file) +targets := $(offsets-file) +targets += arch/ia64/kvm/asm-offsets.s +clean-files := $(addprefix $(objtree)/,$(targets) $(obj)/memcpy.S $(obj)/memset.S) + +# Default sed regexp - multiline due to syntax constraints +define sed-y + "/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}" +endef + +quiet_cmd_offsets = GEN $@ +define cmd_offsets + (set -e; \ + echo "#ifndef __ASM_KVM_OFFSETS_H__"; \ + echo "#define __ASM_KVM_OFFSETS_H__"; \ + echo "/*"; \ + echo " * DO NOT MODIFY."; \ + echo " *"; \ + echo " * This file was generated by Makefile"; \ + echo " *"; \ + echo " */"; \ + echo ""; \ + sed -ne $(sed-y) $<; \ + echo ""; \ + echo "#endif" ) > $@ +endef +# We use internal rules to avoid the "is up to date" message from make +arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c + $(call if_changed_dep,cc_s_c) + +$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s + $(call cmd,offsets) + +# +# Makefile for Kernel-based Virtual Machine module +# + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ + +$(addprefix $(objtree)/,$(obj)/memcpy.S $(obj)/memset.S): + $(shell ln -snf ../lib/memcpy.S $(src)/memcpy.S) + $(shell ln -snf ../lib/memset.S $(src)/memset.S) + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) + +kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o +obj-$(CONFIG_KVM) += kvm.o + +FORCE : $(obj)/$(offsets-file) +EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127 +kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \ + vtlb.o process.o +#Add link memcpy and memset to avoid possible structure assignment error +kvm-intel-objs += memset.o memcpy.o +obj-$(CONFIG_KVM_INTEL) += kvm-intel.o diff --git a/trunk/arch/ia64/kvm/asm-offsets.c b/trunk/arch/ia64/kvm/asm-offsets.c new file mode 100644 index 000000000000..4e3dc13a619c --- /dev/null +++ b/trunk/arch/ia64/kvm/asm-offsets.c @@ -0,0 +1,251 @@ +/* + * asm-offsets.c Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + * + * Anthony Xu + * Xiantao Zhang + * Copyright (c) 2007 Intel Corporation KVM support. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include +#include + +#include "vcpu.h" + +#define task_struct kvm_vcpu + +#define DEFINE(sym, val) \ + asm volatile("\n->" #sym " (%0) " #val : : "i" (val)) + +#define BLANK() asm volatile("\n->" : :) + +#define OFFSET(_sym, _str, _mem) \ + DEFINE(_sym, offsetof(_str, _mem)); + +void foo(void) +{ + DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu)); + DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs)); + + BLANK(); + + DEFINE(VMM_VCPU_META_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_rr0)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, + arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_VRR0_OFFSET, + offsetof(struct kvm_vcpu, arch.vrr[0])); + DEFINE(VMM_VPD_IRR0_OFFSET, + offsetof(struct vpd, irr[0])); + DEFINE(VMM_VCPU_ITC_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_check)); + DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VPD_VHPI_OFFSET, + offsetof(struct vpd, vhpi)); + DEFINE(VMM_VCPU_VSA_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.vsa_base)); + DEFINE(VMM_VCPU_VPD_OFFSET, + offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VCPU_IRQ_CHECK, + offsetof(struct kvm_vcpu, arch.irq_check)); + DEFINE(VMM_VCPU_TIMER_PENDING, + offsetof(struct kvm_vcpu, arch.timer_pending)); + DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET, + offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_ITC_OFS_OFFSET, + offsetof(struct kvm_vcpu, arch.itc_offset)); + DEFINE(VMM_VCPU_LAST_ITC_OFFSET, + offsetof(struct kvm_vcpu, arch.last_itc)); + DEFINE(VMM_VCPU_SAVED_GP_OFFSET, + offsetof(struct kvm_vcpu, arch.saved_gp)); + + BLANK(); + + DEFINE(VMM_PT_REGS_B6_OFFSET, + offsetof(struct kvm_pt_regs, b6)); + DEFINE(VMM_PT_REGS_B7_OFFSET, + offsetof(struct kvm_pt_regs, b7)); + DEFINE(VMM_PT_REGS_AR_CSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_csd)); + DEFINE(VMM_PT_REGS_AR_SSD_OFFSET, + offsetof(struct kvm_pt_regs, ar_ssd)); + DEFINE(VMM_PT_REGS_R8_OFFSET, + offsetof(struct kvm_pt_regs, r8)); + DEFINE(VMM_PT_REGS_R9_OFFSET, + offsetof(struct kvm_pt_regs, r9)); + DEFINE(VMM_PT_REGS_R10_OFFSET, + offsetof(struct kvm_pt_regs, r10)); + DEFINE(VMM_PT_REGS_R11_OFFSET, + offsetof(struct kvm_pt_regs, r11)); + DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET, + offsetof(struct kvm_pt_regs, cr_ipsr)); + DEFINE(VMM_PT_REGS_CR_IIP_OFFSET, + offsetof(struct kvm_pt_regs, cr_iip)); + DEFINE(VMM_PT_REGS_CR_IFS_OFFSET, + offsetof(struct kvm_pt_regs, cr_ifs)); + DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_unat)); + DEFINE(VMM_PT_REGS_AR_PFS_OFFSET, + offsetof(struct kvm_pt_regs, ar_pfs)); + DEFINE(VMM_PT_REGS_AR_RSC_OFFSET, + offsetof(struct kvm_pt_regs, ar_rsc)); + DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET, + offsetof(struct kvm_pt_regs, ar_rnat)); + + DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET, + offsetof(struct kvm_pt_regs, ar_bspstore)); + DEFINE(VMM_PT_REGS_PR_OFFSET, + offsetof(struct kvm_pt_regs, pr)); + DEFINE(VMM_PT_REGS_B0_OFFSET, + offsetof(struct kvm_pt_regs, b0)); + DEFINE(VMM_PT_REGS_LOADRS_OFFSET, + offsetof(struct kvm_pt_regs, loadrs)); + DEFINE(VMM_PT_REGS_R1_OFFSET, + offsetof(struct kvm_pt_regs, r1)); + DEFINE(VMM_PT_REGS_R12_OFFSET, + offsetof(struct kvm_pt_regs, r12)); + DEFINE(VMM_PT_REGS_R13_OFFSET, + offsetof(struct kvm_pt_regs, r13)); + DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET, + offsetof(struct kvm_pt_regs, ar_fpsr)); + DEFINE(VMM_PT_REGS_R15_OFFSET, + offsetof(struct kvm_pt_regs, r15)); + DEFINE(VMM_PT_REGS_R14_OFFSET, + offsetof(struct kvm_pt_regs, r14)); + DEFINE(VMM_PT_REGS_R2_OFFSET, + offsetof(struct kvm_pt_regs, r2)); + DEFINE(VMM_PT_REGS_R3_OFFSET, + offsetof(struct kvm_pt_regs, r3)); + DEFINE(VMM_PT_REGS_R16_OFFSET, + offsetof(struct kvm_pt_regs, r16)); + DEFINE(VMM_PT_REGS_R17_OFFSET, + offsetof(struct kvm_pt_regs, r17)); + DEFINE(VMM_PT_REGS_R18_OFFSET, + offsetof(struct kvm_pt_regs, r18)); + DEFINE(VMM_PT_REGS_R19_OFFSET, + offsetof(struct kvm_pt_regs, r19)); + DEFINE(VMM_PT_REGS_R20_OFFSET, + offsetof(struct kvm_pt_regs, r20)); + DEFINE(VMM_PT_REGS_R21_OFFSET, + offsetof(struct kvm_pt_regs, r21)); + DEFINE(VMM_PT_REGS_R22_OFFSET, + offsetof(struct kvm_pt_regs, r22)); + DEFINE(VMM_PT_REGS_R23_OFFSET, + offsetof(struct kvm_pt_regs, r23)); + DEFINE(VMM_PT_REGS_R24_OFFSET, + offsetof(struct kvm_pt_regs, r24)); + DEFINE(VMM_PT_REGS_R25_OFFSET, + offsetof(struct kvm_pt_regs, r25)); + DEFINE(VMM_PT_REGS_R26_OFFSET, + offsetof(struct kvm_pt_regs, r26)); + DEFINE(VMM_PT_REGS_R27_OFFSET, + offsetof(struct kvm_pt_regs, r27)); + DEFINE(VMM_PT_REGS_R28_OFFSET, + offsetof(struct kvm_pt_regs, r28)); + DEFINE(VMM_PT_REGS_R29_OFFSET, + offsetof(struct kvm_pt_regs, r29)); + DEFINE(VMM_PT_REGS_R30_OFFSET, + offsetof(struct kvm_pt_regs, r30)); + DEFINE(VMM_PT_REGS_R31_OFFSET, + offsetof(struct kvm_pt_regs, r31)); + DEFINE(VMM_PT_REGS_AR_CCV_OFFSET, + offsetof(struct kvm_pt_regs, ar_ccv)); + DEFINE(VMM_PT_REGS_F6_OFFSET, + offsetof(struct kvm_pt_regs, f6)); + DEFINE(VMM_PT_REGS_F7_OFFSET, + offsetof(struct kvm_pt_regs, f7)); + DEFINE(VMM_PT_REGS_F8_OFFSET, + offsetof(struct kvm_pt_regs, f8)); + DEFINE(VMM_PT_REGS_F9_OFFSET, + offsetof(struct kvm_pt_regs, f9)); + DEFINE(VMM_PT_REGS_F10_OFFSET, + offsetof(struct kvm_pt_regs, f10)); + DEFINE(VMM_PT_REGS_F11_OFFSET, + offsetof(struct kvm_pt_regs, f11)); + DEFINE(VMM_PT_REGS_R4_OFFSET, + offsetof(struct kvm_pt_regs, r4)); + DEFINE(VMM_PT_REGS_R5_OFFSET, + offsetof(struct kvm_pt_regs, r5)); + DEFINE(VMM_PT_REGS_R6_OFFSET, + offsetof(struct kvm_pt_regs, r6)); + DEFINE(VMM_PT_REGS_R7_OFFSET, + offsetof(struct kvm_pt_regs, r7)); + DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET, + offsetof(struct kvm_pt_regs, eml_unat)); + DEFINE(VMM_VCPU_IIPA_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_iipa)); + DEFINE(VMM_VCPU_OPCODE_OFFSET, + offsetof(struct kvm_vcpu, arch.opcode)); + DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause)); + DEFINE(VMM_VCPU_ISR_OFFSET, + offsetof(struct kvm_vcpu, arch.cr_isr)); + DEFINE(VMM_PT_REGS_R16_SLOT, + (((offsetof(struct kvm_pt_regs, r16) + - sizeof(struct kvm_pt_regs)) >> 3) & 0x3f)); + DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET, + offsetof(struct kvm_vcpu, arch.mode_flags)); + DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp)); + BLANK(); + + DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd)); + DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs)); + DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET, + offsetof(struct kvm_vcpu, arch.insvc[0])); + DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta)); + DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr)); + + DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4])); + DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5])); + DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12])); + DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13])); + DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0])); + DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1])); + DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0])); + DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1])); + DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2])); + DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0])); + DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16])); + DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18])); + DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19])); + DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21])); + DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24])); + DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27])); + DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28])); + DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29])); + DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30])); + DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36])); + DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40])); + DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64])); + DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65])); + DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0])); + DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2])); + DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8])); + DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0])); + DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0])); + DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2])); + DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3])); + DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32])); + DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33])); + DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0])); + DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr)); + BLANK(); +} diff --git a/trunk/arch/ia64/kvm/kvm-ia64.c b/trunk/arch/ia64/kvm/kvm-ia64.c new file mode 100644 index 000000000000..6df073240135 --- /dev/null +++ b/trunk/arch/ia64/kvm/kvm-ia64.c @@ -0,0 +1,1806 @@ + +/* + * kvm_ia64.c: Basic KVM suppport On Itanium series processors + * + * + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "misc.h" +#include "vti.h" +#include "iodev.h" +#include "ioapic.h" +#include "lapic.h" + +static unsigned long kvm_vmm_base; +static unsigned long kvm_vsa_base; +static unsigned long kvm_vm_buffer; +static unsigned long kvm_vm_buffer_size; +unsigned long kvm_vmm_gp; + +static long vp_env_info; + +static struct kvm_vmm_info *kvm_vmm_info; + +static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu); + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { NULL } +}; + + +struct fdesc{ + unsigned long ip; + unsigned long gp; +}; + +static void kvm_flush_icache(unsigned long start, unsigned long len) +{ + int l; + + for (l = 0; l < (len + 32); l += 32) + ia64_fc(start + l); + + ia64_sync_i(); + ia64_srlz_i(); +} + +static void kvm_flush_tlb_all(void) +{ + unsigned long i, j, count0, count1, stride0, stride1, addr; + long flags; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva, + (u64)opt_handler); + + return iprv.status; +} + +static DEFINE_SPINLOCK(vp_lock); + +void kvm_arch_hardware_enable(void *garbage) +{ + long status; + long tmp_base; + unsigned long pte; + unsigned long saved_psr; + int slot; + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + spin_lock(&vp_lock); + status = ia64_pal_vp_init_env(kvm_vsa_base ? + VP_INIT_ENV : VP_INIT_ENV_INITALIZE, + __pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base); + if (status != 0) { + printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n"); + return ; + } + + if (!kvm_vsa_base) { + kvm_vsa_base = tmp_base; + printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base); + } + spin_unlock(&vp_lock); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_hardware_disable(void *garbage) +{ + + long status; + int slot; + unsigned long pte; + unsigned long saved_psr; + unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA); + + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), + PAGE_KERNEL)); + + local_irq_save(saved_psr); + slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (slot < 0) + return; + local_irq_restore(saved_psr); + + status = ia64_pal_vp_exit_env(host_iva); + if (status) + printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n", + status); + ia64_ptr_entry(0x3, slot); +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + *(int *)rtn = 0; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + + int r; + + switch (ext) { + case KVM_CAP_IRQCHIP: + case KVM_CAP_USER_MEMORY: + + r = 1; + break; + default: + r = 0; + } + return r; + +} + +static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, + gpa_t addr) +{ + struct kvm_io_device *dev; + + dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); + + return dev; +} + +static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 1; + return 0; +} + +static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct kvm_mmio_req *p; + struct kvm_io_device *mmio_dev; + + p = kvm_get_vcpu_ioreq(vcpu); + + if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS) + goto mmio; + vcpu->mmio_needed = 1; + vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr; + vcpu->mmio_size = kvm_run->mmio.len = p->size; + vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir; + + if (vcpu->mmio_is_write) + memcpy(vcpu->mmio_data, &p->data, p->size); + memcpy(kvm_run->mmio.data, &p->data, p->size); + kvm_run->exit_reason = KVM_EXIT_MMIO; + return 0; +mmio: + mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr); + if (mmio_dev) { + if (!p->dir) + kvm_iodevice_write(mmio_dev, p->addr, p->size, + &p->data); + else + kvm_iodevice_read(mmio_dev, p->addr, p->size, + &p->data); + + } else + printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); + p->state = STATE_IORESP_READY; + + return 1; +} + +static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_PAL_CALL) + return kvm_pal_emul(vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 2; + return 0; + } +} + +static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + kvm_sal_emul(vcpu); + return 1; + } else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = 3; + return 0; + } + +} + +/* + * offset: address offset to IPI space. + * value: deliver value. + */ +static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm, + uint64_t vector) +{ + switch (dm) { + case SAPIC_FIXED: + kvm_apic_set_irq(vcpu, vector, 0); + break; + case SAPIC_NMI: + kvm_apic_set_irq(vcpu, 2, 0); + break; + case SAPIC_EXTINT: + kvm_apic_set_irq(vcpu, 0, 0); + break; + case SAPIC_INIT: + case SAPIC_PMI: + default: + printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n"); + break; + } +} + +static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id, + unsigned long eid) +{ + union ia64_lid lid; + int i; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (kvm->vcpus[i]) { + lid.val = VCPU_LID(kvm->vcpus[i]); + if (lid.id == id && lid.eid == eid) + return kvm->vcpus[i]; + } + } + + return NULL; +} + +static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm_vcpu *target_vcpu; + struct kvm_pt_regs *regs; + union ia64_ipi_a addr = p->u.ipi_data.addr; + union ia64_ipi_d data = p->u.ipi_data.data; + + target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid); + if (!target_vcpu) + return handle_vm_error(vcpu, kvm_run); + + if (!target_vcpu->arch.launched) { + regs = vcpu_regs(target_vcpu); + + regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip; + regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp; + + target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&target_vcpu->wq)) + wake_up_interruptible(&target_vcpu->wq); + } else { + vcpu_deliver_ipi(target_vcpu, data.dm, data.vector); + if (target_vcpu != vcpu) + kvm_vcpu_kick(target_vcpu); + } + + return 1; +} + +struct call_data { + struct kvm_ptc_g ptc_g_data; + struct kvm_vcpu *vcpu; +}; + +static void vcpu_global_purge(void *info) +{ + struct call_data *p = (struct call_data *)info; + struct kvm_vcpu *vcpu = p->vcpu; + + if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests)) + return; + + set_bit(KVM_REQ_PTC_G, &vcpu->requests); + if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) { + vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] = + p->ptc_g_data; + } else { + clear_bit(KVM_REQ_PTC_G, &vcpu->requests); + vcpu->arch.ptc_g_count = 0; + set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests); + } +} + +static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + struct exit_ctl_data *p = kvm_get_exit_data(vcpu); + struct kvm *kvm = vcpu->kvm; + struct call_data call_data; + int i; + call_data.ptc_g_data = p->u.ptc_g_data; + + for (i = 0; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i] || kvm->vcpus[i]->arch.mp_state == + KVM_MP_STATE_UNINITIALIZED || + vcpu == kvm->vcpus[i]) + continue; + + if (waitqueue_active(&kvm->vcpus[i]->wq)) + wake_up_interruptible(&kvm->vcpus[i]->wq); + + if (kvm->vcpus[i]->cpu != -1) { + call_data.vcpu = kvm->vcpus[i]; + smp_call_function_single(kvm->vcpus[i]->cpu, + vcpu_global_purge, &call_data, 0, 1); + } else + printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n"); + + } + return 1; +} + +static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + return 1; +} + +int kvm_emulate_halt(struct kvm_vcpu *vcpu) +{ + + ktime_t kt; + long itc_diff; + unsigned long vcpu_now_itc; + + unsigned long expires; + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + vcpu_now_itc = ia64_getreg(_IA64_REG_AR_ITC) + vcpu->arch.itc_offset; + + if (time_after(vcpu_now_itc, vpd->itm)) { + vcpu->arch.timer_check = 1; + return 1; + } + itc_diff = vpd->itm - vcpu_now_itc; + if (itc_diff < 0) + itc_diff = -itc_diff; + + expires = div64_64(itc_diff, cyc_per_usec); + kt = ktime_set(0, 1000 * expires); + vcpu->arch.ht_active = 1; + hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS); + + if (irqchip_in_kernel(vcpu->kvm)) { + vcpu->arch.mp_state = KVM_MP_STATE_HALTED; + kvm_vcpu_block(vcpu); + hrtimer_cancel(p_ht); + vcpu->arch.ht_active = 0; + + if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) + return -EINTR; + return 1; + } else { + printk(KERN_ERR"kvm: Unsupported userspace halt!"); + return 0; + } +} + +static int handle_vm_shutdown(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + kvm_run->exit_reason = KVM_EXIT_SHUTDOWN; + return 0; +} + +static int handle_external_interrupt(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) +{ + return 1; +} + +static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu, + struct kvm_run *kvm_run) = { + [EXIT_REASON_VM_PANIC] = handle_vm_error, + [EXIT_REASON_MMIO_INSTRUCTION] = handle_mmio, + [EXIT_REASON_PAL_CALL] = handle_pal_call, + [EXIT_REASON_SAL_CALL] = handle_sal_call, + [EXIT_REASON_SWITCH_RR6] = handle_switch_rr6, + [EXIT_REASON_VM_DESTROY] = handle_vm_shutdown, + [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, + [EXIT_REASON_IPI] = handle_ipi, + [EXIT_REASON_PTC_G] = handle_global_purge, + +}; + +static const int kvm_vti_max_exit_handlers = + sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers); + +static void kvm_prepare_guest_switch(struct kvm_vcpu *vcpu) +{ +} + +static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_exit_data; + + p_exit_data = kvm_get_exit_data(vcpu); + return p_exit_data->exit_reason; +} + +/* + * The guest has exited. See if we can fix it or if we need userspace + * assistance. + */ +static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) +{ + u32 exit_reason = kvm_get_exit_reason(vcpu); + vcpu->arch.last_exit = exit_reason; + + if (exit_reason < kvm_vti_max_exit_handlers + && kvm_vti_exit_handlers[exit_reason]) + return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run); + else { + kvm_run->exit_reason = KVM_EXIT_UNKNOWN; + kvm_run->hw.hardware_exit_reason = exit_reason; + } + return 0; +} + +static inline void vti_set_rr6(unsigned long rr6) +{ + ia64_set_rr(RR6, rr6); + ia64_srlz_i(); +} + +static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu) +{ + unsigned long pte; + struct kvm *kvm = vcpu->kvm; + int r; + + /*Insert a pair of tr to map vmm*/ + pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vmm_tr_slot = r; + /*Insert a pairt of tr to map data of vm*/ + pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL)); + r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE, + pte, KVM_VM_DATA_SHIFT); + if (r < 0) + goto out; + vcpu->arch.vm_tr_slot = r; + r = 0; +out: + return r; + +} + +static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu) +{ + + ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot); + ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot); + +} + +static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + + if (vcpu->arch.last_run_cpu != cpu || + per_cpu(last_vcpu, cpu) != vcpu) { + per_cpu(last_vcpu, cpu) = vcpu; + vcpu->arch.last_run_cpu = cpu; + kvm_flush_tlb_all(); + } + + vcpu->arch.host_rr6 = ia64_get_rr(RR6); + vti_set_rr6(vcpu->arch.vmm_rr); + return kvm_insert_vmm_mapping(vcpu); +} +static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu) +{ + kvm_purge_vmm_mapping(vcpu); + vti_set_rr6(vcpu->arch.host_rr6); +} + +static int vti_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + union context *host_ctx, *guest_ctx; + int r; + + /*Get host and guest context with guest address space.*/ + host_ctx = kvm_get_host_context(vcpu); + guest_ctx = kvm_get_guest_context(vcpu); + + r = kvm_vcpu_pre_transition(vcpu); + if (r < 0) + goto out; + kvm_vmm_info->tramp_entry(host_ctx, guest_ctx); + kvm_vcpu_post_transition(vcpu); + r = 0; +out: + return r; +} + +static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + +again: + preempt_disable(); + + kvm_prepare_guest_switch(vcpu); + local_irq_disable(); + + if (signal_pending(current)) { + local_irq_enable(); + preempt_enable(); + r = -EINTR; + kvm_run->exit_reason = KVM_EXIT_INTR; + goto out; + } + + vcpu->guest_mode = 1; + kvm_guest_enter(); + + r = vti_vcpu_run(vcpu, kvm_run); + if (r < 0) { + local_irq_enable(); + preempt_enable(); + kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; + goto out; + } + + vcpu->arch.launched = 1; + vcpu->guest_mode = 0; + local_irq_enable(); + + /* + * We must have an instruction between local_irq_enable() and + * kvm_guest_exit(), so the timer interrupt isn't delayed by + * the interrupt shadow. The stat.exits increment will do nicely. + * But we need to prevent reordering, hence this barrier(): + */ + barrier(); + + kvm_guest_exit(); + + preempt_enable(); + + r = kvm_handle_exit(kvm_run, vcpu); + + if (r > 0) { + if (!need_resched()) + goto again; + } + +out: + if (r > 0) { + kvm_resched(vcpu); + goto again; + } + + return r; +} + +static void kvm_set_mmio_data(struct kvm_vcpu *vcpu) +{ + struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu); + + if (!vcpu->mmio_is_write) + memcpy(&p->data, vcpu->mmio_data, 8); + p->state = STATE_IORESP_READY; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int r; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { + kvm_vcpu_block(vcpu); + vcpu_put(vcpu); + return -EAGAIN; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8); + kvm_set_mmio_data(vcpu); + vcpu->mmio_read_completed = 1; + vcpu->mmio_needed = 0; + } + r = __vcpu_run(vcpu, kvm_run); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + return r; +} + +/* + * Allocate 16M memory for every vm to hold its specific data. + * Its memory map is defined in kvm_host.h. + */ +static struct kvm *kvm_alloc_kvm(void) +{ + + struct kvm *kvm; + uint64_t vm_base; + + vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE)); + + if (!vm_base) + return ERR_PTR(-ENOMEM); + printk(KERN_DEBUG"kvm: VM data's base Address:0x%lx\n", vm_base); + + /* Zero all pages before use! */ + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + + kvm = (struct kvm *)(vm_base + KVM_VM_OFS); + kvm->arch.vm_base = vm_base; + + return kvm; +} + +struct kvm_io_range { + unsigned long start; + unsigned long size; + unsigned long type; +}; + +static const struct kvm_io_range io_ranges[] = { + {VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER}, + {MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO}, + {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO}, + {IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC}, + {PIB_START, PIB_SIZE, GPFN_PIB}, +}; + +static void kvm_build_io_pmt(struct kvm *kvm) +{ + unsigned long i, j; + + /* Mark I/O ranges */ + for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range)); + i++) { + for (j = io_ranges[i].start; + j < io_ranges[i].start + io_ranges[i].size; + j += PAGE_SIZE) + kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT, + io_ranges[i].type, 0); + } + +} + +/*Use unused rids to virtualize guest rid.*/ +#define GUEST_PHYSICAL_RR0 0x1739 +#define GUEST_PHYSICAL_RR4 0x2739 +#define VMM_INIT_RR 0x1660 + +static void kvm_init_vm(struct kvm *kvm) +{ + long vm_base; + + BUG_ON(!kvm); + + kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0; + kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4; + kvm->arch.vmm_init_rr = VMM_INIT_RR; + + vm_base = kvm->arch.vm_base; + if (vm_base) { + kvm->arch.vhpt_base = vm_base + KVM_VHPT_OFS; + kvm->arch.vtlb_base = vm_base + KVM_VTLB_OFS; + kvm->arch.vpd_base = vm_base + KVM_VPD_OFS; + } + + /* + *Fill P2M entries for MMIO/IO ranges + */ + kvm_build_io_pmt(kvm); + +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm = kvm_alloc_kvm(); + + if (IS_ERR(kvm)) + return ERR_PTR(-ENOMEM); + kvm_init_vm(kvm); + + return kvm; + +} + +static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, + struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(&chip->chip.ioapic, ioapic_irqchip(kvm), + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip) +{ + int r; + + r = 0; + switch (chip->chip_id) { + case KVM_IRQCHIP_IOAPIC: + memcpy(ioapic_irqchip(kvm), + &chip->chip.ioapic, + sizeof(struct kvm_ioapic_state)); + break; + default: + r = -EINVAL; + break; + } + return r; +} + +#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + int r; + + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + vpd->vgr[i] = regs->vpd.vgr[i]; + vpd->vbgr[i] = regs->vpd.vbgr[i]; + } + for (i = 0; i < 128; i++) + vpd->vcr[i] = regs->vpd.vcr[i]; + vpd->vhpi = regs->vpd.vhpi; + vpd->vnat = regs->vpd.vnat; + vpd->vbnat = regs->vpd.vbnat; + vpd->vpsr = regs->vpd.vpsr; + + vpd->vpr = regs->vpd.vpr; + + r = -EFAULT; + r = copy_from_user(&vcpu->arch.guest, regs->saved_guest, + sizeof(union context)); + if (r) + goto out; + r = copy_from_user(vcpu + 1, regs->saved_stack + + sizeof(struct kvm_vcpu), + IA64_STK_OFFSET - sizeof(struct kvm_vcpu)); + if (r) + goto out; + vcpu->arch.exit_data = + ((struct kvm_vcpu *)(regs->saved_stack))->arch.exit_data; + + RESTORE_REGS(mp_state); + RESTORE_REGS(vmm_rr); + memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS); + memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS); + RESTORE_REGS(itr_regions); + RESTORE_REGS(dtr_regions); + RESTORE_REGS(tc_regions); + RESTORE_REGS(irq_check); + RESTORE_REGS(itc_check); + RESTORE_REGS(timer_check); + RESTORE_REGS(timer_pending); + RESTORE_REGS(last_itc); + for (i = 0; i < 8; i++) { + vcpu->arch.vrr[i] = regs->vrr[i]; + vcpu->arch.ibr[i] = regs->ibr[i]; + vcpu->arch.dbr[i] = regs->dbr[i]; + } + for (i = 0; i < 4; i++) + vcpu->arch.insvc[i] = regs->insvc[i]; + RESTORE_REGS(xtp); + RESTORE_REGS(metaphysical_rr0); + RESTORE_REGS(metaphysical_rr4); + RESTORE_REGS(metaphysical_saved_rr0); + RESTORE_REGS(metaphysical_saved_rr4); + RESTORE_REGS(fp_psr); + RESTORE_REGS(saved_gp); + + vcpu->arch.irq_new_pending = 1; + vcpu->arch.itc_offset = regs->saved_itc - ia64_getreg(_IA64_REG_AR_ITC); + set_bit(KVM_REQ_RESUME, &vcpu->requests); + + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r = -EINVAL; + + switch (ioctl) { + case KVM_SET_MEMORY_REGION: { + struct kvm_memory_region kvm_mem; + struct kvm_userspace_memory_region kvm_userspace_mem; + + r = -EFAULT; + if (copy_from_user(&kvm_mem, argp, sizeof kvm_mem)) + goto out; + kvm_userspace_mem.slot = kvm_mem.slot; + kvm_userspace_mem.flags = kvm_mem.flags; + kvm_userspace_mem.guest_phys_addr = + kvm_mem.guest_phys_addr; + kvm_userspace_mem.memory_size = kvm_mem.memory_size; + r = kvm_vm_ioctl_set_memory_region(kvm, + &kvm_userspace_mem, 0); + if (r) + goto out; + break; + } + case KVM_CREATE_IRQCHIP: + r = -EFAULT; + r = kvm_ioapic_init(kvm); + if (r) + goto out; + break; + case KVM_IRQ_LINE: { + struct kvm_irq_level irq_event; + + r = -EFAULT; + if (copy_from_user(&irq_event, argp, sizeof irq_event)) + goto out; + if (irqchip_in_kernel(kvm)) { + mutex_lock(&kvm->lock); + kvm_ioapic_set_irq(kvm->arch.vioapic, + irq_event.irq, + irq_event.level); + mutex_unlock(&kvm->lock); + r = 0; + } + break; + } + case KVM_GET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_get_irqchip(kvm, &chip); + if (r) + goto out; + r = -EFAULT; + if (copy_to_user(argp, &chip, sizeof chip)) + goto out; + r = 0; + break; + } + case KVM_SET_IRQCHIP: { + /* 0: PIC master, 1: PIC slave, 2: IOAPIC */ + struct kvm_irqchip chip; + + r = -EFAULT; + if (copy_from_user(&chip, argp, sizeof chip)) + goto out; + r = -ENXIO; + if (!irqchip_in_kernel(kvm)) + goto out; + r = kvm_vm_ioctl_set_irqchip(kvm, &chip); + if (r) + goto out; + r = 0; + break; + } + default: + ; + } +out: + return r; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -EINVAL; + +} +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + + return -EINVAL; +} + +static int kvm_alloc_vmm_area(void) +{ + if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) { + kvm_vmm_base = __get_free_pages(GFP_KERNEL, + get_order(KVM_VMM_SIZE)); + if (!kvm_vmm_base) + return -ENOMEM; + + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + kvm_vm_buffer = kvm_vmm_base + VMM_SIZE; + + printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n", + kvm_vmm_base, kvm_vm_buffer); + } + + return 0; +} + +static void kvm_free_vmm_area(void) +{ + if (kvm_vmm_base) { + /*Zero this area before free to avoid bits leak!!*/ + memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE); + free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE)); + kvm_vmm_base = 0; + kvm_vm_buffer = 0; + kvm_vsa_base = 0; + } +} + +/* + * Make sure that a cpu that is being hot-unplugged does not have any vcpus + * cached on it. Leave it as blank for IA64. + */ +void decache_vcpus_on_cpu(int cpu) +{ +} + +static void vti_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +static int vti_init_vpd(struct kvm_vcpu *vcpu) +{ + int i; + union cpuid3_t cpuid3; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (IS_ERR(vpd)) + return PTR_ERR(vpd); + + /* CPUID init */ + for (i = 0; i < 5; i++) + vpd->vcpuid[i] = ia64_get_cpuid(i); + + /* Limit the CPUID number to 5 */ + cpuid3.value = vpd->vcpuid[3]; + cpuid3.number = 4; /* 5 - 1 */ + vpd->vcpuid[3] = cpuid3.value; + + /*Set vac and vdc fields*/ + vpd->vac.a_from_int_cr = 1; + vpd->vac.a_to_int_cr = 1; + vpd->vac.a_from_psr = 1; + vpd->vac.a_from_cpuid = 1; + vpd->vac.a_cover = 1; + vpd->vac.a_bsw = 1; + vpd->vac.a_int = 1; + vpd->vdc.d_vmsw = 1; + + /*Set virtual buffer*/ + vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE; + + return 0; +} + +static int vti_create_vp(struct kvm_vcpu *vcpu) +{ + long ret; + struct vpd *vpd = vcpu->arch.vpd; + unsigned long vmm_ivt; + + vmm_ivt = kvm_vmm_info->vmm_ivt; + + printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt); + + ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0); + + if (ret) { + printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n"); + return -EINVAL; + } + return 0; +} + +static void init_ptce_info(struct kvm_vcpu *vcpu) +{ + ia64_ptce_info_t ptce = {0}; + + ia64_get_ptce(&ptce); + vcpu->arch.ptce_base = ptce.base; + vcpu->arch.ptce_count[0] = ptce.count[0]; + vcpu->arch.ptce_count[1] = ptce.count[1]; + vcpu->arch.ptce_stride[0] = ptce.stride[0]; + vcpu->arch.ptce_stride[1] = ptce.stride[1]; +} + +static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu) +{ + struct hrtimer *p_ht = &vcpu->arch.hlt_timer; + + if (hrtimer_cancel(p_ht)) + hrtimer_start(p_ht, p_ht->expires, HRTIMER_MODE_ABS); +} + +static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data) +{ + struct kvm_vcpu *vcpu; + wait_queue_head_t *q; + + vcpu = container_of(data, struct kvm_vcpu, arch.hlt_timer); + if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED) + goto out; + + q = &vcpu->wq; + if (waitqueue_active(q)) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + wake_up_interruptible(q); + } +out: + vcpu->arch.timer_check = 1; + return HRTIMER_NORESTART; +} + +#define PALE_RESET_ENTRY 0x80000000ffffffb0UL + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *v; + int r; + int i; + long itc_offset; + struct kvm *kvm = vcpu->kvm; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + union context *p_ctx = &vcpu->arch.guest; + struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu); + + /*Init vcpu context for first run.*/ + if (IS_ERR(vmm_vcpu)) + return PTR_ERR(vmm_vcpu); + + if (vcpu->vcpu_id == 0) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + + /*Set entry address for first run.*/ + regs->cr_iip = PALE_RESET_ENTRY; + + /*Initilize itc offset for vcpus*/ + itc_offset = 0UL - ia64_getreg(_IA64_REG_AR_ITC); + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + v->arch.itc_offset = itc_offset; + v->arch.last_itc = 0; + } + } else + vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED; + + r = -ENOMEM; + vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL); + if (!vcpu->arch.apic) + goto out; + vcpu->arch.apic->vcpu = vcpu; + + p_ctx->gr[1] = 0; + p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + IA64_STK_OFFSET); + p_ctx->gr[13] = (unsigned long)vmm_vcpu; + p_ctx->psr = 0x1008522000UL; + p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/ + p_ctx->caller_unat = 0; + p_ctx->pr = 0x0; + p_ctx->ar[36] = 0x0; /*unat*/ + p_ctx->ar[19] = 0x0; /*rnat*/ + p_ctx->ar[18] = (unsigned long)vmm_vcpu + + ((sizeof(struct kvm_vcpu)+15) & ~15); + p_ctx->ar[64] = 0x0; /*pfs*/ + p_ctx->cr[0] = 0x7e04UL; + p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt; + p_ctx->cr[8] = 0x3c; + + /*Initilize region register*/ + p_ctx->rr[0] = 0x30; + p_ctx->rr[1] = 0x30; + p_ctx->rr[2] = 0x30; + p_ctx->rr[3] = 0x30; + p_ctx->rr[4] = 0x30; + p_ctx->rr[5] = 0x30; + p_ctx->rr[7] = 0x30; + + /*Initilize branch register 0*/ + p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry; + + vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr; + vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0; + vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4; + + hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); + vcpu->arch.hlt_timer.function = hlt_timer_fn; + + vcpu->arch.last_run_cpu = -1; + vcpu->arch.vpd = (struct vpd *)VPD_ADDR(vcpu->vcpu_id); + vcpu->arch.vsa_base = kvm_vsa_base; + vcpu->arch.__gp = kvm_vmm_gp; + vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock); + vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_ADDR(vcpu->vcpu_id); + vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_ADDR(vcpu->vcpu_id); + init_ptce_info(vcpu); + + r = 0; +out: + return r; +} + +static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) +{ + unsigned long psr; + int r; + + local_irq_save(psr); + r = kvm_insert_vmm_mapping(vcpu); + if (r) + goto fail; + r = kvm_vcpu_init(vcpu, vcpu->kvm, id); + if (r) + goto fail; + + r = vti_init_vpd(vcpu); + if (r) { + printk(KERN_DEBUG"kvm: vpd init error!!\n"); + goto uninit; + } + + r = vti_create_vp(vcpu); + if (r) + goto uninit; + + kvm_purge_vmm_mapping(vcpu); + local_irq_restore(psr); + + return 0; +uninit: + kvm_vcpu_uninit(vcpu); +fail: + return r; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu; + unsigned long vm_base = kvm->arch.vm_base; + int r; + int cpu; + + r = -ENOMEM; + if (!vm_base) { + printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id); + goto fail; + } + vcpu = (struct kvm_vcpu *)(vm_base + KVM_VCPU_OFS + VCPU_SIZE * id); + vcpu->kvm = kvm; + + cpu = get_cpu(); + vti_vcpu_load(vcpu, cpu); + r = vti_vcpu_setup(vcpu, id); + put_cpu(); + + if (r) { + printk(KERN_DEBUG"kvm: vcpu_setup error!!\n"); + goto fail; + } + + return vcpu; +fail: + return ERR_PTR(r); +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; +} + +static void free_kvm(struct kvm *kvm) +{ + unsigned long vm_base = kvm->arch.vm_base; + + if (vm_base) { + memset((void *)vm_base, 0, KVM_VM_DATA_SIZE); + free_pages(vm_base, get_order(KVM_VM_DATA_SIZE)); + } + +} + +static void kvm_release_vm_pages(struct kvm *kvm) +{ + struct kvm_memory_slot *memslot; + int i, j; + unsigned long base_gfn; + + for (i = 0; i < kvm->nmemslots; i++) { + memslot = &kvm->memslots[i]; + base_gfn = memslot->base_gfn; + + for (j = 0; j < memslot->npages; j++) { + if (memslot->rmap[j]) + put_page((struct page *)memslot->rmap[j]); + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kfree(kvm->arch.vioapic); + kvm_release_vm_pages(kvm); + kvm_free_physmem(kvm); + free_kvm(kvm); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + if (cpu != vcpu->cpu) { + vcpu->cpu = cpu; + if (vcpu->arch.ht_active) + kvm_migrate_hlt_timer(vcpu); + } +} + +#define SAVE_REGS(_x) regs->_x = vcpu->arch._x + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + int r; + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + vcpu_load(vcpu); + + for (i = 0; i < 16; i++) { + regs->vpd.vgr[i] = vpd->vgr[i]; + regs->vpd.vbgr[i] = vpd->vbgr[i]; + } + for (i = 0; i < 128; i++) + regs->vpd.vcr[i] = vpd->vcr[i]; + regs->vpd.vhpi = vpd->vhpi; + regs->vpd.vnat = vpd->vnat; + regs->vpd.vbnat = vpd->vbnat; + regs->vpd.vpsr = vpd->vpsr; + regs->vpd.vpr = vpd->vpr; + + r = -EFAULT; + r = copy_to_user(regs->saved_guest, &vcpu->arch.guest, + sizeof(union context)); + if (r) + goto out; + r = copy_to_user(regs->saved_stack, (void *)vcpu, IA64_STK_OFFSET); + if (r) + goto out; + SAVE_REGS(mp_state); + SAVE_REGS(vmm_rr); + memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS); + memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS); + SAVE_REGS(itr_regions); + SAVE_REGS(dtr_regions); + SAVE_REGS(tc_regions); + SAVE_REGS(irq_check); + SAVE_REGS(itc_check); + SAVE_REGS(timer_check); + SAVE_REGS(timer_pending); + SAVE_REGS(last_itc); + for (i = 0; i < 8; i++) { + regs->vrr[i] = vcpu->arch.vrr[i]; + regs->ibr[i] = vcpu->arch.ibr[i]; + regs->dbr[i] = vcpu->arch.dbr[i]; + } + for (i = 0; i < 4; i++) + regs->insvc[i] = vcpu->arch.insvc[i]; + regs->saved_itc = vcpu->arch.itc_offset + ia64_getreg(_IA64_REG_AR_ITC); + SAVE_REGS(xtp); + SAVE_REGS(metaphysical_rr0); + SAVE_REGS(metaphysical_rr4); + SAVE_REGS(metaphysical_saved_rr0); + SAVE_REGS(metaphysical_saved_rr4); + SAVE_REGS(fp_psr); + SAVE_REGS(saved_gp); + vcpu_put(vcpu); + r = 0; +out: + return r; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + + hrtimer_cancel(&vcpu->arch.hlt_timer); + kfree(vcpu->arch.apic); +} + + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + unsigned long i; + struct page *page; + int npages = mem->memory_size >> PAGE_SHIFT; + struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot]; + unsigned long base_gfn = memslot->base_gfn; + + for (i = 0; i < npages; i++) { + page = gfn_to_page(kvm, base_gfn + i); + kvm_set_pmt_entry(kvm, base_gfn + i, + page_to_pfn(page) << PAGE_SHIFT, + _PAGE_AR_RWX|_PAGE_MA_WB); + memslot->rmap[i] = (unsigned long)page; + } + + return 0; +} + + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); +} + +static int vti_cpu_has_kvm_support(void) +{ + long avail = 1, status = 1, control = 1; + long ret; + + ret = ia64_pal_proc_get_features(&avail, &status, &control, 0); + if (ret) + goto out; + + if (!(avail & PAL_PROC_VM_BIT)) + goto out; + + printk(KERN_DEBUG"kvm: Hardware Supports VT\n"); + + ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info); + if (ret) + goto out; + printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size); + + if (!(vp_env_info & VP_OPCODE)) { + printk(KERN_WARNING"kvm: No opcode ability on hardware, " + "vm_env_info:0x%lx\n", vp_env_info); + } + + return 1; +out: + return 0; +} + +static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info, + struct module *module) +{ + unsigned long module_base; + unsigned long vmm_size; + + unsigned long vmm_offset, func_offset, fdesc_offset; + struct fdesc *p_fdesc; + + BUG_ON(!module); + + if (!kvm_vmm_base) { + printk("kvm: kvm area hasn't been initilized yet!!\n"); + return -EFAULT; + } + + /*Calculate new position of relocated vmm module.*/ + module_base = (unsigned long)module->module_core; + vmm_size = module->core_size; + if (unlikely(vmm_size > KVM_VMM_SIZE)) + return -EFAULT; + + memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size); + kvm_flush_icache(kvm_vmm_base, vmm_size); + + /*Recalculate kvm_vmm_info based on new VMM*/ + vmm_offset = vmm_info->vmm_ivt - module_base; + kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset; + printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n", + kvm_vmm_info->vmm_ivt); + + fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base; + kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base); + + printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n", + KVM_VMM_BASE+func_offset); + + fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base; + kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE + + fdesc_offset); + func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base; + p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset); + p_fdesc->ip = KVM_VMM_BASE + func_offset; + p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base); + + kvm_vmm_gp = p_fdesc->gp; + + printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n", + kvm_vmm_info->vmm_entry); + printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n", + KVM_VMM_BASE + func_offset); + + return 0; +} + +int kvm_arch_init(void *opaque) +{ + int r; + struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque; + + if (!vti_cpu_has_kvm_support()) { + printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n"); + r = -EOPNOTSUPP; + goto out; + } + + if (kvm_vmm_info) { + printk(KERN_ERR "kvm: Already loaded VMM module!\n"); + r = -EEXIST; + goto out; + } + + r = -ENOMEM; + kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL); + if (!kvm_vmm_info) + goto out; + + if (kvm_alloc_vmm_area()) + goto out_free0; + + r = kvm_relocate_vmm(vmm_info, vmm_info->module); + if (r) + goto out_free1; + + return 0; + +out_free1: + kvm_free_vmm_area(); +out_free0: + kfree(kvm_vmm_info); +out: + return r; +} + +void kvm_arch_exit(void) +{ + kvm_free_vmm_area(); + kfree(kvm_vmm_info); + kvm_vmm_info = NULL; +} + +static int kvm_ia64_sync_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + struct kvm_memory_slot *memslot; + int r, i; + long n, base; + unsigned long *dirty_bitmap = (unsigned long *)((void *)kvm - KVM_VM_OFS + + KVM_MEM_DIRTY_LOG_OFS); + + r = -EINVAL; + if (log->slot >= KVM_MEMORY_SLOTS) + goto out; + + memslot = &kvm->memslots[log->slot]; + r = -ENOENT; + if (!memslot->dirty_bitmap) + goto out; + + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + base = memslot->base_gfn / BITS_PER_LONG; + + for (i = 0; i < n/sizeof(long); ++i) { + memslot->dirty_bitmap[i] = dirty_bitmap[base + i]; + dirty_bitmap[base + i] = 0; + } + r = 0; +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + int r; + int n; + struct kvm_memory_slot *memslot; + int is_dirty = 0; + + spin_lock(&kvm->arch.dirty_log_lock); + + r = kvm_ia64_sync_dirty_log(kvm, log); + if (r) + goto out; + + r = kvm_get_dirty_log(kvm, log, &is_dirty); + if (r) + goto out; + + /* If nothing is dirty, don't bother messing with page tables. */ + if (is_dirty) { + kvm_flush_remote_tlbs(kvm); + memslot = &kvm->memslots[log->slot]; + n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; + memset(memslot->dirty_bitmap, 0, n); + } + r = 0; +out: + spin_unlock(&kvm->arch.dirty_log_lock); + return r; +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +static void vcpu_kick_intr(void *info) +{ +#ifdef DEBUG + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; + printk(KERN_DEBUG"vcpu_kick_intr %p \n", vcpu); +#endif +} + +void kvm_vcpu_kick(struct kvm_vcpu *vcpu) +{ + int ipi_pcpu = vcpu->cpu; + + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + + if (vcpu->guest_mode) + smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); +} + +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) +{ + + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (!test_and_set_bit(vec, &vpd->irr[0])) { + vcpu->arch.irq_new_pending = 1; + if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) + kvm_vcpu_kick(vcpu); + else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) { + vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; + if (waitqueue_active(&vcpu->wq)) + wake_up_interruptible(&vcpu->wq); + } + return 1; + } + return 0; +} + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) +{ + return apic->vcpu->vcpu_id == dest; +} + +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) +{ + return 0; +} + +struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, + unsigned long bitmap) +{ + struct kvm_vcpu *lvcpu = kvm->vcpus[0]; + int i; + + for (i = 1; i < KVM_MAX_VCPUS; i++) { + if (!kvm->vcpus[i]) + continue; + if (lvcpu->arch.xtp > kvm->vcpus[i]->arch.xtp) + lvcpu = kvm->vcpus[i]; + } + + return lvcpu; +} + +static int find_highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + + return -1; +} + +int kvm_highest_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd); + + if (vpd->irr[0] & (1UL << NMI_VECTOR)) + return NMI_VECTOR; + if (vpd->irr[0] & (1UL << ExtINT_VECTOR)) + return ExtINT_VECTOR; + + return find_highest_bits((int *)&vpd->irr[0]); +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + if (kvm_highest_pending_irq(vcpu) != -1) + return 1; + return 0; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} diff --git a/trunk/arch/ia64/kvm/kvm_fw.c b/trunk/arch/ia64/kvm/kvm_fw.c new file mode 100644 index 000000000000..091f936c4485 --- /dev/null +++ b/trunk/arch/ia64/kvm/kvm_fw.c @@ -0,0 +1,500 @@ +/* + * PAL/SAL call delegation + * + * Copyright (c) 2004 Li Susie + * Copyright (c) 2005 Yu Ke + * Copyright (c) 2007 Xiantao Zhang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ + +#include +#include + +#include "vti.h" +#include "misc.h" + +#include +#include +#include + +/* + * Handy macros to make sure that the PAL return values start out + * as something meaningful. + */ +#define INIT_PAL_STATUS_UNIMPLEMENTED(x) \ + { \ + x.status = PAL_STATUS_UNIMPLEMENTED; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +#define INIT_PAL_STATUS_SUCCESS(x) \ + { \ + x.status = PAL_STATUS_SUCCESS; \ + x.v0 = 0; \ + x.v1 = 0; \ + x.v2 = 0; \ + } + +static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu, + u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) { + struct exit_ctl_data *p; + + if (vcpu) { + p = &vcpu->arch.exit_data; + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + *gr28 = p->u.pal_data.gr28; + *gr29 = p->u.pal_data.gr29; + *gr30 = p->u.pal_data.gr30; + *gr31 = p->u.pal_data.gr31; + return ; + } + } + printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n"); +} + +static void set_pal_result(struct kvm_vcpu *vcpu, + struct ia64_pal_retval result) { + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_PAL_CALL) { + p->u.pal_data.ret = result; + return ; + } + INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret); +} + +static void set_sal_result(struct kvm_vcpu *vcpu, + struct sal_ret_values result) { + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && p->exit_reason == EXIT_REASON_SAL_CALL) { + p->u.sal_data.ret = result; + return ; + } + printk(KERN_WARNING"Failed to set sal result!!\n"); +} + +struct cache_flush_args { + u64 cache_type; + u64 operation; + u64 progress; + long status; +}; + +cpumask_t cpu_cache_coherent_map; + +static void remote_pal_cache_flush(void *data) +{ + struct cache_flush_args *args = data; + long status; + u64 progress = args->progress; + + status = ia64_pal_cache_flush(args->cache_type, args->operation, + &progress, NULL); + if (status != 0) + args->status = status; +} + +static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu) +{ + u64 gr28, gr29, gr30, gr31; + struct ia64_pal_retval result = {0, 0, 0, 0}; + struct cache_flush_args args = {0, 0, 0, 0}; + long psr; + + gr28 = gr29 = gr30 = gr31 = 0; + kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31); + + if (gr31 != 0) + printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu); + + /* Always call Host Pal in int=1 */ + gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS; + args.cache_type = gr29; + args.operation = gr30; + smp_call_function(remote_pal_cache_flush, + (void *)&args, 1, 1); + if (args.status != 0) + printk(KERN_ERR"pal_cache_flush error!," + "status:0x%lx\n", args.status); + /* + * Call Host PAL cache flush + * Clear psr.ic when call PAL_CACHE_FLUSH + */ + local_irq_save(psr); + result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1, + &result.v0); + local_irq_restore(psr); + if (result.status != 0) + printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld" + "in1:%lx,in2:%lx\n", + vcpu, result.status, gr29, gr30); + +#if 0 + if (gr29 == PAL_CACHE_TYPE_COHERENT) { + cpus_setall(vcpu->arch.cache_coherent_map); + cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map); + cpus_setall(cpu_cache_coherent_map); + cpu_clear(vcpu->cpu, cpu_cache_coherent_map); + } +#endif + return result; +} + +struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0); + + /* + * PAL_FREQ_BASE may not be implemented in some platforms, + * call SAL instead. + */ + if (result.v0 == 0) { + result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &result.v0, + &result.v1); + result.v2 = 0; + } + + return result; +} + +static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0); + return result; +} + +static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + return result; +} + +static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result; + + INIT_PAL_STATUS_SUCCESS(result); + return result; +} + +static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu) +{ + + struct ia64_pal_retval result = {0, 0, 0, 0}; + long in0, in1, in2, in3; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + result.status = ia64_pal_proc_get_features(&result.v0, &result.v1, + &result.v2, in2); + + return result; +} + +static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu) +{ + + pal_cache_config_info_t ci; + long status; + unsigned long in0, in1, in2, in3, r9, r10; + + kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3); + status = ia64_pal_cache_config_info(in1, in2, &ci); + r9 = ci.pcci_info_1.pcci1_data; + r10 = ci.pcci_info_2.pcci2_data; + return ((struct ia64_pal_retval){status, r9, r10, 0}); +} + +#define GUEST_IMPL_VA_MSB 59 +#define GUEST_RID_BITS 18 + +static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu) +{ + + pal_vm_info_1_u_t vminfo1; + pal_vm_info_2_u_t vminfo2; + struct ia64_pal_retval result; + + PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0); + if (!result.status) { + vminfo1.pvi1_val = result.v0; + vminfo1.pal_vm_info_1_s.max_itr_entry = 8; + vminfo1.pal_vm_info_1_s.max_dtr_entry = 8; + result.v0 = vminfo1.pvi1_val; + vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB; + vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS; + result.v1 = vminfo2.pvi2_val; + } + + return result; +} + +static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu) +{ + struct ia64_pal_retval result; + + INIT_PAL_STATUS_UNIMPLEMENTED(result); + + return result; +} + +static u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu) +{ + u64 index = 0; + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + if (p && (p->exit_reason == EXIT_REASON_PAL_CALL)) + index = p->u.pal_data.gr28; + + return index; +} + +int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + + u64 gr28; + struct ia64_pal_retval result; + int ret = 1; + + gr28 = kvm_get_pal_call_index(vcpu); + /*printk("pal_call index:%lx\n",gr28);*/ + switch (gr28) { + case PAL_CACHE_FLUSH: + result = pal_cache_flush(vcpu); + break; + case PAL_CACHE_SUMMARY: + result = pal_cache_summary(vcpu); + break; + case PAL_HALT_LIGHT: + { + vcpu->arch.timer_pending = 1; + INIT_PAL_STATUS_SUCCESS(result); + if (kvm_highest_pending_irq(vcpu) == -1) + ret = kvm_emulate_halt(vcpu); + + } + break; + + case PAL_FREQ_RATIOS: + result = pal_freq_ratios(vcpu); + break; + + case PAL_FREQ_BASE: + result = pal_freq_base(vcpu); + break; + + case PAL_LOGICAL_TO_PHYSICAL : + result = pal_logical_to_physica(vcpu); + break; + + case PAL_VM_SUMMARY : + result = pal_vm_summary(vcpu); + break; + + case PAL_VM_INFO : + result = pal_vm_info(vcpu); + break; + case PAL_PLATFORM_ADDR : + result = pal_platform_addr(vcpu); + break; + case PAL_CACHE_INFO: + result = pal_cache_info(vcpu); + break; + case PAL_PTCE_INFO: + INIT_PAL_STATUS_SUCCESS(result); + result.v1 = (1L << 32) | 1L; + break; + case PAL_VM_PAGE_SIZE: + result.status = ia64_pal_vm_page_size(&result.v0, + &result.v1); + break; + case PAL_RSE_INFO: + result.status = ia64_pal_rse_info(&result.v0, + (pal_hints_u_t *)&result.v1); + break; + case PAL_PROC_GET_FEATURES: + result = pal_proc_get_features(vcpu); + break; + case PAL_DEBUG_INFO: + result.status = ia64_pal_debug_info(&result.v0, + &result.v1); + break; + case PAL_VERSION: + result.status = ia64_pal_version( + (pal_version_u_t *)&result.v0, + (pal_version_u_t *)&result.v1); + + break; + case PAL_FIXED_ADDR: + result.status = PAL_STATUS_SUCCESS; + result.v0 = vcpu->vcpu_id; + break; + default: + INIT_PAL_STATUS_UNIMPLEMENTED(result); + printk(KERN_WARNING"kvm: Unsupported pal call," + " index:0x%lx\n", gr28); + } + set_pal_result(vcpu, result); + return ret; +} + +static struct sal_ret_values sal_emulator(struct kvm *kvm, + long index, unsigned long in1, + unsigned long in2, unsigned long in3, + unsigned long in4, unsigned long in5, + unsigned long in6, unsigned long in7) +{ + unsigned long r9 = 0; + unsigned long r10 = 0; + long r11 = 0; + long status; + + status = 0; + switch (index) { + case SAL_FREQ_BASE: + status = ia64_sal_freq_base(in1, &r9, &r10); + break; + case SAL_PCI_CONFIG_READ: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_READ\n"); + break; + case SAL_PCI_CONFIG_WRITE: + printk(KERN_WARNING"kvm: Not allowed to call here!" + " SAL_PCI_CONFIG_WRITE\n"); + break; + case SAL_SET_VECTORS: + if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) { + if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) { + status = -2; + } else { + kvm->arch.rdv_sal_data.boot_ip = in2; + kvm->arch.rdv_sal_data.boot_gp = in3; + } + printk("Rendvous called! iip:%lx\n\n", in2); + } else + printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu." + "ignored...\n", in1); + break; + case SAL_GET_STATE_INFO: + /* No more info. */ + status = -5; + r9 = 0; + break; + case SAL_GET_STATE_INFO_SIZE: + /* Return a dummy size. */ + status = 0; + r9 = 128; + break; + case SAL_CLEAR_STATE_INFO: + /* Noop. */ + break; + case SAL_MC_RENDEZ: + printk(KERN_WARNING + "kvm: called SAL_MC_RENDEZ. ignored...\n"); + break; + case SAL_MC_SET_PARAMS: + printk(KERN_WARNING + "kvm: called SAL_MC_SET_PARAMS.ignored!\n"); + break; + case SAL_CACHE_FLUSH: + if (1) { + /*Flush using SAL. + This method is faster but has a side + effect on other vcpu running on + this cpu. */ + status = ia64_sal_cache_flush(in1); + } else { + /*Maybe need to implement the method + without side effect!*/ + status = 0; + } + break; + case SAL_CACHE_INIT: + printk(KERN_WARNING + "kvm: called SAL_CACHE_INIT. ignored...\n"); + break; + case SAL_UPDATE_PAL: + printk(KERN_WARNING + "kvm: CALLED SAL_UPDATE_PAL. ignored...\n"); + break; + default: + printk(KERN_WARNING"kvm: called SAL_CALL with unknown index." + " index:%ld\n", index); + status = -1; + break; + } + return ((struct sal_ret_values) {status, r9, r10, r11}); +} + +static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1, + u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){ + + struct exit_ctl_data *p; + + p = kvm_get_exit_data(vcpu); + + if (p) { + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + *in0 = p->u.sal_data.in0; + *in1 = p->u.sal_data.in1; + *in2 = p->u.sal_data.in2; + *in3 = p->u.sal_data.in3; + *in4 = p->u.sal_data.in4; + *in5 = p->u.sal_data.in5; + *in6 = p->u.sal_data.in6; + *in7 = p->u.sal_data.in7; + return ; + } + } + *in0 = 0; +} + +void kvm_sal_emul(struct kvm_vcpu *vcpu) +{ + + struct sal_ret_values result; + u64 index, in1, in2, in3, in4, in5, in6, in7; + + kvm_get_sal_call_data(vcpu, &index, &in1, &in2, + &in3, &in4, &in5, &in6, &in7); + result = sal_emulator(vcpu->kvm, index, in1, in2, in3, + in4, in5, in6, in7); + set_sal_result(vcpu, result); +} diff --git a/trunk/arch/ia64/kvm/kvm_minstate.h b/trunk/arch/ia64/kvm/kvm_minstate.h new file mode 100644 index 000000000000..13980d9b8bcf --- /dev/null +++ b/trunk/arch/ia64/kvm/kvm_minstate.h @@ -0,0 +1,273 @@ +/* + * kvm_minstate.h: min save macros + * Copyright (c) 2007, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#include +#include +#include +#include "asm-offsets.h" + +#define KVM_MINSTATE_START_SAVE_MIN \ + mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\ + ;; \ + mov.m r28 = ar.rnat; \ + addl r22 = VMM_RBS_OFFSET,r1; /* compute base of RBS */ \ + ;; \ + lfetch.fault.excl.nt1 [r22]; \ + addl r1 = IA64_STK_OFFSET-VMM_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ + mov r23 = ar.bspstore; /* save ar.bspstore */ \ + ;; \ + mov ar.bspstore = r22; /* switch to kernel RBS */\ + ;; \ + mov r18 = ar.bsp; \ + mov ar.rsc = 0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ + + + +#define KVM_MINSTATE_END_SAVE_MIN \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */\ + ;; + + +#define PAL_VSA_SYNC_READ \ + /* begin to call pal vps sync_read */ \ + add r25 = VMM_VPD_BASE_OFFSET, r21; \ + adds r20 = VMM_VCPU_VSA_BASE_OFFSET, r21; /* entry point */ \ + ;; \ + ld8 r25 = [r25]; /* read vpd base */ \ + ld8 r20 = [r20]; \ + ;; \ + add r20 = PAL_VPS_SYNC_READ,r20; \ + ;; \ +{ .mii; \ + nop 0x0; \ + mov r24 = ip; \ + mov b0 = r20; \ + ;; \ +}; \ +{ .mmb; \ + add r24 = 0x20, r24; \ + nop 0x0; \ + br.cond.sptk b0; /* call the service */ \ + ;; \ +}; + + + +#define KVM_MINSTATE_GET_CURRENT(reg) mov reg=r21 + +/* + * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ + + +#define PT(f) (VMM_PT_REGS_##f##_OFFSET) + +#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ + KVM_MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ + mov r27 = ar.rsc; /* M */ \ + mov r20 = r1; /* A */ \ + mov r25 = ar.unat; /* M */ \ + mov r29 = cr.ipsr; /* M */ \ + mov r26 = ar.pfs; /* I */ \ + mov r18 = cr.isr; \ + COVER; /* B;; (or nothing) */ \ + ;; \ + tbit.z p0,p15 = r29,IA64_PSR_I_BIT; \ + mov r1 = r16; \ +/* mov r21=r16; */ \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + ;; \ + KVM_MINSTATE_START_SAVE_MIN \ + adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */ \ + adds r16 = PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16] = r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0 = r29,IA64_PSR_I_BIT; \ + mov r29 = b0 \ + ;; \ + adds r16 = PT(R8),r1; /* initialize first base pointer */\ + adds r17 = PT(R9),r1; /* initialize second base pointer */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r8,16; \ +.mem.offset 8,0; st8.spill [r17] = r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r10,24; \ +.mem.offset 8,0; st8.spill [r17] = r11,24; \ + ;; \ + mov r9 = cr.iip; /* M */ \ + mov r10 = ar.fpsr; /* M */ \ + ;; \ + st8 [r16] = r9,16; /* save cr.iip */ \ + st8 [r17] = r30,16; /* save cr.ifs */ \ + sub r18 = r18,r22; /* r18=RSE.ndirty*8 */ \ + ;; \ + st8 [r16] = r25,16; /* save ar.unat */ \ + st8 [r17] = r26,16; /* save ar.pfs */ \ + shl r18 = r18,16; /* calu ar.rsc used for "loadrs" */\ + ;; \ + st8 [r16] = r27,16; /* save ar.rsc */ \ + st8 [r17] = r28,16; /* save ar.rnat */ \ + ;; /* avoid RAW on r16 & r17 */ \ + st8 [r16] = r23,16; /* save ar.bspstore */ \ + st8 [r17] = r31,16; /* save predicates */ \ + ;; \ + st8 [r16] = r29,16; /* save b0 */ \ + st8 [r17] = r18,16; /* save ar.rsc value for "loadrs" */\ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17] = r12,16; \ + adds r12 = -16,r1; /* switch to kernel memory stack */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r13,16; \ +.mem.offset 8,0; st8.spill [r17] = r10,16; /* save ar.fpsr */\ + mov r13 = r21; /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r15,16; \ +.mem.offset 8,0; st8.spill [r17] = r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16] = r2,16; \ +.mem.offset 8,0; st8.spill [r17] = r3,16; \ + adds r2 = VMM_PT_REGS_R16_OFFSET,r1; \ + ;; \ + adds r16 = VMM_VCPU_IIPA_OFFSET,r13; \ + adds r17 = VMM_VCPU_ISR_OFFSET,r13; \ + mov r26 = cr.iipa; \ + mov r27 = cr.isr; \ + ;; \ + st8 [r16] = r26; \ + st8 [r17] = r27; \ + ;; \ + EXTRA; \ + mov r8 = ar.ccv; \ + mov r9 = ar.csd; \ + mov r10 = ar.ssd; \ + movl r11 = FPSR_DEFAULT; /* L-unit */ \ + adds r17 = VMM_VCPU_GP_OFFSET,r13; \ + ;; \ + ld8 r1 = [r17];/* establish kernel global pointer */ \ + ;; \ + PAL_VSA_SYNC_READ \ + KVM_MINSTATE_END_SAVE_MIN + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.f6 + * r3: points to &pt_regs.f7 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define KVM_SAVE_REST \ +.mem.offset 0,0; st8.spill [r2] = r16,16; \ +.mem.offset 8,0; st8.spill [r3] = r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r18,16; \ +.mem.offset 8,0; st8.spill [r3] = r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r20,16; \ +.mem.offset 8,0; st8.spill [r3] = r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r22,16; \ +.mem.offset 8,0; st8.spill [r3] = r23,16; \ + mov r19 = b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r24,16; \ +.mem.offset 8,0; st8.spill [r3] = r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r26,16; \ +.mem.offset 8,0; st8.spill [r3] = r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r28,16; \ +.mem.offset 8,0; st8.spill [r3] = r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r30,16; \ +.mem.offset 8,0; st8.spill [r3] = r31,32; \ + ;; \ + mov ar.fpsr = r11; \ + st8 [r2] = r8,8; \ + adds r24 = PT(B6)-PT(F7),r3; \ + adds r25 = PT(B7)-PT(F7),r3; \ + ;; \ + st8 [r24] = r18,16; /* b6 */ \ + st8 [r25] = r19,16; /* b7 */ \ + adds r2 = PT(R4)-PT(F6),r2; \ + adds r3 = PT(R5)-PT(F7),r3; \ + ;; \ + st8 [r24] = r9; /* ar.csd */ \ + st8 [r25] = r10; /* ar.ssd */ \ + ;; \ + mov r18 = ar.unat; \ + adds r19 = PT(EML_UNAT)-PT(R4),r2; \ + ;; \ + st8 [r19] = r18; /* eml_unat */ \ + + +#define KVM_SAVE_EXTRA \ +.mem.offset 0,0; st8.spill [r2] = r4,16; \ +.mem.offset 8,0; st8.spill [r3] = r5,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2] = r6,16; \ +.mem.offset 8,0; st8.spill [r3] = r7; \ + ;; \ + mov r26 = ar.unat; \ + ;; \ + st8 [r2] = r26;/* eml_unat */ \ + +#define KVM_SAVE_MIN_WITH_COVER KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,) +#define KVM_SAVE_MIN_WITH_COVER_R19 KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19) +#define KVM_SAVE_MIN KVM_DO_SAVE_MIN( , mov r30 = r0, ) diff --git a/trunk/arch/ia64/kvm/lapic.h b/trunk/arch/ia64/kvm/lapic.h new file mode 100644 index 000000000000..6d6cbcb14893 --- /dev/null +++ b/trunk/arch/ia64/kvm/lapic.h @@ -0,0 +1,25 @@ +#ifndef __KVM_IA64_LAPIC_H +#define __KVM_IA64_LAPIC_H + +#include + +/* + * vlsapic + */ +struct kvm_lapic{ + struct kvm_vcpu *vcpu; + uint64_t insvc[4]; + uint64_t vhpi; + uint8_t xtp; + uint8_t pal_init_pending; + uint8_t pad[2]; +}; + +int kvm_create_lapic(struct kvm_vcpu *vcpu); +void kvm_free_lapic(struct kvm_vcpu *vcpu); + +int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); +int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); +int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); + +#endif diff --git a/trunk/arch/ia64/kvm/misc.h b/trunk/arch/ia64/kvm/misc.h new file mode 100644 index 000000000000..e585c4607344 --- /dev/null +++ b/trunk/arch/ia64/kvm/misc.h @@ -0,0 +1,93 @@ +#ifndef __KVM_IA64_MISC_H +#define __KVM_IA64_MISC_H + +#include +/* + * misc.h + * Copyright (C) 2007, Intel Corporation. + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +/* + *Return p2m base address at host side! + */ +static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm) +{ + return (uint64_t *)(kvm->arch.vm_base + KVM_P2M_OFS); +} + +static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn, + u64 paddr, u64 mem_flags) +{ + uint64_t *pmt_base = kvm_host_get_pmt(kvm); + unsigned long pte; + + pte = PAGE_ALIGN(paddr) | mem_flags; + pmt_base[gfn] = pte; +} + +/*Function for translating host address to guest address*/ + +static inline void *to_guest(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)(addr) - kvm->arch.vm_base + + KVM_VM_DATA_BASE); +} + +/*Function for translating guest address to host address*/ + +static inline void *to_host(struct kvm *kvm, void *addr) +{ + return (void *)((unsigned long)addr - KVM_VM_DATA_BASE + + kvm->arch.vm_base); +} + +/* Get host context of the vcpu */ +static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.host; + return to_guest(vcpu->kvm, ctx); +} + +/* Get guest context of the vcpu */ +static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu) +{ + union context *ctx = &vcpu->arch.guest; + return to_guest(vcpu->kvm, ctx); +} + +/* kvm get exit data from gvmm! */ +static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu) +{ + return &vcpu->arch.exit_data; +} + +/*kvm get vcpu ioreq for kvm module!*/ +static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p_ctl_data; + + if (vcpu) { + p_ctl_data = kvm_get_exit_data(vcpu); + if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION) + return &p_ctl_data->u.ioreq; + } + + return NULL; +} + +#endif diff --git a/trunk/arch/ia64/kvm/mmio.c b/trunk/arch/ia64/kvm/mmio.c new file mode 100644 index 000000000000..351bf70da463 --- /dev/null +++ b/trunk/arch/ia64/kvm/mmio.c @@ -0,0 +1,341 @@ +/* + * mmio.c: MMIO emulation components. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Kun Tian (Kevin Tian) (Kevin.tian@intel.com) + * + * Copyright (c) 2007 Intel Corporation KVM support. + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include + +#include "vcpu.h" + +static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val) +{ + VLSAPIC_XTP(v) = val; +} + +/* + * LSAPIC OFFSET + */ +#define PIB_LOW_HALF(ofst) !(ofst & (1 << 20)) +#define PIB_OFST_INTA 0x1E0000 +#define PIB_OFST_XTP 0x1E0008 + +/* + * execute write IPI op. + */ +static void vlsapic_write_ipi(struct kvm_vcpu *vcpu, + uint64_t addr, uint64_t data) +{ + struct exit_ctl_data *p = ¤t_vcpu->arch.exit_data; + unsigned long psr; + + local_irq_save(psr); + + p->exit_reason = EXIT_REASON_IPI; + p->u.ipi_data.addr.val = addr; + p->u.ipi_data.data.val = data; + vmm_transition(current_vcpu); + + local_irq_restore(psr); + +} + +void lsapic_write(struct kvm_vcpu *v, unsigned long addr, + unsigned long length, unsigned long val) +{ + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + /*panic_domain(NULL, "Undefined write on PIB INTA\n");*/ + panic_vm(v); + break; + case PIB_OFST_XTP: + if (length == 1) { + vlsapic_write_xtp(v, val); + } else { + /*panic_domain(NULL, + "Undefined write on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + if (PIB_LOW_HALF(addr)) { + /*lower half */ + if (length != 8) + /*panic_domain(NULL, + "Can't LHF write with size %ld!\n", + length);*/ + panic_vm(v); + else + vlsapic_write_ipi(v, addr, val); + } else { /* upper half + printk("IPI-UHF write %lx\n",addr);*/ + panic_vm(v); + } + break; + } +} + +unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr, + unsigned long length) +{ + uint64_t result = 0; + + addr &= (PIB_SIZE - 1); + + switch (addr) { + case PIB_OFST_INTA: + if (length == 1) /* 1 byte load */ + ; /* There is no i8259, there is no INTA access*/ + else + /*panic_domain(NULL,"Undefined read on PIB INTA\n"); */ + panic_vm(v); + + break; + case PIB_OFST_XTP: + if (length == 1) { + result = VLSAPIC_XTP(v); + /* printk("read xtp %lx\n", result); */ + } else { + /*panic_domain(NULL, + "Undefined read on PIB XTP\n");*/ + panic_vm(v); + } + break; + default: + panic_vm(v); + break; + } + return result; +} + +static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest, + u16 s, int ma, int dir) +{ + unsigned long iot; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + iot = __gpfn_is_io(src_pa >> PAGE_SHIFT); + + local_irq_save(psr); + + /*Intercept the acces for PIB range*/ + if (iot == GPFN_PIB) { + if (!dir) + lsapic_write(vcpu, src_pa, s, *dest); + else + *dest = lsapic_read(vcpu, src_pa, s); + goto out; + } + p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION; + p->u.ioreq.addr = src_pa; + p->u.ioreq.size = s; + p->u.ioreq.dir = dir; + if (dir == IOREQ_WRITE) + p->u.ioreq.data = *dest; + p->u.ioreq.state = STATE_IOREQ_READY; + vmm_transition(vcpu); + + if (p->u.ioreq.state == STATE_IORESP_READY) { + if (dir == IOREQ_READ) + *dest = p->u.ioreq.data; + } else + panic_vm(vcpu); +out: + local_irq_restore(psr); + return ; +} + +/* + dir 1: read 0:write + inst_type 0:integer 1:floating point + */ +#define SL_INTEGER 0 /* store/load interger*/ +#define SL_FLOATING 1 /* store/load floating*/ + +void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma) +{ + struct kvm_pt_regs *regs; + IA64_BUNDLE bundle; + int slot, dir = 0; + int inst_type = -1; + u16 size = 0; + u64 data, slot1a, slot1b, temp, update_reg; + s32 imm; + INST64 inst; + + regs = vcpu_regs(vcpu); + + if (fetch_code(vcpu, regs->cr_iip, &bundle)) { + /* if fetch code fail, return and try again */ + return; + } + slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri; + if (!slot) + inst.inst = bundle.slot0; + else if (slot == 1) { + slot1a = bundle.slot1a; + slot1b = bundle.slot1b; + inst.inst = slot1a + (slot1b << 18); + } else if (slot == 2) + inst.inst = bundle.slot2; + + /* Integer Load/Store */ + if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) { + inst_type = SL_INTEGER; + size = (inst.M1.x6 & 0x3); + if ((inst.M1.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M4.r2); + } else if ((inst.M1.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + } + } else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) { + /* Integer Load + Reg update */ + inst_type = SL_INTEGER; + dir = IOREQ_READ; + size = (inst.M2.x6 & 0x3); + temp = vcpu_get_gr(vcpu, inst.M2.r3); + update_reg = vcpu_get_gr(vcpu, inst.M2.r2); + temp += update_reg; + vcpu_set_gr(vcpu, inst.M2.r3, temp, 0); + } else if (inst.M3.major == 5) { + /*Integer Load/Store + Imm update*/ + inst_type = SL_INTEGER; + size = (inst.M3.x6&0x3); + if ((inst.M5.x6 >> 2) > 0xb) { + /*write*/ + dir = IOREQ_WRITE; + data = vcpu_get_gr(vcpu, inst.M5.r2); + temp = vcpu_get_gr(vcpu, inst.M5.r3); + imm = (inst.M5.s << 31) | (inst.M5.i << 30) | + (inst.M5.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M5.r3, temp, 0); + + } else if ((inst.M3.x6 >> 2) < 0xb) { + /*read*/ + dir = IOREQ_READ; + temp = vcpu_get_gr(vcpu, inst.M3.r3); + imm = (inst.M3.s << 31) | (inst.M3.i << 30) | + (inst.M3.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M3.r3, temp, 0); + + } + } else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B + && inst.M9.m == 0 && inst.M9.x == 0) { + /* Floating-point spill*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M9.f2, &v); + /* Write high word. FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) { + /* Floating-point spill + Imm update */ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + + /* Write high word.FIXME: this is a kludge! */ + v.u.bits[1] &= 0x3ffff; + mmio_access(vcpu, padr + 8, &v.u.bits[1], 8, ma, IOREQ_WRITE); + data = v.u.bits[0]; + size = 3; + } else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) { + /* Floating-point stf8 + Imm update */ + struct ia64_fpreg v; + inst_type = SL_FLOATING; + dir = IOREQ_WRITE; + size = 3; + vcpu_get_fpreg(vcpu, inst.M10.f2, &v); + data = v.u.bits[0]; /* Significand. */ + temp = vcpu_get_gr(vcpu, inst.M10.r3); + imm = (inst.M10.s << 31) | (inst.M10.i << 30) | + (inst.M10.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M10.r3, temp, 0); + } else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c + && inst.M15.x6 <= 0x2f) { + temp = vcpu_get_gr(vcpu, inst.M15.r3); + imm = (inst.M15.s << 31) | (inst.M15.i << 30) | + (inst.M15.imm7 << 23); + temp += imm >> 23; + vcpu_set_gr(vcpu, inst.M15.r3, temp, 0); + + vcpu_increment_iip(vcpu); + return; + } else if (inst.M12.major == 6 && inst.M12.m == 1 + && inst.M12.x == 1 && inst.M12.x6 == 1) { + /* Floating-point Load Pair + Imm ldfp8 M12*/ + struct ia64_fpreg v; + + inst_type = SL_FLOATING; + dir = IOREQ_READ; + size = 8; /*ldfd*/ + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f1, &v); + padr += 8; + mmio_access(vcpu, padr, &data, size, ma, dir); + v.u.bits[0] = data; + v.u.bits[1] = 0x1003E; + vcpu_set_fpreg(vcpu, inst.M12.f2, &v); + padr += 8; + vcpu_set_gr(vcpu, inst.M12.r3, padr, 0); + vcpu_increment_iip(vcpu); + return; + } else { + inst_type = -1; + panic_vm(vcpu); + } + + size = 1 << size; + if (dir == IOREQ_WRITE) { + mmio_access(vcpu, padr, &data, size, ma, dir); + } else { + mmio_access(vcpu, padr, &data, size, ma, dir); + if (inst_type == SL_INTEGER) + vcpu_set_gr(vcpu, inst.M1.r1, data, 0); + else + panic_vm(vcpu); + + } + vcpu_increment_iip(vcpu); +} diff --git a/trunk/arch/ia64/kvm/optvfault.S b/trunk/arch/ia64/kvm/optvfault.S new file mode 100644 index 000000000000..e4f15d641b22 --- /dev/null +++ b/trunk/arch/ia64/kvm/optvfault.S @@ -0,0 +1,918 @@ +/* + * arch/ia64/vmx/optvfault.S + * optimize virtualization fault handler + * + * Copyright (C) 2006 Intel Co + * Xuefei Xu (Anthony Xu) + */ + +#include +#include + +#include "vti.h" +#include "asm-offsets.h" + +#define ACCE_MOV_FROM_AR +#define ACCE_MOV_FROM_RR +#define ACCE_MOV_TO_RR +#define ACCE_RSM +#define ACCE_SSM +#define ACCE_MOV_TO_PSR +#define ACCE_THASH + +//mov r1=ar3 +GLOBAL_ENTRY(kvm_asm_mov_from_ar) +#ifndef ACCE_MOV_FROM_AR + br.many kvm_virtualization_fault_back +#endif + add r18=VMM_VCPU_ITC_OFS_OFFSET, r21 + add r16=VMM_VCPU_LAST_ITC_OFFSET,r21 + extr.u r17=r25,6,7 + ;; + ld8 r18=[r18] + mov r19=ar.itc + mov r24=b0 + ;; + add r19=r19,r18 + addl r20=@gprel(asm_mov_to_reg),gp + ;; + st8 [r16] = r19 + adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20 + shladd r17=r17,4,r20 + ;; + mov b0=r17 + br.sptk.few b0 + ;; +END(kvm_asm_mov_from_ar) + + +// mov r1=rr[r3] +GLOBAL_ENTRY(kvm_asm_mov_from_rr) +#ifndef ACCE_MOV_FROM_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,6,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r24=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_from_rr_back_1: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + adds r22=asm_mov_to_reg-asm_mov_from_reg,r20 + shr.u r26=r19,61 + ;; + shladd r17=r17,4,r22 + shladd r27=r26,3,r27 + ;; + ld8 r19=[r27] + mov b0=r17 + br.many b0 +END(kvm_asm_mov_from_rr) + + +// mov rr[r3]=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_rr) +#ifndef ACCE_MOV_TO_RR + br.many kvm_virtualization_fault_back +#endif + extr.u r16=r25,20,7 + extr.u r17=r25,13,7 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20 + shladd r16=r16,4,r20 + mov r22=b0 + ;; + add r27=VMM_VCPU_VRR0_OFFSET,r21 + mov b0=r16 + br.many b0 + ;; +kvm_asm_mov_to_rr_back_1: + adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20 + shr.u r23=r19,61 + shladd r17=r17,4,r20 + ;; + //if rr6, go back + cmp.eq p6,p0=6,r23 + mov b0=r22 + (p6) br.cond.dpnt.many kvm_virtualization_fault_back + ;; + mov r28=r19 + mov b0=r17 + br.many b0 +kvm_asm_mov_to_rr_back_2: + adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20 + shladd r27=r23,3,r27 + ;; // vrr.rid<<4 |0xe + st8 [r27]=r19 + mov b0=r30 + ;; + extr.u r16=r19,8,26 + extr.u r18 =r19,2,6 + mov r17 =0xe + ;; + shladd r16 = r16, 4, r17 + extr.u r19 =r19,0,8 + ;; + shl r16 = r16,8 + ;; + add r19 = r19, r16 + ;; //set ve 1 + dep r19=-1,r19,0,1 + cmp.lt p6,p0=14,r18 + ;; + (p6) mov r18=14 + ;; + (p6) dep r19=r18,r19,2,6 + ;; + cmp.eq p6,p0=0,r23 + ;; + cmp.eq.or p6,p0=4,r23 + ;; + adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + (p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + ;; + ld4 r16=[r16] + cmp.eq p7,p0=r0,r0 + (p6) shladd r17=r23,1,r17 + ;; + (p6) st8 [r17]=r19 + (p6) tbit.nz p6,p7=r16,0 + ;; + (p7) mov rr[r28]=r19 + mov r24=r22 + br.many b0 +END(kvm_asm_mov_to_rr) + + +//rsm +GLOBAL_ENTRY(kvm_asm_rsm) +#ifndef ACCE_RSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; + add r17=VPD_VPSR_START_OFFSET,r16 + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + //r26 is imm24 + dep r26=r28,r26,23,1 + ;; + ld8 r18=[r17] + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI + ld4 r23=[r22] + sub r27=-1,r26 + mov r24=b0 + ;; + mov r20=cr.ipsr + or r28=r27,r28 + and r19=r18,r27 + ;; + st8 [r17]=r19 + and r20=r20,r28 + /* Comment it out due to short of fp lazy alorgithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + */ + ;; + mov cr.ipsr=r20 + tbit.nz p6,p0=r23,0 + ;; + tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r26=VMM_VCPU_META_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + dep r23=-1,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + br.many kvm_resume_to_guest +END(kvm_asm_rsm) + + +//ssm +GLOBAL_ENTRY(kvm_asm_ssm) +#ifndef ACCE_SSM + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,6,21 + extr.u r27=r25,31,2 + ;; + ld8 r16=[r16] + extr.u r28=r25,36,1 + dep r26=r27,r26,21,2 + ;; //r26 is imm24 + add r27=VPD_VPSR_START_OFFSET,r16 + dep r26=r28,r26,23,1 + ;; //r19 vpsr + ld8 r29=[r27] + mov r24=b0 + ;; + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + mov r20=cr.ipsr + or r19=r29,r26 + ;; + ld4 r23=[r22] + st8 [r27]=r19 + or r20=r20,r26 + ;; + mov cr.ipsr=r20 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + and r19=r28,r19 + tbit.z p6,p0=r23,0 + ;; + cmp.ne.or p6,p0=r28,r19 + (p6) br.dptk kvm_asm_ssm_1 + ;; + add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_ssm_1: + tbit.nz p6,p0=r29,IA64_PSR_I_BIT + ;; + tbit.z.or p6,p0=r19,IA64_PSR_I_BIT + (p6) br.dptk kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_ssm) + + +//mov psr.l=r2 +GLOBAL_ENTRY(kvm_asm_mov_to_psr) +#ifndef ACCE_MOV_TO_PSR + br.many kvm_virtualization_fault_back +#endif + add r16=VMM_VPD_BASE_OFFSET,r21 + extr.u r26=r25,13,7 //r2 + ;; + ld8 r16=[r16] + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20 + shladd r26=r26,4,r20 + mov r24=b0 + ;; + add r27=VPD_VPSR_START_OFFSET,r16 + mov b0=r26 + br.many b0 + ;; +kvm_asm_mov_to_psr_back: + ld8 r17=[r27] + add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21 + dep r19=0,r19,32,32 + ;; + ld4 r23=[r22] + dep r18=0,r17,0,32 + ;; + add r30=r18,r19 + movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT + ;; + st8 [r27]=r30 + and r27=r28,r30 + and r29=r28,r17 + ;; + cmp.eq p5,p0=r29,r27 + cmp.eq p6,p7=r28,r27 + (p5) br.many kvm_asm_mov_to_psr_1 + ;; + //virtual to physical + (p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21 + (p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21 + (p7) dep r23=-1,r23,0,1 + ;; + //physical to virtual + (p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21 + (p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21 + (p6) dep r23=0,r23,0,1 + ;; + ld8 r26=[r26] + ld8 r27=[r27] + st4 [r22]=r23 + dep.z r28=4,61,3 + ;; + mov rr[r0]=r26 + ;; + mov rr[r28]=r27 + ;; + srlz.d + ;; +kvm_asm_mov_to_psr_1: + mov r20=cr.ipsr + movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT + ;; + or r19=r19,r28 + dep r20=0,r20,0,32 + ;; + add r20=r19,r20 + mov b0=r24 + ;; + /* Comment it out due to short of fp lazy algorithm support + adds r27=IA64_VCPU_FP_PSR_OFFSET,r21 + ;; + ld8 r27=[r27] + ;; + tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT + ;; + (p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1 + ;; + */ + mov cr.ipsr=r20 + cmp.ne p6,p0=r0,r0 + ;; + tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT + tbit.z.or p6,p0=r30,IA64_PSR_I_BIT + (p6) br.dpnt.few kvm_resume_to_guest + ;; + add r29=VPD_VTPR_START_OFFSET,r16 + add r30=VPD_VHPI_START_OFFSET,r16 + ;; + ld8 r29=[r29] + ld8 r30=[r30] + ;; + extr.u r17=r29,4,4 + extr.u r18=r29,16,1 + ;; + dep r17=r18,r17,4,1 + ;; + cmp.gt p6,p0=r30,r17 + (p6) br.dpnt.few kvm_asm_dispatch_vexirq + br.many kvm_resume_to_guest +END(kvm_asm_mov_to_psr) + + +ENTRY(kvm_asm_dispatch_vexirq) +//increment iip + mov r16=cr.ipsr + ;; + extr.u r17=r16,IA64_PSR_RI_BIT,2 + tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1 + ;; + (p6) mov r18=cr.iip + (p6) mov r17=r0 + (p7) add r17=1,r17 + ;; + (p6) add r18=0x10,r18 + dep r16=r17,r16,IA64_PSR_RI_BIT,2 + ;; + (p6) mov cr.iip=r18 + mov cr.ipsr=r16 + mov r30 =1 + br.many kvm_dispatch_vexirq +END(kvm_asm_dispatch_vexirq) + +// thash +// TODO: add support when pta.vf = 1 +GLOBAL_ENTRY(kvm_asm_thash) +#ifndef ACCE_THASH + br.many kvm_virtualization_fault_back +#endif + extr.u r17=r25,20,7 // get r3 from opcode in r25 + extr.u r18=r25,6,7 // get r1 from opcode in r25 + addl r20=@gprel(asm_mov_from_reg),gp + ;; + adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20 + shladd r17=r17,4,r20 // get addr of MOVE_FROM_REG(r17) + adds r16=VMM_VPD_BASE_OFFSET,r21 // get vcpu.arch.priveregs + ;; + mov r24=b0 + ;; + ld8 r16=[r16] // get VPD addr + mov b0=r17 + br.many b0 // r19 return value + ;; +kvm_asm_thash_back1: + shr.u r23=r19,61 // get RR number + adds r25=VMM_VCPU_VRR0_OFFSET,r21 // get vcpu->arch.vrr[0]'s addr + adds r16=VMM_VPD_VPTA_OFFSET,r16 // get vpta + ;; + shladd r27=r23,3,r25 // get vcpu->arch.vrr[r23]'s addr + ld8 r17=[r16] // get PTA + mov r26=1 + ;; + extr.u r29=r17,2,6 // get pta.size + ld8 r25=[r27] // get vcpu->arch.vrr[r23]'s value + ;; + extr.u r25=r25,2,6 // get rr.ps + shl r22=r26,r29 // 1UL << pta.size + ;; + shr.u r23=r19,r25 // vaddr >> rr.ps + adds r26=3,r29 // pta.size + 3 + shl r27=r17,3 // pta << 3 + ;; + shl r23=r23,3 // (vaddr >> rr.ps) << 3 + shr.u r27=r27,r26 // (pta << 3) >> (pta.size+3) + movl r16=7<<61 + ;; + adds r22=-1,r22 // (1UL << pta.size) - 1 + shl r27=r27,r29 // ((pta<<3)>>(pta.size+3))<>(pta.size + 3))< + * Xiaoyan Feng (Fleming Feng) + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + */ +#include "vcpu.h" + +#include +#include +#include +#include +#include + +fpswa_interface_t *vmm_fpswa_interface; + +#define IA64_VHPT_TRANS_VECTOR 0x0000 +#define IA64_INST_TLB_VECTOR 0x0400 +#define IA64_DATA_TLB_VECTOR 0x0800 +#define IA64_ALT_INST_TLB_VECTOR 0x0c00 +#define IA64_ALT_DATA_TLB_VECTOR 0x1000 +#define IA64_DATA_NESTED_TLB_VECTOR 0x1400 +#define IA64_INST_KEY_MISS_VECTOR 0x1800 +#define IA64_DATA_KEY_MISS_VECTOR 0x1c00 +#define IA64_DIRTY_BIT_VECTOR 0x2000 +#define IA64_INST_ACCESS_BIT_VECTOR 0x2400 +#define IA64_DATA_ACCESS_BIT_VECTOR 0x2800 +#define IA64_BREAK_VECTOR 0x2c00 +#define IA64_EXTINT_VECTOR 0x3000 +#define IA64_PAGE_NOT_PRESENT_VECTOR 0x5000 +#define IA64_KEY_PERMISSION_VECTOR 0x5100 +#define IA64_INST_ACCESS_RIGHTS_VECTOR 0x5200 +#define IA64_DATA_ACCESS_RIGHTS_VECTOR 0x5300 +#define IA64_GENEX_VECTOR 0x5400 +#define IA64_DISABLED_FPREG_VECTOR 0x5500 +#define IA64_NAT_CONSUMPTION_VECTOR 0x5600 +#define IA64_SPECULATION_VECTOR 0x5700 /* UNUSED */ +#define IA64_DEBUG_VECTOR 0x5900 +#define IA64_UNALIGNED_REF_VECTOR 0x5a00 +#define IA64_UNSUPPORTED_DATA_REF_VECTOR 0x5b00 +#define IA64_FP_FAULT_VECTOR 0x5c00 +#define IA64_FP_TRAP_VECTOR 0x5d00 +#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 0x5e00 +#define IA64_TAKEN_BRANCH_TRAP_VECTOR 0x5f00 +#define IA64_SINGLE_STEP_TRAP_VECTOR 0x6000 + +/* SDM vol2 5.5 - IVA based interruption handling */ +#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\ + IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT | \ + IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT) + +#define DOMN_PAL_REQUEST 0x110000 +#define DOMN_SAL_REQUEST 0x110001 + +static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800, + 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, + 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400, + 0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00, + 0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600, + 0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00, + 0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800, + 0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00 +}; + +static void collect_interruption(struct kvm_vcpu *vcpu) +{ + u64 ipsr; + u64 vdcr; + u64 vifs; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = vcpu_get_psr(vcpu); + vcpu_bsw0(vcpu); + if (vpsr & IA64_PSR_IC) { + + /* Sync mpsr id/da/dd/ss/ed bits to vipsr + * since after guest do rfi, we still want these bits on in + * mpsr + */ + + ipsr = regs->cr_ipsr; + vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA + | IA64_PSR_DD | IA64_PSR_SS + | IA64_PSR_ED)); + vcpu_set_ipsr(vcpu, vpsr); + + /* Currently, for trap, we do not advance IIP to next + * instruction. That's because we assume caller already + * set up IIP correctly + */ + + vcpu_set_iip(vcpu , regs->cr_iip); + + /* set vifs.v to zero */ + vifs = VCPU(vcpu, ifs); + vifs &= ~IA64_IFS_V; + vcpu_set_ifs(vcpu, vifs); + + vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa)); + } + + vdcr = VCPU(vcpu, dcr); + + /* Set guest psr + * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged + * be: set to the value of dcr.be + * pp: set to the value of dcr.pp + */ + vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION; + vpsr |= (vdcr & IA64_DCR_BE); + + /* VDCR pp bit position is different from VPSR pp bit */ + if (vdcr & IA64_DCR_PP) { + vpsr |= IA64_PSR_PP; + } else { + vpsr &= ~IA64_PSR_PP;; + } + + vcpu_set_psr(vcpu, vpsr); + +} + +void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec) +{ + u64 viva; + struct kvm_pt_regs *regs; + union ia64_isr pt_isr; + + regs = vcpu_regs(vcpu); + + /* clear cr.isr.ir (incomplete register frame)*/ + pt_isr.val = VMX(vcpu, cr_isr); + pt_isr.ir = 0; + VMX(vcpu, cr_isr) = pt_isr.val; + + collect_interruption(vcpu); + + viva = vcpu_get_iva(vcpu); + regs->cr_iip = viva + vec; +} + +static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + +/* + * Set vIFA & vITIR & vIHA, when vPSR.ic =1 + * Parameter: + * set_ifa: if true, set vIFA + * set_itir: if true, set vITIR + * set_iha: if true, set vIHA + */ +void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr, + int set_ifa, int set_itir, int set_iha) +{ + long vpsr; + u64 value; + + vpsr = VCPU(vcpu, vpsr); + /* Vol2, Table 8-1 */ + if (vpsr & IA64_PSR_IC) { + if (set_ifa) + vcpu_set_ifa(vcpu, vadr); + if (set_itir) { + value = vcpu_get_itir_on_fault(vcpu, vadr); + vcpu_set_itir(vcpu, value); + } + + if (set_iha) { + value = vcpu_thash(vcpu, vadr); + vcpu_set_iha(vcpu, value); + } + } +} + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR); +} + +/* + * Instruction TLB Fault + * @ Instruction TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR); +} + + + +/* + * Data Nested TLB Fault + * @ Data Nested TLB Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void nested_dtlb(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR); +} + +/* + * Alternate Data TLB Fault + * @ Alternate Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR); +} + + +/* + * Data TLB Fault + * @ Data TLB vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr) +{ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR); +} + +/* Deal with: + * VHPT Translation Vector + */ +static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR, IHA*/ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 1); + inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR); + + +} + +/* + * VHPT Instruction Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + +/* + * VHPT Data Fault + * @ VHPT Translation vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr) +{ + _vhpt_fault(vcpu, vadr); +} + + + +/* + * Deal with: + * General Exception vector + */ +void _general_exception(struct kvm_vcpu *vcpu) +{ + inject_guest_interruption(vcpu, IA64_GENEX_VECTOR); +} + + +/* + * Illegal Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Illegal Dependency Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void illegal_dep(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Reserved Register/Field Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rsv_reg_field(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} +/* + * Privileged Operation Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ + +void privilege_op(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Unimplement Data Address Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void unimpl_daddr(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* + * Privileged Register Fault + * @ General Exception Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void privilege_reg(struct kvm_vcpu *vcpu) +{ + _general_exception(vcpu); +} + +/* Deal with + * Nat consumption vector + * Parameter: + * vaddr: Optional, if t == REGISTER + */ +static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr, + enum tlb_miss_type t) +{ + /* If vPSR.ic && t == DATA/INST, IFA */ + if (t == DATA || t == INSTRUCTION) { + /* IFA */ + set_ifa_itir_iha(vcpu, vadr, 1, 0, 0); + } + + inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR); +} + +/* + * Instruction Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, INSTRUCTION); +} + +/* + * Register Nat Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void rnat_consumption(struct kvm_vcpu *vcpu) +{ + _nat_consumption_fault(vcpu, 0, REGISTER); +} + +/* + * Data Nat Page Consumption Fault + * @ Nat Consumption Vector + * Refer to SDM Vol2 Table 5-6 & 8-1 + */ +void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr) +{ + _nat_consumption_fault(vcpu, vadr, DATA); +} + +/* Deal with + * Page not present vector + */ +static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR); +} + + +void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr) +{ + __page_not_present(vcpu, vadr); +} + + +/* Deal with + * Data access rights vector + */ +void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr) +{ + /* If vPSR.ic, IFA, ITIR */ + set_ifa_itir_iha(vcpu, vadr, 1, 1, 0); + inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR); +} + +fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr, + unsigned long *fpsr, unsigned long *isr, unsigned long *pr, + unsigned long *ifs, struct kvm_pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + struct kvm_vcpu *vcpu = current_vcpu; + + uint64_t old_rr7 = ia64_get_rr(7UL<<61); + + if (!vmm_fpswa_interface) + return (fpswa_ret_t) {-1, 0, 0, 0}; + + /* + * Just let fpswa driver to use hardware fp registers. + * No fp register is valid in memory. + */ + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + /*Call host fpswa interface directly to virtualize + *guest fpswa request! + */ + ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]); + ia64_srlz_d(); + + ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle, + ipsr, fpsr, isr, pr, ifs, &fp_state); + ia64_set_rr(7UL << 61, old_rr7); + ia64_srlz_d(); + return ret; +} + +/* + * Handle floating-point assist faults and traps for domain. + */ +unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs, + unsigned long isr) +{ + struct kvm_vcpu *v = current_vcpu; + IA64_BUNDLE bundle; + unsigned long fault_ip; + fpswa_ret_t ret; + + fault_ip = regs->cr_iip; + /* + * When the FP trap occurs, the trapping instruction is completed. + * If ipsr.ri == 0, there is the trapping instruction in previous + * bundle. + */ + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + + if (fetch_code(v, fault_ip, &bundle)) + return -EAGAIN; + + if (!bundle.i64[0] && !bundle.i64[1]) + return -EACCES; + + ret = vmm_fp_emulate(fp_fault, &bundle, ®s->cr_ipsr, ®s->ar_fpsr, + &isr, ®s->pr, ®s->cr_ifs, regs); + return ret.status; +} + +void reflect_interruption(u64 ifa, u64 isr, u64 iim, + u64 vec, struct kvm_pt_regs *regs) +{ + u64 vector; + int status ; + struct kvm_vcpu *vcpu = current_vcpu; + u64 vpsr = VCPU(vcpu, vpsr); + + vector = vec2off[vec]; + + if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) { + panic_vm(vcpu); + return; + } + + switch (vec) { + case 32: /*IA64_FP_FAULT_VECTOR*/ + status = vmm_handle_fpu_swa(1, regs, isr); + if (!status) { + vcpu_increment_iip(vcpu); + return; + } else if (-EAGAIN == status) + return; + break; + case 33: /*IA64_FP_TRAP_VECTOR*/ + status = vmm_handle_fpu_swa(0, regs, isr); + if (!status) + return ; + else if (-EAGAIN == status) { + vcpu_decrement_iip(vcpu); + return ; + } + break; + } + + VCPU(vcpu, isr) = isr; + VCPU(vcpu, iipa) = regs->cr_iip; + if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR) + VCPU(vcpu, iim) = iim; + else + set_ifa_itir_iha(vcpu, ifa, 1, 1, 1); + + inject_guest_interruption(vcpu, vector); +} + +static void set_pal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + /*FIXME:For static and stacked convention, firmware + * has put the parameters in gr28-gr31 before + * break to vmm !!*/ + + p->u.pal_data.gr28 = vcpu_get_gr(vcpu, 28); + p->u.pal_data.gr29 = vcpu_get_gr(vcpu, 29); + p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30); + p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31); + p->exit_reason = EXIT_REASON_PAL_CALL; +} + +static void set_pal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_PAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0); + vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0); + vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0); + vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0); + } else + panic_vm(vcpu); +} + +static void set_sal_call_data(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32); + p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33); + p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34); + p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35); + p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36); + p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37); + p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38); + p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39); + p->exit_reason = EXIT_REASON_SAL_CALL; +} + +static void set_sal_call_result(struct kvm_vcpu *vcpu) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + + if (p->exit_reason == EXIT_REASON_SAL_CALL) { + vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0); + vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0); + vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0); + vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0); + } else + panic_vm(vcpu); +} + +void kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs, + unsigned long isr, unsigned long iim) +{ + struct kvm_vcpu *v = current_vcpu; + + if (ia64_psr(regs)->cpl == 0) { + /* Allow hypercalls only when cpl = 0. */ + if (iim == DOMN_PAL_REQUEST) { + set_pal_call_data(v); + vmm_transition(v); + set_pal_call_result(v); + vcpu_increment_iip(v); + return; + } else if (iim == DOMN_SAL_REQUEST) { + set_sal_call_data(v); + vmm_transition(v); + set_sal_call_result(v); + vcpu_increment_iip(v); + return; + } + } + reflect_interruption(ifa, isr, iim, 11, regs); +} + +void check_pending_irq(struct kvm_vcpu *vcpu) +{ + int mask, h_pending, h_inservice; + u64 isr; + unsigned long vpsr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + h_pending = highest_pending_irq(vcpu); + if (h_pending == NULL_VECTOR) { + update_vhpi(vcpu, NULL_VECTOR); + return; + } + h_inservice = highest_inservice_irq(vcpu); + + vpsr = VCPU(vcpu, vpsr); + mask = irq_masked(vcpu, h_pending, h_inservice); + if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) { + isr = vpsr & IA64_PSR_RI; + update_vhpi(vcpu, h_pending); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ + } else if (mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + } else { + /* masked by vpsr.i or vtpr.*/ + update_vhpi(vcpu, h_pending); + } +} + +static void generate_exirq(struct kvm_vcpu *vcpu) +{ + unsigned vpsr; + uint64_t isr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + if (!(vpsr & IA64_PSR_IC)) + panic_vm(vcpu); + reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */ +} + +void vhpi_detection(struct kvm_vcpu *vcpu) +{ + uint64_t threshold, vhpi; + union ia64_tpr vtpr; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vtpr.val = VCPU(vcpu, tpr); + + threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic; + vhpi = VCPU(vcpu, vhpi); + if (vhpi > threshold) { + /* interrupt actived*/ + generate_exirq(vcpu); + } +} + + +void leave_hypervisor_tail(void) +{ + struct kvm_vcpu *v = current_vcpu; + + if (VMX(v, timer_check)) { + VMX(v, timer_check) = 0; + if (VMX(v, itc_check)) { + if (vcpu_get_itc(v) > VCPU(v, itm)) { + if (!(VCPU(v, itv) & (1 << 16))) { + vcpu_pend_interrupt(v, VCPU(v, itv) + & 0xff); + VMX(v, itc_check) = 0; + } else { + v->arch.timer_pending = 1; + } + VMX(v, last_itc) = VCPU(v, itm) + 1; + } + } + } + + rmb(); + if (v->arch.irq_new_pending) { + v->arch.irq_new_pending = 0; + VMX(v, irq_check) = 0; + check_pending_irq(v); + return; + } + if (VMX(v, irq_check)) { + VMX(v, irq_check) = 0; + vhpi_detection(v); + } +} + + +static inline void handle_lds(struct kvm_pt_regs *regs) +{ + regs->cr_ipsr |= IA64_PSR_ED; +} + +void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type) +{ + unsigned long pte; + union ia64_rr rr; + + rr.val = ia64_get_rr(vadr); + pte = vadr & _PAGE_PPN_MASK; + pte = pte | PHY_PAGE_WB; + thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type); + return; +} + +void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs) +{ + unsigned long vpsr; + int type; + + u64 vhpt_adr, gppa, pteval, rr, itir; + union ia64_isr misr; + union ia64_pta vpta; + struct thash_data *data; + struct kvm_vcpu *v = current_vcpu; + + vpsr = VCPU(v, vpsr); + misr.val = VMX(v, cr_isr); + + type = vec; + + if (is_physical_mode(v) && (!(vadr << 1 >> 62))) { + if (vec == 2) { + if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) { + emulate_io_inst(v, ((vadr << 1) >> 1), 4); + return; + } + } + physical_tlb_miss(v, vadr, type); + return; + } + data = vtlb_lookup(v, vadr, type); + if (data != 0) { + if (type == D_TLB) { + gppa = (vadr & ((1UL << data->ps) - 1)) + + (data->ppn >> (data->ps - 12) << data->ps); + if (__gpfn_is_io(gppa >> PAGE_SHIFT)) { + if (data->pl >= ((regs->cr_ipsr >> + IA64_PSR_CPL0_BIT) & 3)) + emulate_io_inst(v, gppa, data->ma); + else { + vcpu_set_isr(v, misr.val); + data_access_rights(v, vadr); + } + return ; + } + } + thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type); + + } else if (type == D_TLB) { + if (misr.sp) { + handle_lds(regs); + return; + } + + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + + if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + alt_dtlb(v, vadr); + } else { + nested_dtlb(v); + } + return ; + } + + vpta.val = vcpu_get_pta(v); + /* avoid recursively walking (short format) VHPT */ + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (!(pteval & _PAGE_P)) { + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) { + thash_purge_and_insert(v, pteval, itir, + vadr, D_TLB); + } else if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dtlb_fault(v, vadr); + } else { + nested_dtlb(v); + } + } else { + /* Can't read VHPT. */ + if (vpsr & IA64_PSR_IC) { + vcpu_set_isr(v, misr.val); + dvhpt_fault(v, vadr); + } else { + nested_dtlb(v); + } + } + } else if (type == I_TLB) { + if (!(vpsr & IA64_PSR_IC)) + misr.ni = 1; + if (!vhpt_enabled(v, vadr, INST_REF)) { + vcpu_set_isr(v, misr.val); + alt_itlb(v, vadr); + return; + } + + vpta.val = vcpu_get_pta(v); + + vhpt_adr = vcpu_thash(v, vadr); + if (!guest_vhpt_lookup(vhpt_adr, &pteval)) { + /* VHPT successfully read. */ + if (pteval & _PAGE_P) { + if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) { + vcpu_set_isr(v, misr.val); + itlb_fault(v, vadr); + return ; + } + rr = vcpu_get_rr(v, vadr); + itir = rr & (RR_RID_MASK | RR_PS_MASK); + thash_purge_and_insert(v, pteval, itir, + vadr, I_TLB); + } else { + vcpu_set_isr(v, misr.val); + inst_page_not_present(v, vadr); + } + } else { + vcpu_set_isr(v, misr.val); + ivhpt_fault(v, vadr); + } + } +} + +void kvm_vexirq(struct kvm_vcpu *vcpu) +{ + u64 vpsr, isr; + struct kvm_pt_regs *regs; + + regs = vcpu_regs(vcpu); + vpsr = VCPU(vcpu, vpsr); + isr = vpsr & IA64_PSR_RI; + reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/ +} + +void kvm_ia64_handle_irq(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + long psr; + + local_irq_save(psr); + p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT; + vmm_transition(v); + local_irq_restore(psr); + + VMX(v, timer_check) = 1; + +} + +static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos) +{ + u64 oldrid, moldrid, oldpsbits, vaddr; + struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos]; + vaddr = p->vaddr; + + oldrid = VMX(v, vrr[0]); + VMX(v, vrr[0]) = p->rr; + oldpsbits = VMX(v, psbits[0]); + VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]); + moldrid = ia64_get_rr(0x0); + ia64_set_rr(0x0, vrrtomrr(p->rr)); + ia64_srlz_d(); + + vaddr = PAGEALIGN(vaddr, p->ps); + thash_purge_entries_remote(v, vaddr, p->ps); + + VMX(v, vrr[0]) = oldrid; + VMX(v, psbits[0]) = oldpsbits; + ia64_set_rr(0x0, moldrid); + ia64_dv_serialize_data(); +} + +static void vcpu_do_resume(struct kvm_vcpu *vcpu) +{ + /*Re-init VHPT and VTLB once from resume*/ + vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&vcpu->arch.vhpt, VHPT_SHIFT); + vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&vcpu->arch.vtlb, VTLB_SHIFT); + + ia64_set_pta(vcpu->arch.vhpt.pta.val); +} + +static void kvm_do_resume_op(struct kvm_vcpu *vcpu) +{ + if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) { + vcpu_do_resume(vcpu); + return; + } + + if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) { + thash_purge_all(vcpu); + return; + } + + if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) { + while (vcpu->arch.ptc_g_count > 0) + ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count); + } +} + +void vmm_transition(struct kvm_vcpu *vcpu) +{ + ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host); + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_do_resume_op(vcpu); +} diff --git a/trunk/arch/ia64/kvm/trampoline.S b/trunk/arch/ia64/kvm/trampoline.S new file mode 100644 index 000000000000..30897d44d61e --- /dev/null +++ b/trunk/arch/ia64/kvm/trampoline.S @@ -0,0 +1,1038 @@ +/* Save all processor states + * + * Copyright (c) 2007 Fleming Feng + * Copyright (c) 2007 Anthony Xu + */ + +#include +#include "asm-offsets.h" + + +#define CTX(name) VMM_CTX_##name##_OFFSET + + /* + * r32: context_t base address + */ +#define SAVE_BRANCH_REGS \ + add r2 = CTX(B0),r32; \ + add r3 = CTX(B1),r32; \ + mov r16 = b0; \ + mov r17 = b1; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b2; \ + mov r17 = b3; \ + ;; \ + st8 [r2]=r16,16; \ + st8 [r3]=r17,16; \ + ;; \ + mov r16 = b4; \ + mov r17 = b5; \ + ;; \ + st8 [r2]=r16; \ + st8 [r3]=r17; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_BRANCH_REGS \ + add r2 = CTX(B0),r33; \ + add r3 = CTX(B1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b0 = r16; \ + mov b1 = r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov b2 = r16; \ + mov b3 = r17; \ + ;; \ + ld8 r16=[r2]; \ + ld8 r17=[r3]; \ + ;; \ + mov b4=r16; \ + mov b5=r17; \ + ;; + + + /* + * r32: context_t base address + * bsw == 1 + * Save all bank1 general registers, r4 ~ r7 + */ +#define SAVE_GENERAL_REGS \ + add r2=CTX(R4),r32; \ + add r3=CTX(R5),r32; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r4,16; \ +.mem.offset 8,0; \ + st8.spill [r3]=r5,16; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r6,48; \ +.mem.offset 8,0; \ + st8.spill [r3]=r7,48; \ + ;; \ +.mem.offset 0,0; \ + st8.spill [r2]=r12; \ +.mem.offset 8,0; \ + st8.spill [r3]=r13; \ + ;; + + /* + * r33: context_t base address + * bsw == 1 + */ +#define RESTORE_GENERAL_REGS \ + add r2=CTX(R4),r33; \ + add r3=CTX(R5),r33; \ + ;; \ + ld8.fill r4=[r2],16; \ + ld8.fill r5=[r3],16; \ + ;; \ + ld8.fill r6=[r2],48; \ + ld8.fill r7=[r3],48; \ + ;; \ + ld8.fill r12=[r2]; \ + ld8.fill r13 =[r3]; \ + ;; + + + + + /* + * r32: context_t base address + */ +#define SAVE_KERNEL_REGS \ + add r2 = CTX(KR0),r32; \ + add r3 = CTX(KR1),r32; \ + mov r16 = ar.k0; \ + mov r17 = ar.k1; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k2; \ + mov r17 = ar.k3; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k4; \ + mov r17 = ar.k5; \ + ;; \ + st8 [r2] = r16,16; \ + st8 [r3] = r17,16; \ + ;; \ + mov r16 = ar.k6; \ + mov r17 = ar.k7; \ + ;; \ + st8 [r2] = r16; \ + st8 [r3] = r17; \ + ;; + + + + /* + * r33: context_t base address + */ +#define RESTORE_KERNEL_REGS \ + add r2 = CTX(KR0),r33; \ + add r3 = CTX(KR1),r33; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k0=r16; \ + mov ar.k1=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k2=r16; \ + mov ar.k3=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k4=r16; \ + mov ar.k5=r17; \ + ;; \ + ld8 r16=[r2],16; \ + ld8 r17=[r3],16; \ + ;; \ + mov ar.k6=r16; \ + mov ar.k7=r17; \ + ;; + + + + /* + * r32: context_t base address + */ +#define SAVE_APP_REGS \ + add r2 = CTX(BSPSTORE),r32; \ + mov r16 = ar.bspstore; \ + ;; \ + st8 [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\ + mov r16 = ar.rnat; \ + ;; \ + st8 [r2] = r16,CTX(FCR)-CTX(RNAT); \ + mov r16 = ar.fcr; \ + ;; \ + st8 [r2] = r16,CTX(EFLAG)-CTX(FCR); \ + mov r16 = ar.eflag; \ + ;; \ + st8 [r2] = r16,CTX(CFLG)-CTX(EFLAG); \ + mov r16 = ar.cflg; \ + ;; \ + st8 [r2] = r16,CTX(FSR)-CTX(CFLG); \ + mov r16 = ar.fsr; \ + ;; \ + st8 [r2] = r16,CTX(FIR)-CTX(FSR); \ + mov r16 = ar.fir; \ + ;; \ + st8 [r2] = r16,CTX(FDR)-CTX(FIR); \ + mov r16 = ar.fdr; \ + ;; \ + st8 [r2] = r16,CTX(UNAT)-CTX(FDR); \ + mov r16 = ar.unat; \ + ;; \ + st8 [r2] = r16,CTX(FPSR)-CTX(UNAT); \ + mov r16 = ar.fpsr; \ + ;; \ + st8 [r2] = r16,CTX(PFS)-CTX(FPSR); \ + mov r16 = ar.pfs; \ + ;; \ + st8 [r2] = r16,CTX(LC)-CTX(PFS); \ + mov r16 = ar.lc; \ + ;; \ + st8 [r2] = r16; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_APP_REGS \ + add r2=CTX(BSPSTORE),r33; \ + ;; \ + ld8 r16=[r2],CTX(RNAT)-CTX(BSPSTORE); \ + ;; \ + mov ar.bspstore=r16; \ + ld8 r16=[r2],CTX(FCR)-CTX(RNAT); \ + ;; \ + mov ar.rnat=r16; \ + ld8 r16=[r2],CTX(EFLAG)-CTX(FCR); \ + ;; \ + mov ar.fcr=r16; \ + ld8 r16=[r2],CTX(CFLG)-CTX(EFLAG); \ + ;; \ + mov ar.eflag=r16; \ + ld8 r16=[r2],CTX(FSR)-CTX(CFLG); \ + ;; \ + mov ar.cflg=r16; \ + ld8 r16=[r2],CTX(FIR)-CTX(FSR); \ + ;; \ + mov ar.fsr=r16; \ + ld8 r16=[r2],CTX(FDR)-CTX(FIR); \ + ;; \ + mov ar.fir=r16; \ + ld8 r16=[r2],CTX(UNAT)-CTX(FDR); \ + ;; \ + mov ar.fdr=r16; \ + ld8 r16=[r2],CTX(FPSR)-CTX(UNAT); \ + ;; \ + mov ar.unat=r16; \ + ld8 r16=[r2],CTX(PFS)-CTX(FPSR); \ + ;; \ + mov ar.fpsr=r16; \ + ld8 r16=[r2],CTX(LC)-CTX(PFS); \ + ;; \ + mov ar.pfs=r16; \ + ld8 r16=[r2]; \ + ;; \ + mov ar.lc=r16; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_CTL_REGS \ + add r2 = CTX(DCR),r32; \ + mov r16 = cr.dcr; \ + ;; \ + st8 [r2] = r16,CTX(IVA)-CTX(DCR); \ + ;; \ + mov r16 = cr.iva; \ + ;; \ + st8 [r2] = r16,CTX(PTA)-CTX(IVA); \ + ;; \ + mov r16 = cr.pta; \ + ;; \ + st8 [r2] = r16 ; \ + ;; + + /* + * r33: context_t base address + */ +#define RESTORE_CTL_REGS \ + add r2 = CTX(DCR),r33; \ + ;; \ + ld8 r16 = [r2],CTX(IVA)-CTX(DCR); \ + ;; \ + mov cr.dcr = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2],CTX(PTA)-CTX(IVA); \ + ;; \ + mov cr.iva = r16; \ + dv_serialize_data; \ + ;; \ + ld8 r16 = [r2]; \ + ;; \ + mov cr.pta = r16; \ + dv_serialize_data; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_REGION_REGS \ + add r2=CTX(RR0),r32; \ + mov r16=rr[r0]; \ + dep.z r18=1,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=2,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=3,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=4,61,3; \ + ;; \ + st8 [r2]=r17,8; \ + mov r16=rr[r18]; \ + dep.z r18=5,61,3; \ + ;; \ + st8 [r2]=r16,8; \ + mov r17=rr[r18]; \ + dep.z r18=7,61,3; \ + ;; \ + st8 [r2]=r17,16; \ + mov r16=rr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + ;; + + /* + * r33:context_t base address + */ +#define RESTORE_REGION_REGS \ + add r2=CTX(RR0),r33;\ + mov r18=r0; \ + ;; \ + ld8 r20=[r2],8; \ + ;; /* rr0 */ \ + ld8 r21=[r2],8; \ + ;; /* rr1 */ \ + ld8 r22=[r2],8; \ + ;; /* rr2 */ \ + ld8 r23=[r2],8; \ + ;; /* rr3 */ \ + ld8 r24=[r2],8; \ + ;; /* rr4 */ \ + ld8 r25=[r2],16; \ + ;; /* rr5 */ \ + ld8 r27=[r2]; \ + ;; /* rr7 */ \ + mov rr[r18]=r20; \ + dep.z r18=1,61,3; \ + ;; /* rr1 */ \ + mov rr[r18]=r21; \ + dep.z r18=2,61,3; \ + ;; /* rr2 */ \ + mov rr[r18]=r22; \ + dep.z r18=3,61,3; \ + ;; /* rr3 */ \ + mov rr[r18]=r23; \ + dep.z r18=4,61,3; \ + ;; /* rr4 */ \ + mov rr[r18]=r24; \ + dep.z r18=5,61,3; \ + ;; /* rr5 */ \ + mov rr[r18]=r25; \ + dep.z r18=7,61,3; \ + ;; /* rr7 */ \ + mov rr[r18]=r27; \ + ;; \ + srlz.i; \ + ;; + + + + /* + * r32: context_t base address + * r36~r39:scratch registers + */ +#define SAVE_DEBUG_REGS \ + add r2=CTX(IBR0),r32; \ + add r3=CTX(DBR0),r32; \ + mov r16=ibr[r0]; \ + mov r17=dbr[r0]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=1,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=2,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=3,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=4,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=5,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=6,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + add r18=7,r0; \ + ;; \ + mov r16=ibr[r18]; \ + mov r17=dbr[r18]; \ + ;; \ + st8 [r2]=r16,8; \ + st8 [r3]=r17,8; \ + ;; + + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_DEBUG_REGS \ + add r2=CTX(IBR0),r33; \ + add r3=CTX(DBR0),r33; \ + mov r16=7; \ + mov r17=r0; \ + ;; \ + mov ar.lc = r16; \ + ;; \ +1: \ + ld8 r18=[r2],8; \ + ld8 r19=[r3],8; \ + ;; \ + mov ibr[r17]=r18; \ + mov dbr[r17]=r19; \ + ;; \ + srlz.i; \ + ;; \ + add r17=1,r17; \ + br.cloop.sptk 1b; \ + ;; + + + /* + * r32: context_t base address + */ +#define SAVE_FPU_LOW \ + add r2=CTX(F2),r32; \ + add r3=CTX(F3),r32; \ + ;; \ + stf.spill.nta [r2]=f2,32; \ + stf.spill.nta [r3]=f3,32; \ + ;; \ + stf.spill.nta [r2]=f4,32; \ + stf.spill.nta [r3]=f5,32; \ + ;; \ + stf.spill.nta [r2]=f6,32; \ + stf.spill.nta [r3]=f7,32; \ + ;; \ + stf.spill.nta [r2]=f8,32; \ + stf.spill.nta [r3]=f9,32; \ + ;; \ + stf.spill.nta [r2]=f10,32; \ + stf.spill.nta [r3]=f11,32; \ + ;; \ + stf.spill.nta [r2]=f12,32; \ + stf.spill.nta [r3]=f13,32; \ + ;; \ + stf.spill.nta [r2]=f14,32; \ + stf.spill.nta [r3]=f15,32; \ + ;; \ + stf.spill.nta [r2]=f16,32; \ + stf.spill.nta [r3]=f17,32; \ + ;; \ + stf.spill.nta [r2]=f18,32; \ + stf.spill.nta [r3]=f19,32; \ + ;; \ + stf.spill.nta [r2]=f20,32; \ + stf.spill.nta [r3]=f21,32; \ + ;; \ + stf.spill.nta [r2]=f22,32; \ + stf.spill.nta [r3]=f23,32; \ + ;; \ + stf.spill.nta [r2]=f24,32; \ + stf.spill.nta [r3]=f25,32; \ + ;; \ + stf.spill.nta [r2]=f26,32; \ + stf.spill.nta [r3]=f27,32; \ + ;; \ + stf.spill.nta [r2]=f28,32; \ + stf.spill.nta [r3]=f29,32; \ + ;; \ + stf.spill.nta [r2]=f30; \ + stf.spill.nta [r3]=f31; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_FPU_HIGH \ + add r2=CTX(F32),r32; \ + add r3=CTX(F33),r32; \ + ;; \ + stf.spill.nta [r2]=f32,32; \ + stf.spill.nta [r3]=f33,32; \ + ;; \ + stf.spill.nta [r2]=f34,32; \ + stf.spill.nta [r3]=f35,32; \ + ;; \ + stf.spill.nta [r2]=f36,32; \ + stf.spill.nta [r3]=f37,32; \ + ;; \ + stf.spill.nta [r2]=f38,32; \ + stf.spill.nta [r3]=f39,32; \ + ;; \ + stf.spill.nta [r2]=f40,32; \ + stf.spill.nta [r3]=f41,32; \ + ;; \ + stf.spill.nta [r2]=f42,32; \ + stf.spill.nta [r3]=f43,32; \ + ;; \ + stf.spill.nta [r2]=f44,32; \ + stf.spill.nta [r3]=f45,32; \ + ;; \ + stf.spill.nta [r2]=f46,32; \ + stf.spill.nta [r3]=f47,32; \ + ;; \ + stf.spill.nta [r2]=f48,32; \ + stf.spill.nta [r3]=f49,32; \ + ;; \ + stf.spill.nta [r2]=f50,32; \ + stf.spill.nta [r3]=f51,32; \ + ;; \ + stf.spill.nta [r2]=f52,32; \ + stf.spill.nta [r3]=f53,32; \ + ;; \ + stf.spill.nta [r2]=f54,32; \ + stf.spill.nta [r3]=f55,32; \ + ;; \ + stf.spill.nta [r2]=f56,32; \ + stf.spill.nta [r3]=f57,32; \ + ;; \ + stf.spill.nta [r2]=f58,32; \ + stf.spill.nta [r3]=f59,32; \ + ;; \ + stf.spill.nta [r2]=f60,32; \ + stf.spill.nta [r3]=f61,32; \ + ;; \ + stf.spill.nta [r2]=f62,32; \ + stf.spill.nta [r3]=f63,32; \ + ;; \ + stf.spill.nta [r2]=f64,32; \ + stf.spill.nta [r3]=f65,32; \ + ;; \ + stf.spill.nta [r2]=f66,32; \ + stf.spill.nta [r3]=f67,32; \ + ;; \ + stf.spill.nta [r2]=f68,32; \ + stf.spill.nta [r3]=f69,32; \ + ;; \ + stf.spill.nta [r2]=f70,32; \ + stf.spill.nta [r3]=f71,32; \ + ;; \ + stf.spill.nta [r2]=f72,32; \ + stf.spill.nta [r3]=f73,32; \ + ;; \ + stf.spill.nta [r2]=f74,32; \ + stf.spill.nta [r3]=f75,32; \ + ;; \ + stf.spill.nta [r2]=f76,32; \ + stf.spill.nta [r3]=f77,32; \ + ;; \ + stf.spill.nta [r2]=f78,32; \ + stf.spill.nta [r3]=f79,32; \ + ;; \ + stf.spill.nta [r2]=f80,32; \ + stf.spill.nta [r3]=f81,32; \ + ;; \ + stf.spill.nta [r2]=f82,32; \ + stf.spill.nta [r3]=f83,32; \ + ;; \ + stf.spill.nta [r2]=f84,32; \ + stf.spill.nta [r3]=f85,32; \ + ;; \ + stf.spill.nta [r2]=f86,32; \ + stf.spill.nta [r3]=f87,32; \ + ;; \ + stf.spill.nta [r2]=f88,32; \ + stf.spill.nta [r3]=f89,32; \ + ;; \ + stf.spill.nta [r2]=f90,32; \ + stf.spill.nta [r3]=f91,32; \ + ;; \ + stf.spill.nta [r2]=f92,32; \ + stf.spill.nta [r3]=f93,32; \ + ;; \ + stf.spill.nta [r2]=f94,32; \ + stf.spill.nta [r3]=f95,32; \ + ;; \ + stf.spill.nta [r2]=f96,32; \ + stf.spill.nta [r3]=f97,32; \ + ;; \ + stf.spill.nta [r2]=f98,32; \ + stf.spill.nta [r3]=f99,32; \ + ;; \ + stf.spill.nta [r2]=f100,32; \ + stf.spill.nta [r3]=f101,32; \ + ;; \ + stf.spill.nta [r2]=f102,32; \ + stf.spill.nta [r3]=f103,32; \ + ;; \ + stf.spill.nta [r2]=f104,32; \ + stf.spill.nta [r3]=f105,32; \ + ;; \ + stf.spill.nta [r2]=f106,32; \ + stf.spill.nta [r3]=f107,32; \ + ;; \ + stf.spill.nta [r2]=f108,32; \ + stf.spill.nta [r3]=f109,32; \ + ;; \ + stf.spill.nta [r2]=f110,32; \ + stf.spill.nta [r3]=f111,32; \ + ;; \ + stf.spill.nta [r2]=f112,32; \ + stf.spill.nta [r3]=f113,32; \ + ;; \ + stf.spill.nta [r2]=f114,32; \ + stf.spill.nta [r3]=f115,32; \ + ;; \ + stf.spill.nta [r2]=f116,32; \ + stf.spill.nta [r3]=f117,32; \ + ;; \ + stf.spill.nta [r2]=f118,32; \ + stf.spill.nta [r3]=f119,32; \ + ;; \ + stf.spill.nta [r2]=f120,32; \ + stf.spill.nta [r3]=f121,32; \ + ;; \ + stf.spill.nta [r2]=f122,32; \ + stf.spill.nta [r3]=f123,32; \ + ;; \ + stf.spill.nta [r2]=f124,32; \ + stf.spill.nta [r3]=f125,32; \ + ;; \ + stf.spill.nta [r2]=f126; \ + stf.spill.nta [r3]=f127; \ + ;; + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_LOW \ + add r2 = CTX(F2), r33; \ + add r3 = CTX(F3), r33; \ + ;; \ + ldf.fill.nta f2 = [r2], 32; \ + ldf.fill.nta f3 = [r3], 32; \ + ;; \ + ldf.fill.nta f4 = [r2], 32; \ + ldf.fill.nta f5 = [r3], 32; \ + ;; \ + ldf.fill.nta f6 = [r2], 32; \ + ldf.fill.nta f7 = [r3], 32; \ + ;; \ + ldf.fill.nta f8 = [r2], 32; \ + ldf.fill.nta f9 = [r3], 32; \ + ;; \ + ldf.fill.nta f10 = [r2], 32; \ + ldf.fill.nta f11 = [r3], 32; \ + ;; \ + ldf.fill.nta f12 = [r2], 32; \ + ldf.fill.nta f13 = [r3], 32; \ + ;; \ + ldf.fill.nta f14 = [r2], 32; \ + ldf.fill.nta f15 = [r3], 32; \ + ;; \ + ldf.fill.nta f16 = [r2], 32; \ + ldf.fill.nta f17 = [r3], 32; \ + ;; \ + ldf.fill.nta f18 = [r2], 32; \ + ldf.fill.nta f19 = [r3], 32; \ + ;; \ + ldf.fill.nta f20 = [r2], 32; \ + ldf.fill.nta f21 = [r3], 32; \ + ;; \ + ldf.fill.nta f22 = [r2], 32; \ + ldf.fill.nta f23 = [r3], 32; \ + ;; \ + ldf.fill.nta f24 = [r2], 32; \ + ldf.fill.nta f25 = [r3], 32; \ + ;; \ + ldf.fill.nta f26 = [r2], 32; \ + ldf.fill.nta f27 = [r3], 32; \ + ;; \ + ldf.fill.nta f28 = [r2], 32; \ + ldf.fill.nta f29 = [r3], 32; \ + ;; \ + ldf.fill.nta f30 = [r2], 32; \ + ldf.fill.nta f31 = [r3], 32; \ + ;; + + + + /* + * r33: point to context_t structure + */ +#define RESTORE_FPU_HIGH \ + add r2 = CTX(F32), r33; \ + add r3 = CTX(F33), r33; \ + ;; \ + ldf.fill.nta f32 = [r2], 32; \ + ldf.fill.nta f33 = [r3], 32; \ + ;; \ + ldf.fill.nta f34 = [r2], 32; \ + ldf.fill.nta f35 = [r3], 32; \ + ;; \ + ldf.fill.nta f36 = [r2], 32; \ + ldf.fill.nta f37 = [r3], 32; \ + ;; \ + ldf.fill.nta f38 = [r2], 32; \ + ldf.fill.nta f39 = [r3], 32; \ + ;; \ + ldf.fill.nta f40 = [r2], 32; \ + ldf.fill.nta f41 = [r3], 32; \ + ;; \ + ldf.fill.nta f42 = [r2], 32; \ + ldf.fill.nta f43 = [r3], 32; \ + ;; \ + ldf.fill.nta f44 = [r2], 32; \ + ldf.fill.nta f45 = [r3], 32; \ + ;; \ + ldf.fill.nta f46 = [r2], 32; \ + ldf.fill.nta f47 = [r3], 32; \ + ;; \ + ldf.fill.nta f48 = [r2], 32; \ + ldf.fill.nta f49 = [r3], 32; \ + ;; \ + ldf.fill.nta f50 = [r2], 32; \ + ldf.fill.nta f51 = [r3], 32; \ + ;; \ + ldf.fill.nta f52 = [r2], 32; \ + ldf.fill.nta f53 = [r3], 32; \ + ;; \ + ldf.fill.nta f54 = [r2], 32; \ + ldf.fill.nta f55 = [r3], 32; \ + ;; \ + ldf.fill.nta f56 = [r2], 32; \ + ldf.fill.nta f57 = [r3], 32; \ + ;; \ + ldf.fill.nta f58 = [r2], 32; \ + ldf.fill.nta f59 = [r3], 32; \ + ;; \ + ldf.fill.nta f60 = [r2], 32; \ + ldf.fill.nta f61 = [r3], 32; \ + ;; \ + ldf.fill.nta f62 = [r2], 32; \ + ldf.fill.nta f63 = [r3], 32; \ + ;; \ + ldf.fill.nta f64 = [r2], 32; \ + ldf.fill.nta f65 = [r3], 32; \ + ;; \ + ldf.fill.nta f66 = [r2], 32; \ + ldf.fill.nta f67 = [r3], 32; \ + ;; \ + ldf.fill.nta f68 = [r2], 32; \ + ldf.fill.nta f69 = [r3], 32; \ + ;; \ + ldf.fill.nta f70 = [r2], 32; \ + ldf.fill.nta f71 = [r3], 32; \ + ;; \ + ldf.fill.nta f72 = [r2], 32; \ + ldf.fill.nta f73 = [r3], 32; \ + ;; \ + ldf.fill.nta f74 = [r2], 32; \ + ldf.fill.nta f75 = [r3], 32; \ + ;; \ + ldf.fill.nta f76 = [r2], 32; \ + ldf.fill.nta f77 = [r3], 32; \ + ;; \ + ldf.fill.nta f78 = [r2], 32; \ + ldf.fill.nta f79 = [r3], 32; \ + ;; \ + ldf.fill.nta f80 = [r2], 32; \ + ldf.fill.nta f81 = [r3], 32; \ + ;; \ + ldf.fill.nta f82 = [r2], 32; \ + ldf.fill.nta f83 = [r3], 32; \ + ;; \ + ldf.fill.nta f84 = [r2], 32; \ + ldf.fill.nta f85 = [r3], 32; \ + ;; \ + ldf.fill.nta f86 = [r2], 32; \ + ldf.fill.nta f87 = [r3], 32; \ + ;; \ + ldf.fill.nta f88 = [r2], 32; \ + ldf.fill.nta f89 = [r3], 32; \ + ;; \ + ldf.fill.nta f90 = [r2], 32; \ + ldf.fill.nta f91 = [r3], 32; \ + ;; \ + ldf.fill.nta f92 = [r2], 32; \ + ldf.fill.nta f93 = [r3], 32; \ + ;; \ + ldf.fill.nta f94 = [r2], 32; \ + ldf.fill.nta f95 = [r3], 32; \ + ;; \ + ldf.fill.nta f96 = [r2], 32; \ + ldf.fill.nta f97 = [r3], 32; \ + ;; \ + ldf.fill.nta f98 = [r2], 32; \ + ldf.fill.nta f99 = [r3], 32; \ + ;; \ + ldf.fill.nta f100 = [r2], 32; \ + ldf.fill.nta f101 = [r3], 32; \ + ;; \ + ldf.fill.nta f102 = [r2], 32; \ + ldf.fill.nta f103 = [r3], 32; \ + ;; \ + ldf.fill.nta f104 = [r2], 32; \ + ldf.fill.nta f105 = [r3], 32; \ + ;; \ + ldf.fill.nta f106 = [r2], 32; \ + ldf.fill.nta f107 = [r3], 32; \ + ;; \ + ldf.fill.nta f108 = [r2], 32; \ + ldf.fill.nta f109 = [r3], 32; \ + ;; \ + ldf.fill.nta f110 = [r2], 32; \ + ldf.fill.nta f111 = [r3], 32; \ + ;; \ + ldf.fill.nta f112 = [r2], 32; \ + ldf.fill.nta f113 = [r3], 32; \ + ;; \ + ldf.fill.nta f114 = [r2], 32; \ + ldf.fill.nta f115 = [r3], 32; \ + ;; \ + ldf.fill.nta f116 = [r2], 32; \ + ldf.fill.nta f117 = [r3], 32; \ + ;; \ + ldf.fill.nta f118 = [r2], 32; \ + ldf.fill.nta f119 = [r3], 32; \ + ;; \ + ldf.fill.nta f120 = [r2], 32; \ + ldf.fill.nta f121 = [r3], 32; \ + ;; \ + ldf.fill.nta f122 = [r2], 32; \ + ldf.fill.nta f123 = [r3], 32; \ + ;; \ + ldf.fill.nta f124 = [r2], 32; \ + ldf.fill.nta f125 = [r3], 32; \ + ;; \ + ldf.fill.nta f126 = [r2], 32; \ + ldf.fill.nta f127 = [r3], 32; \ + ;; + + /* + * r32: context_t base address + */ +#define SAVE_PTK_REGS \ + add r2=CTX(PKR0), r32; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + mov r18=pkr[r17]; \ + ;; \ + srlz.i; \ + ;; \ + st8 [r2]=r18, 8; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + +/* + * r33: point to context_t structure + * ar.lc are corrupted. + */ +#define RESTORE_PTK_REGS \ + add r2=CTX(PKR0), r33; \ + mov r16=7; \ + ;; \ + mov ar.lc=r16; \ + mov r17=r0; \ + ;; \ +1: \ + ld8 r18=[r2], 8; \ + ;; \ + mov pkr[r17]=r18; \ + ;; \ + srlz.i; \ + ;; \ + add r17 =1,r17; \ + ;; \ + br.cloop.sptk 1b; \ + ;; + + +/* + * void vmm_trampoline( context_t * from, + * context_t * to) + * + * from: r32 + * to: r33 + * note: interrupt disabled before call this function. + */ +GLOBAL_ENTRY(vmm_trampoline) + mov r16 = psr + adds r2 = CTX(PSR), r32 + ;; + st8 [r2] = r16, 8 // psr + mov r17 = pr + ;; + st8 [r2] = r17, 8 // pr + mov r18 = ar.unat + ;; + st8 [r2] = r18 + mov r17 = ar.rsc + ;; + adds r2 = CTX(RSC),r32 + ;; + st8 [r2]= r17 + mov ar.rsc =0 + flushrs + ;; + SAVE_GENERAL_REGS + ;; + SAVE_KERNEL_REGS + ;; + SAVE_APP_REGS + ;; + SAVE_BRANCH_REGS + ;; + SAVE_CTL_REGS + ;; + SAVE_REGION_REGS + ;; + //SAVE_DEBUG_REGS + ;; + rsm psr.dfl + ;; + srlz.d + ;; + SAVE_FPU_LOW + ;; + rsm psr.dfh + ;; + srlz.d + ;; + SAVE_FPU_HIGH + ;; + SAVE_PTK_REGS + ;; + RESTORE_PTK_REGS + ;; + RESTORE_FPU_HIGH + ;; + RESTORE_FPU_LOW + ;; + //RESTORE_DEBUG_REGS + ;; + RESTORE_REGION_REGS + ;; + RESTORE_CTL_REGS + ;; + RESTORE_BRANCH_REGS + ;; + RESTORE_APP_REGS + ;; + RESTORE_KERNEL_REGS + ;; + RESTORE_GENERAL_REGS + ;; + adds r2=CTX(PSR), r33 + ;; + ld8 r16=[r2], 8 // psr + ;; + mov psr.l=r16 + ;; + srlz.d + ;; + ld8 r16=[r2], 8 // pr + ;; + mov pr =r16,-1 + ld8 r16=[r2] // unat + ;; + mov ar.unat=r16 + ;; + adds r2=CTX(RSC),r33 + ;; + ld8 r16 =[r2] + ;; + mov ar.rsc = r16 + ;; + br.ret.sptk.few b0 +END(vmm_trampoline) diff --git a/trunk/arch/ia64/kvm/vcpu.c b/trunk/arch/ia64/kvm/vcpu.c new file mode 100644 index 000000000000..e44027ce5667 --- /dev/null +++ b/trunk/arch/ia64/kvm/vcpu.c @@ -0,0 +1,2163 @@ +/* + * kvm_vcpu.c: handling all virtual cpu related thing. + * Copyright (c) 2005, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Shaofan Li (Susue Li) + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "asm-offsets.h" +#include "vcpu.h" + +/* + * Special notes: + * - Index by it/dt/rt sequence + * - Only existing mode transitions are allowed in this table + * - RSE is placed at lazy mode when emulating guest partial mode + * - If gva happens to be rr0 and rr4, only allowed case is identity + * mapping (gva=gpa), or panic! (How?) + */ +int mm_switch_table[8][8] = { + /* 2004/09/12(Kevin): Allow switch to self */ + /* + * (it,dt,rt): (0,0,0) -> (1,1,1) + * This kind of transition usually occurs in the very early + * stage of Linux boot up procedure. Another case is in efi + * and pal calls. (see "arch/ia64/kernel/head.S") + * + * (it,dt,rt): (0,0,0) -> (0,1,1) + * This kind of transition is found when OSYa exits efi boot + * service. Due to gva = gpa in this case (Same region), + * data access can be satisfied though itlb entry for physical + * emulation is hit. + */ + {SW_SELF, 0, 0, SW_NOP, 0, 0, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (0,1,1) -> (1,1,1) + * This kind of transition is found in OSYa. + * + * (it,dt,rt): (0,1,1) -> (0,0,0) + * This kind of transition is found in OSYa + */ + {SW_NOP, 0, 0, SW_SELF, 0, 0, 0, SW_P2V}, + /* (1,0,0)->(1,1,1) */ + {0, 0, 0, 0, 0, 0, 0, SW_P2V}, + /* + * (it,dt,rt): (1,0,1) -> (1,1,1) + * This kind of transition usually occurs when Linux returns + * from the low level TLB miss handlers. + * (see "arch/ia64/kernel/ivt.S") + */ + {0, 0, 0, 0, 0, SW_SELF, 0, SW_P2V}, + {0, 0, 0, 0, 0, 0, 0, 0}, + /* + * (it,dt,rt): (1,1,1) -> (1,0,1) + * This kind of transition usually occurs in Linux low level + * TLB miss handler. (see "arch/ia64/kernel/ivt.S") + * + * (it,dt,rt): (1,1,1) -> (0,0,0) + * This kind of transition usually occurs in pal and efi calls, + * which requires running in physical mode. + * (see "arch/ia64/kernel/head.S") + * (1,1,1)->(1,0,0) + */ + + {SW_V2P, 0, 0, 0, SW_V2P, SW_V2P, 0, SW_SELF}, +}; + +void physical_mode_init(struct kvm_vcpu *vcpu) +{ + vcpu->arch.mode_flags = GUEST_IN_PHY; +} + +void switch_to_physical_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + /* Save original virtual mode rr[0] and rr[4] */ + psr = ia64_clear_ic(); + ia64_set_rr(VRN0<arch.metaphysical_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4<arch.metaphysical_rr4); + ia64_srlz_d(); + + ia64_set_psr(psr); + return; +} + + +void switch_to_virtual_rid(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + psr = ia64_clear_ic(); + ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0); + ia64_srlz_d(); + ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4); + ia64_srlz_d(); + ia64_set_psr(psr); + return; +} + +static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr) +{ + return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)]; +} + +void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + int act; + act = mm_switch_action(old_psr, new_psr); + switch (act) { + case SW_V2P: + /*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n", + old_psr.val, new_psr.val);*/ + switch_to_physical_rid(vcpu); + /* + * Set rse to enforced lazy, to prevent active rse + *save/restor when guest physical mode. + */ + vcpu->arch.mode_flags |= GUEST_IN_PHY; + break; + case SW_P2V: + switch_to_virtual_rid(vcpu); + /* + * recover old mode which is saved when entering + * guest physical mode + */ + vcpu->arch.mode_flags &= ~GUEST_IN_PHY; + break; + case SW_SELF: + break; + case SW_NOP: + break; + default: + /* Sanity check */ + break; + } + return; +} + + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void check_mm_mode_switch(struct kvm_vcpu *vcpu, struct ia64_psr old_psr, + struct ia64_psr new_psr) +{ + + if ((old_psr.dt != new_psr.dt) + || (old_psr.it != new_psr.it) + || (old_psr.rt != new_psr.rt)) + switch_mm_mode(vcpu, old_psr, new_psr); + + return; +} + + +/* + * In physical mode, insert tc/tr for region 0 and 4 uses + * RID[0] and RID[4] which is for physical mode emulation. + * However what those inserted tc/tr wants is rid for + * virtual mode. So original virtual rid needs to be restored + * before insert. + * + * Operations which required such switch include: + * - insertions (itc.*, itr.*) + * - purges (ptc.* and ptr.*) + * - tpa + * - tak + * - thash?, ttag? + * All above needs actual virtual rid for destination entry. + */ + +void prepare_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) { + vcpu->arch.mode_flags |= GUEST_PHY_EMUL; + switch_to_virtual_rid(vcpu); + } + return; +} + +/* Recover always follows prepare */ +void recover_if_physical_mode(struct kvm_vcpu *vcpu) +{ + if (is_physical_mode(vcpu)) + switch_to_physical_rid(vcpu); + vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL; + return; +} + +#define RPT(x) ((u16) &((struct kvm_pt_regs *)0)->x) + +static u16 gr_info[32] = { + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + RPT(r1), RPT(r2), RPT(r3), + RPT(r4), RPT(r5), RPT(r6), RPT(r7), + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 + +static inline unsigned long +rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +/* + * Return the (rotated) index for floating point register + * be in the REGNUM (REGNUM must range from 32-127, + * result is in the range from 0-95. + */ +static inline unsigned long fph_index(struct kvm_pt_regs *regs, + long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + + +/* + * The inverse of the above: given bspstore and the number of + * registers, calculate ar.bsp. + */ +static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr, + long num_regs) +{ + long delta = ia64_rse_slot_num(addr) + num_regs; + int i = 0; + + if (num_regs < 0) + delta -= 0x3e; + if (delta < 0) { + while (delta <= -0x3f) { + i--; + delta += 0x3f; + } + } else { + while (delta >= 0x3f) { + i++; + delta -= 0x3f; + } + } + + return addr + num_regs + i; +} + +static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long *val, int *nat) +{ + unsigned long *bsp, *addr, *rnat_addr, *bspstore; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + new_rsc = old_rsc&(~(0x3)); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + bsp = kbs + (regs->loadrs >> 19); + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + if (addr >= bspstore) { + ia64_flushrs(); + ia64_mf(); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + } + *val = *addr; + if (nat) { + if (bspstore < rnat_addr) + *nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT) + & nat_mask); + else + *nat = (int)!!((*rnat_addr) & nat_mask); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); + } +} + +void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1, + unsigned long val, unsigned long nat) +{ + unsigned long *bsp, *bspstore, *addr, *rnat_addr; + unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET; + unsigned long nat_mask; + unsigned long old_rsc, new_rsc, psr; + unsigned long rnat; + long sof = (regs->cr_ifs) & 0x7f; + long sor = (((regs->cr_ifs >> 14) & 0xf) << 3); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + old_rsc = ia64_getreg(_IA64_REG_AR_RSC); + /* put RSC to lazy mode, and set loadrs 0 */ + new_rsc = old_rsc & (~0x3fff0003); + ia64_setreg(_IA64_REG_AR_RSC, new_rsc); + bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */ + + addr = kvm_rse_skip_regs(bsp, -sof + ridx); + nat_mask = 1UL << ia64_rse_slot_num(addr); + rnat_addr = ia64_rse_rnat_addr(addr); + + local_irq_save(psr); + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + if (addr >= bspstore) { + + ia64_flushrs(); + ia64_mf(); + *addr = val; + bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE); + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + if (bspstore < rnat_addr) + rnat = rnat & (~nat_mask); + else + *rnat_addr = (*rnat_addr)&(~nat_mask); + + ia64_mf(); + ia64_loadrs(); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } else { + rnat = ia64_getreg(_IA64_REG_AR_RNAT); + *addr = val; + if (bspstore < rnat_addr) + rnat = rnat&(~nat_mask); + else + *rnat_addr = (*rnat_addr) & (~nat_mask); + + ia64_setreg(_IA64_REG_AR_BSPSTORE, bspstore); + ia64_setreg(_IA64_REG_AR_RNAT, rnat); + } + local_irq_restore(psr); + ia64_setreg(_IA64_REG_AR_RSC, old_rsc); +} + +void getreg(unsigned long regnum, unsigned long *val, + int *nat, struct kvm_pt_regs *regs) +{ + unsigned long addr, *unat; + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat;; + + addr += gr_info[regnum]; + + *val = *(unsigned long *)addr; + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL; +} + +void setreg(unsigned long regnum, unsigned long val, + int nat, struct kvm_pt_regs *regs) +{ + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + addr = (unsigned long)regs; + unat = ®s->eml_unat; + /* + * add offset from base of struct + * and do it ! + */ + addr += gr_info[regnum]; + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 + */ + bitmask = 1UL << ((addr >> 3) & 0x3f); + if (nat) + *unat |= bitmask; + else + *unat &= ~bitmask; + +} + +u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + u64 val; + + if (!reg) + return 0; + getreg(reg, &val, 0, regs); + return val; +} + +void vcpu_set_gr(struct kvm_vcpu *vcpu, u64 reg, u64 value, int nat) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + long sof = (regs->cr_ifs) & 0x7f; + + if (!reg) + return; + if (reg >= sof + 32) + return; + setreg(reg, value, nat, regs); /* FIXME: handle NATs later*/ +} + +void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_stf_spill(fpval, reg); \ + break + + switch (regnum) { + CASE_FIXED_FP(0); + CASE_FIXED_FP(1); + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +#undef CASE_FIXED_FP +} + +void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval, + struct kvm_pt_regs *regs) +{ + /* Take floating register rotation into consideration*/ + if (regnum >= IA64_FIRST_ROTATING_FR) + regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum); + +#define CASE_FIXED_FP(reg) \ + case (reg) : \ + ia64_ldf_fill(reg, fpval); \ + break + + switch (regnum) { + CASE_FIXED_FP(2); + CASE_FIXED_FP(3); + CASE_FIXED_FP(4); + CASE_FIXED_FP(5); + + CASE_FIXED_FP(6); + CASE_FIXED_FP(7); + CASE_FIXED_FP(8); + CASE_FIXED_FP(9); + CASE_FIXED_FP(10); + CASE_FIXED_FP(11); + + CASE_FIXED_FP(12); + CASE_FIXED_FP(13); + CASE_FIXED_FP(14); + CASE_FIXED_FP(15); + CASE_FIXED_FP(16); + CASE_FIXED_FP(17); + CASE_FIXED_FP(18); + CASE_FIXED_FP(19); + CASE_FIXED_FP(20); + CASE_FIXED_FP(21); + CASE_FIXED_FP(22); + CASE_FIXED_FP(23); + CASE_FIXED_FP(24); + CASE_FIXED_FP(25); + CASE_FIXED_FP(26); + CASE_FIXED_FP(27); + CASE_FIXED_FP(28); + CASE_FIXED_FP(29); + CASE_FIXED_FP(30); + CASE_FIXED_FP(31); + CASE_FIXED_FP(32); + CASE_FIXED_FP(33); + CASE_FIXED_FP(34); + CASE_FIXED_FP(35); + CASE_FIXED_FP(36); + CASE_FIXED_FP(37); + CASE_FIXED_FP(38); + CASE_FIXED_FP(39); + CASE_FIXED_FP(40); + CASE_FIXED_FP(41); + CASE_FIXED_FP(42); + CASE_FIXED_FP(43); + CASE_FIXED_FP(44); + CASE_FIXED_FP(45); + CASE_FIXED_FP(46); + CASE_FIXED_FP(47); + CASE_FIXED_FP(48); + CASE_FIXED_FP(49); + CASE_FIXED_FP(50); + CASE_FIXED_FP(51); + CASE_FIXED_FP(52); + CASE_FIXED_FP(53); + CASE_FIXED_FP(54); + CASE_FIXED_FP(55); + CASE_FIXED_FP(56); + CASE_FIXED_FP(57); + CASE_FIXED_FP(58); + CASE_FIXED_FP(59); + CASE_FIXED_FP(60); + CASE_FIXED_FP(61); + CASE_FIXED_FP(62); + CASE_FIXED_FP(63); + CASE_FIXED_FP(64); + CASE_FIXED_FP(65); + CASE_FIXED_FP(66); + CASE_FIXED_FP(67); + CASE_FIXED_FP(68); + CASE_FIXED_FP(69); + CASE_FIXED_FP(70); + CASE_FIXED_FP(71); + CASE_FIXED_FP(72); + CASE_FIXED_FP(73); + CASE_FIXED_FP(74); + CASE_FIXED_FP(75); + CASE_FIXED_FP(76); + CASE_FIXED_FP(77); + CASE_FIXED_FP(78); + CASE_FIXED_FP(79); + CASE_FIXED_FP(80); + CASE_FIXED_FP(81); + CASE_FIXED_FP(82); + CASE_FIXED_FP(83); + CASE_FIXED_FP(84); + CASE_FIXED_FP(85); + CASE_FIXED_FP(86); + CASE_FIXED_FP(87); + CASE_FIXED_FP(88); + CASE_FIXED_FP(89); + CASE_FIXED_FP(90); + CASE_FIXED_FP(91); + CASE_FIXED_FP(92); + CASE_FIXED_FP(93); + CASE_FIXED_FP(94); + CASE_FIXED_FP(95); + CASE_FIXED_FP(96); + CASE_FIXED_FP(97); + CASE_FIXED_FP(98); + CASE_FIXED_FP(99); + CASE_FIXED_FP(100); + CASE_FIXED_FP(101); + CASE_FIXED_FP(102); + CASE_FIXED_FP(103); + CASE_FIXED_FP(104); + CASE_FIXED_FP(105); + CASE_FIXED_FP(106); + CASE_FIXED_FP(107); + CASE_FIXED_FP(108); + CASE_FIXED_FP(109); + CASE_FIXED_FP(110); + CASE_FIXED_FP(111); + CASE_FIXED_FP(112); + CASE_FIXED_FP(113); + CASE_FIXED_FP(114); + CASE_FIXED_FP(115); + CASE_FIXED_FP(116); + CASE_FIXED_FP(117); + CASE_FIXED_FP(118); + CASE_FIXED_FP(119); + CASE_FIXED_FP(120); + CASE_FIXED_FP(121); + CASE_FIXED_FP(122); + CASE_FIXED_FP(123); + CASE_FIXED_FP(124); + CASE_FIXED_FP(125); + CASE_FIXED_FP(126); + CASE_FIXED_FP(127); + } +} + +void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + getfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg, + struct ia64_fpreg *val) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + if (reg > 1) + setfpreg(reg, val, regs); /* FIXME: handle NATs later*/ +} + +/************************************************************************ + * lsapic timer + ***********************************************************************/ +u64 vcpu_get_itc(struct kvm_vcpu *vcpu) +{ + unsigned long guest_itc; + guest_itc = VMX(vcpu, itc_offset) + ia64_getreg(_IA64_REG_AR_ITC); + + if (guest_itc >= VMX(vcpu, last_itc)) { + VMX(vcpu, last_itc) = guest_itc; + return guest_itc; + } else + return VMX(vcpu, last_itc); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val); +static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val) +{ + struct kvm_vcpu *v; + int i; + long itc_offset = val - ia64_getreg(_IA64_REG_AR_ITC); + unsigned long vitv = VCPU(vcpu, itv); + + if (vcpu->vcpu_id == 0) { + for (i = 0; i < MAX_VCPU_NUM; i++) { + v = (struct kvm_vcpu *)((char *)vcpu + VCPU_SIZE * i); + VMX(v, itc_offset) = itc_offset; + VMX(v, last_itc) = 0; + } + } + VMX(vcpu, last_itc) = 0; + if (VCPU(vcpu, itm) <= val) { + VMX(vcpu, itc_check) = 0; + vcpu_unpend_interrupt(vcpu, vitv); + } else { + VMX(vcpu, itc_check) = 1; + vcpu_set_itm(vcpu, VCPU(vcpu, itm)); + } + +} + +static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itm)); +} + +static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val) +{ + unsigned long vitv = VCPU(vcpu, itv); + VCPU(vcpu, itm) = val; + + if (val > vcpu_get_itc(vcpu)) { + VMX(vcpu, itc_check) = 1; + vcpu_unpend_interrupt(vcpu, vitv); + VMX(vcpu, timer_pending) = 0; + } else + VMX(vcpu, itc_check) = 0; +} + +#define ITV_VECTOR(itv) (itv&0xff) +#define ITV_IRQ_MASK(itv) (itv&(1<<16)) + +static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itv) = val; + if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) { + vcpu_pend_interrupt(vcpu, ITV_VECTOR(val)); + vcpu->arch.timer_pending = 0; + } +} + +static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val) +{ + int vec; + + vec = highest_inservice_irq(vcpu); + if (vec == NULL_VECTOR) + return; + VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63)); + VCPU(vcpu, eoi) = 0; + vcpu->arch.irq_new_pending = 1; + +} + +/* See Table 5-8 in SDM vol2 for the definition */ +int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice) +{ + union ia64_tpr vtpr; + + vtpr.val = VCPU(vcpu, tpr); + + if (h_inservice == NMI_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == NMI_VECTOR) { + /* Non Maskable Interrupt */ + return IRQ_NO_MASKED; + } + + if (h_inservice == ExtINT_VECTOR) + return IRQ_MASKED_BY_INSVC; + + if (h_pending == ExtINT_VECTOR) { + if (vtpr.mmi) { + /* mask all external IRQ */ + return IRQ_MASKED_BY_VTPR; + } else + return IRQ_NO_MASKED; + } + + if (is_higher_irq(h_pending, h_inservice)) { + if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4))) + return IRQ_NO_MASKED; + else + return IRQ_MASKED_BY_VTPR; + } else { + return IRQ_MASKED_BY_INSVC; + } +} + +void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + + vcpu->arch.irq_new_pending = 1; +} + +void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec) +{ + long spsr; + int ret; + + local_irq_save(spsr); + ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0])); + local_irq_restore(spsr); + if (ret) { + vcpu->arch.irq_new_pending = 1; + wmb(); + } +} + +void update_vhpi(struct kvm_vcpu *vcpu, int vec) +{ + u64 vhpi; + + if (vec == NULL_VECTOR) + vhpi = 0; + else if (vec == NMI_VECTOR) + vhpi = 32; + else if (vec == ExtINT_VECTOR) + vhpi = 16; + else + vhpi = vec >> 4; + + VCPU(vcpu, vhpi) = vhpi; + if (VCPU(vcpu, vac).a_int) + ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT, + (u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0); +} + +u64 vcpu_get_ivr(struct kvm_vcpu *vcpu) +{ + int vec, h_inservice, mask; + + vec = highest_pending_irq(vcpu); + h_inservice = highest_inservice_irq(vcpu); + mask = irq_masked(vcpu, vec, h_inservice); + if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) { + if (VCPU(vcpu, vhpi)) + update_vhpi(vcpu, NULL_VECTOR); + return IA64_SPURIOUS_INT_VECTOR; + } + if (mask == IRQ_MASKED_BY_VTPR) { + update_vhpi(vcpu, vec); + return IA64_SPURIOUS_INT_VECTOR; + } + VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63)); + vcpu_unpend_interrupt(vcpu, vec); + return (u64)vec; +} + +/************************************************************************** + Privileged operation emulation routines + **************************************************************************/ +u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_pta vpta; + union ia64_rr vrr; + u64 pval; + u64 vhpt_offset; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val, + vpta.val, 0, 0, 0, 0); + } else { + pval = (vadr & VRN_MASK) | vhpt_offset | + (vpta.val << 3 >> (vpta.size + 3) << (vpta.size)); + } + return pval; +} + +u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr) +{ + union ia64_rr vrr; + union ia64_pta vpta; + u64 pval; + + vpta.val = vcpu_get_pta(vcpu); + vrr.val = vcpu_get_rr(vcpu, vadr); + if (vpta.vf) { + pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val, + 0, 0, 0, 0, 0); + } else + pval = 1; + + return pval; +} + +u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr) +{ + struct thash_data *data; + union ia64_pta vpta; + u64 key; + + vpta.val = vcpu_get_pta(vcpu); + if (vpta.vf == 0) { + key = 1; + return key; + } + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (!data || !data->p) + key = 1; + else + key = data->key; + + return key; +} + + + +void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long thash, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + thash = vcpu_thash(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, thash, 0); +} + + +void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tag, vadr; + + vadr = vcpu_get_gr(vcpu, inst.M46.r3); + tag = vcpu_ttag(vcpu, vadr); + vcpu_set_gr(vcpu, inst.M46.r1, tag, 0); +} + +int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, u64 *padr) +{ + struct thash_data *data; + union ia64_isr visr, pt_isr; + struct kvm_pt_regs *regs; + struct ia64_psr vpsr; + + regs = vcpu_regs(vcpu); + pt_isr.val = VMX(vcpu, cr_isr); + visr.val = 0; + visr.ei = pt_isr.ei; + visr.ir = pt_isr.ir; + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + visr.na = 1; + + data = vhpt_lookup(vadr); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else { + *padr = (data->gpaddr >> data->ps << data->ps) | + (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + + data = vtlb_lookup(vcpu, vadr, D_TLB); + if (data) { + if (data->p == 0) { + vcpu_set_isr(vcpu, visr.val); + data_page_not_present(vcpu, vadr); + return IA64_FAULT; + } else if (data->ma == VA_MATTR_NATPAGE) { + vcpu_set_isr(vcpu, visr.val); + dnat_page_consumption(vcpu, vadr); + return IA64_FAULT; + } else{ + *padr = ((data->ppn >> (data->ps - 12)) << data->ps) + | (vadr & (PSIZE(data->ps) - 1)); + return IA64_NO_FAULT; + } + } + if (!vhpt_enabled(vcpu, vadr, NA_REF)) { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + alt_dtlb(vcpu, vadr); + return IA64_FAULT; + } else { + nested_dtlb(vcpu); + return IA64_FAULT; + } + } else { + if (vpsr.ic) { + vcpu_set_isr(vcpu, visr.val); + dvhpt_fault(vcpu, vadr); + return IA64_FAULT; + } else{ + nested_dtlb(vcpu); + return IA64_FAULT; + } + } + + return IA64_NO_FAULT; +} + + +int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + + if (vcpu_tpa(vcpu, r3, &r1)) + return IA64_FAULT; + + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); + return(IA64_NO_FAULT); +} + +void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1, r3; + + r3 = vcpu_get_gr(vcpu, inst.M46.r3); + r1 = vcpu_tak(vcpu, r3); + vcpu_set_gr(vcpu, inst.M46.r1, r1, 0); +} + + +/************************************ + * Insert/Purge translation register/cache + ************************************/ +void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB); +} + +void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa) +{ + thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB); +} + +void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 ps, va, rid; + struct thash_data *p_itr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + rid = vcpu_get_rr(vcpu, ifa); + rid = rid & RR_RID_MASK; + p_itr = (struct thash_data *)&vcpu->arch.itrs[slot]; + vcpu_set_tr(p_itr, pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, itr_regions), va); +} + + +void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa) +{ + u64 gpfn; + u64 ps, va, rid; + struct thash_data *p_dtr; + + ps = itir_ps(itir); + va = PAGEALIGN(ifa, ps); + pte &= ~PAGE_FLAGS_RV_MASK; + + if (ps != _PAGE_SIZE_16M) + thash_purge_entries(vcpu, va, ps); + gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT; + if (__gpfn_is_io(gpfn)) + pte |= VTLB_PTE_IO; + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot]; + vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot], + pte, itir, va, rid); + vcpu_quick_region_set(VMX(vcpu, dtr_regions), va); +} + +void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0) + vcpu->arch.dtrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps) +{ + int index; + u64 va; + + va = PAGEALIGN(ifa, ps); + while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0) + vcpu->arch.itrs[index].page_flags = 0; + + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + va = PAGEALIGN(va, ps); + thash_purge_entries(vcpu, va, ps); +} + +void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va) +{ + thash_purge_all(vcpu); +} + +void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + struct exit_ctl_data *p = &vcpu->arch.exit_data; + long psr; + local_irq_save(psr); + p->exit_reason = EXIT_REASON_PTC_G; + + p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va); + p->u.ptc_g_data.vaddr = va; + p->u.ptc_g_data.ps = ps; + vmm_transition(vcpu); + /* Do Local Purge Here*/ + vcpu_ptc_l(vcpu, va, ps); + local_irq_restore(psr); +} + + +void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps) +{ + vcpu_ptc_ga(vcpu, va, ps); +} + +void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + vcpu_ptc_e(vcpu, ifa); +} + +void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_g(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_ga(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptc_l(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_d(vcpu, ifa, itir_ps(itir)); +} + +void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long ifa, itir; + + ifa = vcpu_get_gr(vcpu, inst.M45.r3); + itir = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_ptr_i(vcpu, ifa, itir_ps(itir)); +} + +void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_d(vcpu, slot, pte, itir, ifa); +} + + + +void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte, slot; + + slot = vcpu_get_gr(vcpu, inst.M45.r3); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + vcpu_itr_i(vcpu, slot, pte, itir, ifa); +} + +void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_d(vcpu, pte, itir, ifa); +} + +void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long itir, ifa, pte; + + itir = vcpu_get_itir(vcpu); + ifa = vcpu_get_ifa(vcpu); + pte = vcpu_get_gr(vcpu, inst.M45.r2); + vcpu_itc_i(vcpu, pte, itir, ifa); +} + +/************************************* + * Moves to semi-privileged registers + *************************************/ + +void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long imm; + + if (inst.M30.s) + imm = -inst.M30.imm; + else + imm = inst.M30.imm; + + vcpu_set_itc(vcpu, imm); +} + +void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M29.r2); + vcpu_set_itc(vcpu, r2); +} + + +void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r1; + + r1 = vcpu_get_itc(vcpu); + vcpu_set_gr(vcpu, inst.M31.r1, r1, 0); +} +/************************************************************************** + struct kvm_vcpu*protection key register access routines + **************************************************************************/ + +unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg) +{ + return ((unsigned long)ia64_get_pkr(reg)); +} + +void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val) +{ + ia64_set_pkr(reg, val); +} + + +unsigned long vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, unsigned long ifa) +{ + union ia64_rr rr, rr1; + + rr.val = vcpu_get_rr(vcpu, ifa); + rr1.val = 0; + rr1.ps = rr.ps; + rr1.rid = rr.rid; + return (rr1.val); +} + + + +/******************************** + * Moves to privileged registers + ********************************/ +unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg, + unsigned long val) +{ + union ia64_rr oldrr, newrr; + unsigned long rrval; + struct exit_ctl_data *p = &vcpu->arch.exit_data; + unsigned long psr; + + oldrr.val = vcpu_get_rr(vcpu, reg); + newrr.val = val; + vcpu->arch.vrr[reg >> VRN_SHIFT] = val; + + switch ((unsigned long)(reg >> VRN_SHIFT)) { + case VRN6: + vcpu->arch.vmm_rr = vrrtomrr(val); + local_irq_save(psr); + p->exit_reason = EXIT_REASON_SWITCH_RR6; + vmm_transition(vcpu); + local_irq_restore(psr); + break; + case VRN4: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr4 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + case VRN0: + rrval = vrrtomrr(val); + vcpu->arch.metaphysical_saved_rr0 = rrval; + if (!is_physical_mode(vcpu)) + ia64_set_rr(reg, rrval); + break; + default: + ia64_set_rr(reg, vrrtomrr(val)); + break; + } + + return (IA64_NO_FAULT); +} + + + +void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_rr(vcpu, r3, r2); +} + +void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ +} + +void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmc(vcpu, r3, r2); +} + +void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pmd(vcpu, r3, r2); +} + +void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + u64 r3, r2; + + r3 = vcpu_get_gr(vcpu, inst.M42.r3); + r2 = vcpu_get_gr(vcpu, inst.M42.r2); + vcpu_set_pkr(vcpu, r3, r2); +} + + + +void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_rr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pkr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_dbr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_ibr(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_pmc(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + + +unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg) +{ + /* FIXME: This could get called as a result of a rsvd-reg fault */ + if (reg > (ia64_get_cpuid(3) & 0xff)) + return 0; + else + return ia64_get_cpuid(reg); +} + +void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r3, r1; + + r3 = vcpu_get_gr(vcpu, inst.M43.r3); + r1 = vcpu_get_cpuid(vcpu, r3); + vcpu_set_gr(vcpu, inst.M43.r1, r1, 0); +} + +void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val) +{ + VCPU(vcpu, tpr) = val; + vcpu->arch.irq_check = 1; +} + +unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long r2; + + r2 = vcpu_get_gr(vcpu, inst.M32.r2); + VCPU(vcpu, vcr[inst.M32.cr3]) = r2; + + switch (inst.M32.cr3) { + case 0: + vcpu_set_dcr(vcpu, r2); + break; + case 1: + vcpu_set_itm(vcpu, r2); + break; + case 66: + vcpu_set_tpr(vcpu, r2); + break; + case 67: + vcpu_set_eoi(vcpu, r2); + break; + default: + break; + } + + return 0; +} + + +unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long tgt = inst.M33.r1; + unsigned long val; + + switch (inst.M33.cr3) { + case 65: + val = vcpu_get_ivr(vcpu); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + + case 67: + vcpu_set_gr(vcpu, tgt, 0L, 0); + break; + default: + val = VCPU(vcpu, vcr[inst.M33.cr3]); + vcpu_set_gr(vcpu, tgt, val, 0); + break; + } + + return 0; +} + + + +void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val) +{ + + unsigned long mask; + struct kvm_pt_regs *regs; + struct ia64_psr old_psr, new_psr; + + old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + regs = vcpu_regs(vcpu); + /* We only support guest as: + * vpsr.pk = 0 + * vpsr.is = 0 + * Otherwise panic + */ + if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM)) + panic_vm(vcpu); + + /* + * For those IA64_PSR bits: id/da/dd/ss/ed/ia + * Since these bits will become 0, after success execution of each + * instruction, we will change set them to mIA64_PSR + */ + VCPU(vcpu, vpsr) = val + & (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA)); + + if (!old_psr.i && (val & IA64_PSR_I)) { + /* vpsr.i 0->1 */ + vcpu->arch.irq_check = 1; + } + new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + /* + * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr) + * , except for the following bits: + * ic/i/dt/si/rt/mc/it/bn/vm + */ + mask = IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI + + IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN + + IA64_PSR_VM; + + regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask)); + + check_mm_mode_switch(vcpu, old_psr, new_psr); + + return ; +} + +unsigned long vcpu_cover(struct kvm_vcpu *vcpu) +{ + struct ia64_psr vpsr; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + + if (!vpsr.ic) + VCPU(vcpu, ifs) = regs->cr_ifs; + regs->cr_ifs = IA64_IFS_V; + return (IA64_NO_FAULT); +} + + + +/************************************************************************** + VCPU banked general register access routines + **************************************************************************/ +#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ ( \ + ";;extr.u %0 = %3,%6,16;;\n" \ + "dep %1 = %0, %1, 0, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 16, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b1unat), "r"(*b0unat), \ + "r"(*runat), "r"(b1unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw0(struct kvm_vcpu *vcpu) +{ + unsigned long i; + + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + + if (VCPU(vcpu, vpsr) & IA64_PSR_BN) { + for (i = 0; i < 16; i++) { + *b1++ = *r; + *r++ = *b0++; + } + vcpu_bsw0_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) &= ~IA64_PSR_BN; + } +} + +#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT) \ + do { \ + __asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n" \ + "dep %1 = %0, %1, 16, 16;;\n" \ + "st8 [%4] = %1\n" \ + "extr.u %0 = %2, 0, 16;;\n" \ + "dep %3 = %0, %3, %6, 16;;\n" \ + "st8 [%5] = %3\n" \ + ::"r"(i), "r"(*b0unat), "r"(*b1unat), \ + "r"(*runat), "r"(b0unat), "r"(runat), \ + "i"(VMM_PT_REGS_R16_SLOT) : "memory"); \ + } while (0) + +void vcpu_bsw1(struct kvm_vcpu *vcpu) +{ + unsigned long i; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + unsigned long *r = ®s->r16; + unsigned long *b0 = &VCPU(vcpu, vbgr[0]); + unsigned long *b1 = &VCPU(vcpu, vgr[0]); + unsigned long *runat = ®s->eml_unat; + unsigned long *b0unat = &VCPU(vcpu, vbnat); + unsigned long *b1unat = &VCPU(vcpu, vnat); + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) { + for (i = 0; i < 16; i++) { + *b0++ = *r; + *r++ = *b1++; + } + vcpu_bsw1_unat(i, b0unat, b1unat, runat, + VMM_PT_REGS_R16_SLOT); + VCPU(vcpu, vpsr) |= IA64_PSR_BN; + } +} + + + + +void vcpu_rfi(struct kvm_vcpu *vcpu) +{ + unsigned long ifs, psr; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + psr = VCPU(vcpu, ipsr); + if (psr & IA64_PSR_BN) + vcpu_bsw1(vcpu); + else + vcpu_bsw0(vcpu); + vcpu_set_psr(vcpu, psr); + ifs = VCPU(vcpu, ifs); + if (ifs >> 63) + regs->cr_ifs = ifs; + regs->cr_iip = VCPU(vcpu, iip); +} + + +/* + VPSR can't keep track of below bits of guest PSR + This function gets guest PSR + */ + +unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu) +{ + unsigned long mask; + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + + mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | + IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI; + return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask); +} + +void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr &= (~imm24); + vcpu_set_psr(vcpu, vpsr); +} + +void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long vpsr; + unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21) + | inst.M44.imm; + + vpsr = vcpu_get_psr(vcpu); + vpsr |= imm24; + vcpu_set_psr(vcpu, vpsr); +} + +/* Generate Mask + * Parameter: + * bit -- starting bit + * len -- how many bits + */ +#define MASK(bit,len) \ +({ \ + __u64 ret; \ + \ + __asm __volatile("dep %0=-1, r0, %1, %2"\ + : "=r" (ret): \ + "M" (bit), \ + "M" (len)); \ + ret; \ +}) + +void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val) +{ + val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32)); + vcpu_set_psr(vcpu, val); +} + +void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_gr(vcpu, inst.M35.r2); + vcpu_set_psr_l(vcpu, val); +} + +void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst) +{ + unsigned long val; + + val = vcpu_get_psr(vcpu); + val = (val & MASK(0, 32)) | (val & MASK(35, 2)); + vcpu_set_gr(vcpu, inst.M33.r1, val, 0); +} + +void vcpu_increment_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + if (ipsr->ri == 2) { + ipsr->ri = 0; + regs->cr_iip += 16; + } else + ipsr->ri++; +} + +void vcpu_decrement_iip(struct kvm_vcpu *vcpu) +{ + struct kvm_pt_regs *regs = vcpu_regs(vcpu); + struct ia64_psr *ipsr = (struct ia64_psr *)®s->cr_ipsr; + + if (ipsr->ri == 0) { + ipsr->ri = 2; + regs->cr_iip -= 16; + } else + ipsr->ri--; +} + +/** Emulate a privileged operation. + * + * + * @param vcpu virtual cpu + * @cause the reason cause virtualization fault + * @opcode the instruction code which cause virtualization fault + */ + +void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs) +{ + unsigned long status, cause, opcode ; + INST64 inst; + + status = IA64_NO_FAULT; + cause = VMX(vcpu, cause); + opcode = VMX(vcpu, opcode); + inst.inst = opcode; + /* + * Switch to actual virtual rid in rr0 and rr4, + * which is required by some tlb related instructions. + */ + prepare_if_physical_mode(vcpu); + + switch (cause) { + case EVENT_RSM: + kvm_rsm(vcpu, inst); + break; + case EVENT_SSM: + kvm_ssm(vcpu, inst); + break; + case EVENT_MOV_TO_PSR: + kvm_mov_to_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_PSR: + kvm_mov_from_psr(vcpu, inst); + break; + case EVENT_MOV_FROM_CR: + kvm_mov_from_cr(vcpu, inst); + break; + case EVENT_MOV_TO_CR: + kvm_mov_to_cr(vcpu, inst); + break; + case EVENT_BSW_0: + vcpu_bsw0(vcpu); + break; + case EVENT_BSW_1: + vcpu_bsw1(vcpu); + break; + case EVENT_COVER: + vcpu_cover(vcpu); + break; + case EVENT_RFI: + vcpu_rfi(vcpu); + break; + case EVENT_ITR_D: + kvm_itr_d(vcpu, inst); + break; + case EVENT_ITR_I: + kvm_itr_i(vcpu, inst); + break; + case EVENT_PTR_D: + kvm_ptr_d(vcpu, inst); + break; + case EVENT_PTR_I: + kvm_ptr_i(vcpu, inst); + break; + case EVENT_ITC_D: + kvm_itc_d(vcpu, inst); + break; + case EVENT_ITC_I: + kvm_itc_i(vcpu, inst); + break; + case EVENT_PTC_L: + kvm_ptc_l(vcpu, inst); + break; + case EVENT_PTC_G: + kvm_ptc_g(vcpu, inst); + break; + case EVENT_PTC_GA: + kvm_ptc_ga(vcpu, inst); + break; + case EVENT_PTC_E: + kvm_ptc_e(vcpu, inst); + break; + case EVENT_MOV_TO_RR: + kvm_mov_to_rr(vcpu, inst); + break; + case EVENT_MOV_FROM_RR: + kvm_mov_from_rr(vcpu, inst); + break; + case EVENT_THASH: + kvm_thash(vcpu, inst); + break; + case EVENT_TTAG: + kvm_ttag(vcpu, inst); + break; + case EVENT_TPA: + status = kvm_tpa(vcpu, inst); + break; + case EVENT_TAK: + kvm_tak(vcpu, inst); + break; + case EVENT_MOV_TO_AR_IMM: + kvm_mov_to_ar_imm(vcpu, inst); + break; + case EVENT_MOV_TO_AR: + kvm_mov_to_ar_reg(vcpu, inst); + break; + case EVENT_MOV_FROM_AR: + kvm_mov_from_ar_reg(vcpu, inst); + break; + case EVENT_MOV_TO_DBR: + kvm_mov_to_dbr(vcpu, inst); + break; + case EVENT_MOV_TO_IBR: + kvm_mov_to_ibr(vcpu, inst); + break; + case EVENT_MOV_TO_PMC: + kvm_mov_to_pmc(vcpu, inst); + break; + case EVENT_MOV_TO_PMD: + kvm_mov_to_pmd(vcpu, inst); + break; + case EVENT_MOV_TO_PKR: + kvm_mov_to_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_DBR: + kvm_mov_from_dbr(vcpu, inst); + break; + case EVENT_MOV_FROM_IBR: + kvm_mov_from_ibr(vcpu, inst); + break; + case EVENT_MOV_FROM_PMC: + kvm_mov_from_pmc(vcpu, inst); + break; + case EVENT_MOV_FROM_PKR: + kvm_mov_from_pkr(vcpu, inst); + break; + case EVENT_MOV_FROM_CPUID: + kvm_mov_from_cpuid(vcpu, inst); + break; + case EVENT_VMSW: + status = IA64_FAULT; + break; + default: + break; + }; + /*Assume all status is NO_FAULT ?*/ + if (status == IA64_NO_FAULT && cause != EVENT_RFI) + vcpu_increment_iip(vcpu); + + recover_if_physical_mode(vcpu); +} + +void init_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + vcpu->arch.mode_flags = GUEST_IN_PHY; + VMX(vcpu, vrr[0]) = 0x38; + VMX(vcpu, vrr[1]) = 0x38; + VMX(vcpu, vrr[2]) = 0x38; + VMX(vcpu, vrr[3]) = 0x38; + VMX(vcpu, vrr[4]) = 0x38; + VMX(vcpu, vrr[5]) = 0x38; + VMX(vcpu, vrr[6]) = 0x38; + VMX(vcpu, vrr[7]) = 0x38; + VCPU(vcpu, vpsr) = IA64_PSR_BN; + VCPU(vcpu, dcr) = 0; + /* pta.size must not be 0. The minimum is 15 (32k) */ + VCPU(vcpu, pta) = 15 << 2; + VCPU(vcpu, itv) = 0x10000; + VCPU(vcpu, itm) = 0; + VMX(vcpu, last_itc) = 0; + + VCPU(vcpu, lid) = VCPU_LID(vcpu); + VCPU(vcpu, ivr) = 0; + VCPU(vcpu, tpr) = 0x10000; + VCPU(vcpu, eoi) = 0; + VCPU(vcpu, irr[0]) = 0; + VCPU(vcpu, irr[1]) = 0; + VCPU(vcpu, irr[2]) = 0; + VCPU(vcpu, irr[3]) = 0; + VCPU(vcpu, pmv) = 0x10000; + VCPU(vcpu, cmcv) = 0x10000; + VCPU(vcpu, lrr0) = 0x10000; /* default reset value? */ + VCPU(vcpu, lrr1) = 0x10000; /* default reset value? */ + update_vhpi(vcpu, NULL_VECTOR); + VLSAPIC_XTP(vcpu) = 0x80; /* disabled */ + + for (i = 0; i < 4; i++) + VLSAPIC_INSVC(vcpu, i) = 0; +} + +void kvm_init_all_rr(struct kvm_vcpu *vcpu) +{ + unsigned long psr; + + local_irq_save(psr); + + /* WARNING: not allow co-exist of both virtual mode and physical + * mode in same region + */ + + vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0])); + vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4])); + + if (is_physical_mode(vcpu)) { + if (vcpu->arch.mode_flags & GUEST_PHY_EMUL) + panic_vm(vcpu); + + ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4); + ia64_dv_serialize_data(); + } else { + ia64_set_rr((VRN0 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr0); + ia64_dv_serialize_data(); + ia64_set_rr((VRN4 << VRN_SHIFT), + vcpu->arch.metaphysical_saved_rr4); + ia64_dv_serialize_data(); + } + ia64_set_rr((VRN1 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN1]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN2 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN2]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN3 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN3]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN5 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN5]))); + ia64_dv_serialize_data(); + ia64_set_rr((VRN7 << VRN_SHIFT), + vrrtomrr(VMX(vcpu, vrr[VRN7]))); + ia64_dv_serialize_data(); + ia64_srlz_d(); + ia64_set_psr(psr); +} + +int vmm_entry(void) +{ + struct kvm_vcpu *v; + v = current_vcpu; + + ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd, + 0, 0, 0, 0, 0, 0); + kvm_init_vtlb(v); + kvm_init_vhpt(v); + init_vcpu(v); + kvm_init_all_rr(v); + vmm_reset_entry(); + + return 0; +} + +void panic_vm(struct kvm_vcpu *v) +{ + struct exit_ctl_data *p = &v->arch.exit_data; + + p->exit_reason = EXIT_REASON_VM_PANIC; + vmm_transition(v); + /*Never to return*/ + while (1); +} diff --git a/trunk/arch/ia64/kvm/vcpu.h b/trunk/arch/ia64/kvm/vcpu.h new file mode 100644 index 000000000000..b0fcfb62c49e --- /dev/null +++ b/trunk/arch/ia64/kvm/vcpu.h @@ -0,0 +1,740 @@ +/* + * vcpu.h: vcpu routines + * Copyright (c) 2005, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com) + * Xiantao Zhang (xiantao.zhang@intel.com) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + + +#ifndef __KVM_VCPU_H__ +#define __KVM_VCPU_H__ + +#include +#include +#include + +#ifndef __ASSEMBLY__ +#include "vti.h" + +#include +#include + +typedef unsigned long IA64_INST; + +typedef union U_IA64_BUNDLE { + unsigned long i64[2]; + struct { unsigned long template:5, slot0:41, slot1a:18, + slot1b:23, slot2:41; }; + /* NOTE: following doesn't work because bitfields can't cross natural + size boundaries + struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */ +} IA64_BUNDLE; + +typedef union U_INST64_A5 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5, + imm9d:9, s:1, major:4; }; +} INST64_A5; + +typedef union U_INST64_B4 { + IA64_INST inst; + struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6, + wh:2, d:1, un1:1, major:4; }; +} INST64_B4; + +typedef union U_INST64_B8 { + IA64_INST inst; + struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; }; +} INST64_B8; + +typedef union U_INST64_B9 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; }; +} INST64_B9; + +typedef union U_INST64_I19 { + IA64_INST inst; + struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; }; +} INST64_I19; + +typedef union U_INST64_I26 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I26; + +typedef union U_INST64_I27 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; }; +} INST64_I27; + +typedef union U_INST64_I28 { /* not privileged (mov from AR) */ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_I28; + +typedef union U_INST64_M28 { + IA64_INST inst; + struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M28; + +typedef union U_INST64_M29 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M29; + +typedef union U_INST64_M30 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2, + x3:3, s:1, major:4; }; +} INST64_M30; + +typedef union U_INST64_M31 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M31; + +typedef union U_INST64_M32 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M32; + +typedef union U_INST64_M33 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M33; + +typedef union U_INST64_M35 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; + +} INST64_M35; + +typedef union U_INST64_M36 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; }; +} INST64_M36; + +typedef union U_INST64_M37 { + IA64_INST inst; + struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3, + i:1, major:4; }; +} INST64_M37; + +typedef union U_INST64_M41 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; }; +} INST64_M41; + +typedef union U_INST64_M42 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M42; + +typedef union U_INST64_M43 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M43; + +typedef union U_INST64_M44 { + IA64_INST inst; + struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; }; +} INST64_M44; + +typedef union U_INST64_M45 { + IA64_INST inst; + struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; }; +} INST64_M45; + +typedef union U_INST64_M46 { + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6, + x3:3, un1:1, major:4; }; +} INST64_M46; + +typedef union U_INST64_M47 { + IA64_INST inst; + struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; }; +} INST64_M47; + +typedef union U_INST64_M1{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M1; + +typedef union U_INST64_M2{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M2; + +typedef union U_INST64_M3{ + IA64_INST inst; + struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M3; + +typedef union U_INST64_M4 { + IA64_INST inst; + struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M4; + +typedef union U_INST64_M5 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M5; + +typedef union U_INST64_M6 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M6; + +typedef union U_INST64_M9 { + IA64_INST inst; + struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M9; + +typedef union U_INST64_M10 { + IA64_INST inst; + struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M10; + +typedef union U_INST64_M12 { + IA64_INST inst; + struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2, + x6:6, m:1, major:4; }; +} INST64_M12; + +typedef union U_INST64_M15 { + IA64_INST inst; + struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2, + x6:6, s:1, major:4; }; +} INST64_M15; + +typedef union U_INST64 { + IA64_INST inst; + struct { unsigned long :37, major:4; } generic; + INST64_A5 A5; /* used in build_hypercall_bundle only */ + INST64_B4 B4; /* used in build_hypercall_bundle only */ + INST64_B8 B8; /* rfi, bsw.[01] */ + INST64_B9 B9; /* break.b */ + INST64_I19 I19; /* used in build_hypercall_bundle only */ + INST64_I26 I26; /* mov register to ar (I unit) */ + INST64_I27 I27; /* mov immediate to ar (I unit) */ + INST64_I28 I28; /* mov from ar (I unit) */ + INST64_M1 M1; /* ld integer */ + INST64_M2 M2; + INST64_M3 M3; + INST64_M4 M4; /* st integer */ + INST64_M5 M5; + INST64_M6 M6; /* ldfd floating pointer */ + INST64_M9 M9; /* stfd floating pointer */ + INST64_M10 M10; /* stfd floating pointer */ + INST64_M12 M12; /* ldfd pair floating pointer */ + INST64_M15 M15; /* lfetch + imm update */ + INST64_M28 M28; /* purge translation cache entry */ + INST64_M29 M29; /* mov register to ar (M unit) */ + INST64_M30 M30; /* mov immediate to ar (M unit) */ + INST64_M31 M31; /* mov from ar (M unit) */ + INST64_M32 M32; /* mov reg to cr */ + INST64_M33 M33; /* mov from cr */ + INST64_M35 M35; /* mov to psr */ + INST64_M36 M36; /* mov from psr */ + INST64_M37 M37; /* break.m */ + INST64_M41 M41; /* translation cache insert */ + INST64_M42 M42; /* mov to indirect reg/translation reg insert*/ + INST64_M43 M43; /* mov from indirect reg */ + INST64_M44 M44; /* set/reset system mask */ + INST64_M45 M45; /* translation purge */ + INST64_M46 M46; /* translation access (tpa,tak) */ + INST64_M47 M47; /* purge translation entry */ +} INST64; + +#define MASK_41 ((unsigned long)0x1ffffffffff) + +/* Virtual address memory attributes encoding */ +#define VA_MATTR_WB 0x0 +#define VA_MATTR_UC 0x4 +#define VA_MATTR_UCE 0x5 +#define VA_MATTR_WC 0x6 +#define VA_MATTR_NATPAGE 0x7 + +#define PMASK(size) (~((size) - 1)) +#define PSIZE(size) (1UL<<(size)) +#define CLEARLSB(ppn, nbits) (((ppn) >> (nbits)) << (nbits)) +#define PAGEALIGN(va, ps) CLEARLSB(va, ps) +#define PAGE_FLAGS_RV_MASK (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53)) +#define _PAGE_MA_ST (0x1 << 2) /* is reserved for software use */ + +#define ARCH_PAGE_SHIFT 12 + +#define INVALID_TI_TAG (1UL << 63) + +#define VTLB_PTE_P_BIT 0 +#define VTLB_PTE_IO_BIT 60 +#define VTLB_PTE_IO (1UL<> 61))) + +#define vcpu_quick_region_set(_tr_regions,_ifa) \ + do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0) + +static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir, + u64 va, u64 rid) +{ + trp->page_flags = pte; + trp->itir = itir; + trp->vadr = va; + trp->rid = rid; +} + +extern u64 kvm_lookup_mpa(u64 gpfn); +extern u64 kvm_gpa_to_mpa(u64 gpa); + +/* Return I/O type if trye */ +#define __gpfn_is_io(gpfn) \ + ({ \ + u64 pte, ret = 0; \ + pte = kvm_lookup_mpa(gpfn); \ + if (!(pte & GPFN_INV_MASK)) \ + ret = pte & GPFN_IO_MASK; \ + ret; \ + }) + +#endif + +#define IA64_NO_FAULT 0 +#define IA64_FAULT 1 + +#define VMM_RBS_OFFSET ((VMM_TASK_SIZE + 15) & ~15) + +#define SW_BAD 0 /* Bad mode transitition */ +#define SW_V2P 1 /* Physical emulatino is activated */ +#define SW_P2V 2 /* Exit physical mode emulation */ +#define SW_SELF 3 /* No mode transition */ +#define SW_NOP 4 /* Mode transition, but without action required */ + +#define GUEST_IN_PHY 0x1 +#define GUEST_PHY_EMUL 0x2 + +#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP)) + +#define VRN_SHIFT 61 +#define VRN_MASK 0xe000000000000000 +#define VRN0 0x0UL +#define VRN1 0x1UL +#define VRN2 0x2UL +#define VRN3 0x3UL +#define VRN4 0x4UL +#define VRN5 0x5UL +#define VRN6 0x6UL +#define VRN7 0x7UL + +#define IRQ_NO_MASKED 0 +#define IRQ_MASKED_BY_VTPR 1 +#define IRQ_MASKED_BY_INSVC 2 /* masked by inservice IRQ */ + +#define PTA_BASE_SHIFT 15 + +#define IA64_PSR_VM_BIT 46 +#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT) + +/* Interruption Function State */ +#define IA64_IFS_V_BIT 63 +#define IA64_IFS_V (__IA64_UL(1) << IA64_IFS_V_BIT) + +#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX) +#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX) + +#ifndef __ASSEMBLY__ + +#include + +#define is_physical_mode(v) \ + ((v->arch.mode_flags) & GUEST_IN_PHY) + +#define is_virtual_mode(v) \ + (!is_physical_mode(v)) + +#define MODE_IND(psr) \ + (((psr).it << 2) + ((psr).dt << 1) + (psr).rt) + +#define _vmm_raw_spin_lock(x) \ + do { \ + __u32 *ia64_spinlock_ptr = (__u32 *) (x); \ + __u64 ia64_spinlock_val; \ + ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + if (unlikely(ia64_spinlock_val)) { \ + do { \ + while (*ia64_spinlock_ptr) \ + ia64_barrier(); \ + ia64_spinlock_val = \ + ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\ + } while (ia64_spinlock_val); \ + } \ + } while (0) + +#define _vmm_raw_spin_unlock(x) \ + do { barrier(); \ + ((spinlock_t *)x)->raw_lock.lock = 0; } \ +while (0) + +void vmm_spin_lock(spinlock_t *lock); +void vmm_spin_unlock(spinlock_t *lock); +enum { + I_TLB = 1, + D_TLB = 2 +}; + +union kvm_va { + struct { + unsigned long off : 60; /* intra-region offset */ + unsigned long reg : 4; /* region number */ + } f; + unsigned long l; + void *p; +}; + +#define __kvm_pa(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = 0; _v.l; }) +#define __kvm_va(x) ({union kvm_va _v; _v.l = (long) (x); \ + _v.f.reg = -1; _v.p; }) + +#define _REGION_ID(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.rid; }) +#define _REGION_PAGE_SIZE(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ps; }) +#define _REGION_HW_WALKER(x) ({union ia64_rr _v; _v.val = (long)(x); \ + _v.ve; }) + +enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF }; +enum tlb_miss_type { INSTRUCTION, DATA, REGISTER }; + +#define VCPU(_v, _x) ((_v)->arch.vpd->_x) +#define VMX(_v, _x) ((_v)->arch._x) + +#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i]) +#define VLSAPIC_XTP(_v) VMX(_v, xtp) + +static inline unsigned long itir_ps(unsigned long itir) +{ + return ((itir >> 2) & 0x3f); +} + + +/************************************************************************** + VCPU control register access routines + **************************************************************************/ + +static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, itir)); +} + +static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, itir) = val; +} + +static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, ifa)); +} + +static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifa) = val; +} + +static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, iva)); +} + +static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, pta)); +} + +static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, lid)); +} + +static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, tpr)); +} + +static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu) +{ + return (0UL); /*reads of eoi always return 0 */ +} + +static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[0])); +} + +static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[1])); +} + +static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[2])); +} + +static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu) +{ + return ((u64)VCPU(vcpu, irr[3])); +} + +static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val) +{ + ia64_setreg(_IA64_REG_CR_DCR, val); +} + +static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, isr) = val; +} + +static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, lid) = val; +} + +static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ipsr) = val; +} + +static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iip) = val; +} + +static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, ifs) = val; +} + +static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iipa) = val; +} + +static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val) +{ + VCPU(vcpu, iha) = val; +} + + +static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg) +{ + return vcpu->arch.vrr[reg>>61]; +} + +/************************************************************************** + VCPU debug breakpoint register access routines + **************************************************************************/ + +static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + __ia64_set_dbr(reg, val); +} + +static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + ia64_set_ibr(reg, val); +} + +static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)__ia64_get_dbr(reg)); +} + +static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg) +{ + return ((u64)ia64_get_ibr(reg)); +} + +/************************************************************************** + VCPU performance monitor register access routines + **************************************************************************/ +static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMC registers are discarded */ + ia64_set_pmc(reg, val); +} + +static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val) +{ + /* NOTE: Writes to unimplemented PMD registers are discarded */ + ia64_set_pmd(reg, val); +} + +static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMC registers return zero */ + return ((u64)ia64_get_pmc(reg)); +} + +static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg) +{ + /* NOTE: Reads from unimplemented PMD registers return zero */ + return ((u64)ia64_get_pmd(reg)); +} + +static inline unsigned long vrrtomrr(unsigned long val) +{ + union ia64_rr rr; + rr.val = val; + rr.rid = (rr.rid << 4) | 0xe; + if (rr.ps > PAGE_SHIFT) + rr.ps = PAGE_SHIFT; + rr.ve = 1; + return rr.val; +} + + +static inline int highest_bits(int *dat) +{ + u32 bits, bitnum; + int i; + + /* loop for all 256 bits */ + for (i = 7; i >= 0 ; i--) { + bits = dat[i]; + if (bits) { + bitnum = fls(bits); + return i * 32 + bitnum - 1; + } + } + return NULL_VECTOR; +} + +/* + * The pending irq is higher than the inservice one. + * + */ +static inline int is_higher_irq(int pending, int inservice) +{ + return ((pending > inservice) + || ((pending != NULL_VECTOR) + && (inservice == NULL_VECTOR))); +} + +static inline int is_higher_class(int pending, int mic) +{ + return ((pending >> 4) > mic); +} + +/* + * Return 0-255 for pending irq. + * NULL_VECTOR: when no pending. + */ +static inline int highest_pending_irq(struct kvm_vcpu *vcpu) +{ + if (VCPU(vcpu, irr[0]) & (1UL< +#include + +#include "vcpu.h" + +MODULE_AUTHOR("Intel"); +MODULE_LICENSE("GPL"); + +extern char kvm_ia64_ivt; +extern fpswa_interface_t *vmm_fpswa_interface; + +struct kvm_vmm_info vmm_info = { + .module = THIS_MODULE, + .vmm_entry = vmm_entry, + .tramp_entry = vmm_trampoline, + .vmm_ivt = (unsigned long)&kvm_ia64_ivt, +}; + +static int __init kvm_vmm_init(void) +{ + + vmm_fpswa_interface = fpswa_interface; + + /*Register vmm data to kvm side*/ + return kvm_init(&vmm_info, 1024, THIS_MODULE); +} + +static void __exit kvm_vmm_exit(void) +{ + kvm_exit(); + return ; +} + +void vmm_spin_lock(spinlock_t *lock) +{ + _vmm_raw_spin_lock(lock); +} + +void vmm_spin_unlock(spinlock_t *lock) +{ + _vmm_raw_spin_unlock(lock); +} +module_init(kvm_vmm_init) +module_exit(kvm_vmm_exit) diff --git a/trunk/arch/ia64/kvm/vmm_ivt.S b/trunk/arch/ia64/kvm/vmm_ivt.S new file mode 100644 index 000000000000..3ee5f481c06d --- /dev/null +++ b/trunk/arch/ia64/kvm/vmm_ivt.S @@ -0,0 +1,1424 @@ +/* + * /ia64/kvm_ivt.S + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick + * Suresh Siddha + * Kenneth Chen + * Fenghua Yu + * + * + * 00/08/23 Asit Mallick TLB handling + * for SMP + * 00/12/20 David Mosberger-Tang DTLB/ITLB + * handler now uses virtual PT. + * + * 07/6/20 Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Supporting Intel virtualization architecture + * + */ + +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for + * critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss + * (12,51) + * entry offset ----/ / / / + * / + * entry number ---------/ / / + * / + * size of the entry -------------/ / + * / + * vector name -------------------------------------/ + * / + * interruptions triggering this vector + * ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB + * boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + + +#include +#include +#include + +#include "asm-offsets.h" +#include "vcpu.h" +#include "kvm_minstate.h" +#include "vti.h" + +#if 1 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + + +#define KVM_FAULT(n) \ + kvm_fault_##n:; \ + mov r19=n;; \ + br.sptk.many kvm_fault_##n; \ + ;; \ + + +#define KVM_REFLECT(n) \ + mov r31=pr; \ + mov r19=n; /* prepare to save predicates */ \ + mov r29=cr.ipsr; \ + ;; \ + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; \ +(p7)br.sptk.many kvm_dispatch_reflection; \ + br.sptk.many kvm_panic; \ + + +GLOBAL_ENTRY(kvm_panic) + br.sptk.many kvm_panic + ;; +END(kvm_panic) + + + + + + .section .text.ivt,"ax" + + .align 32768 // align on 32KB boundary + .global kvm_ia64_ivt +kvm_ia64_ivt: +/////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(kvm_vhpt_miss) + KVM_FAULT(0) +END(kvm_vhpt_miss) + + + .org kvm_ia64_ivt+0x400 +//////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(kvm_itlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; + (p6) br.sptk kvm_alt_itlb_miss + mov r19 = 1 + br.sptk kvm_itlb_miss_dispatch + KVM_FAULT(1); +END(kvm_itlb_miss) + + .org kvm_ia64_ivt+0x0800 +////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(kvm_dtlb_miss) + mov r31 = pr + mov r29=cr.ipsr; + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT; +(p6)br.sptk kvm_alt_dtlb_miss + br.sptk kvm_dtlb_miss_dispatch +END(kvm_dtlb_miss) + + .org kvm_ia64_ivt+0x0c00 +//////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(kvm_alt_itlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + mov r24=cr.ipsr + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r17,r19 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.i r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_itlb_miss) + + .org kvm_ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(kvm_alt_dtlb_miss) + mov r16=cr.ifa // get address that caused the TLB miss + ;; + movl r17=PAGE_KERNEL + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r24=cr.ipsr + ;; + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + ;; + or r19=r19,r17 // insert PTE control bits into r19 + ;; + movl r20=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r20 + ;; + itc.d r19 // insert the TLB entry + mov pr=r31,-1 + rfi +END(kvm_alt_dtlb_miss) + + .org kvm_ia64_ivt+0x1400 +////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(kvm_nested_dtlb_miss) + KVM_FAULT(5) +END(kvm_nested_dtlb_miss) + + .org kvm_ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(kvm_ikey_miss) + KVM_REFLECT(6) +END(kvm_ikey_miss) + + .org kvm_ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(kvm_dkey_miss) + KVM_REFLECT(7) +END(kvm_dkey_miss) + + .org kvm_ia64_ivt+0x2000 +//////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(kvm_dirty_bit) + KVM_REFLECT(8) +END(kvm_dirty_bit) + + .org kvm_ia64_ivt+0x2400 +//////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(kvm_iaccess_bit) + KVM_REFLECT(9) +END(kvm_iaccess_bit) + + .org kvm_ia64_ivt+0x2800 +/////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(kvm_daccess_bit) + KVM_REFLECT(10) +END(kvm_daccess_bit) + + .org kvm_ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(kvm_break_fault) + mov r31=pr + mov r19=11 + mov r29=cr.ipsr + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,4,0 // now it's safe (must be first in insn group!) + mov out0=cr.ifa + mov out2=cr.isr // FIXME: pity to make this slow access twice + mov out3=cr.iim // FIXME: pity to make this slow access twice + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15)ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out1=16,sp + br.call.sptk.many b6=kvm_ia64_handle_break + ;; +END(kvm_break_fault) + + .org kvm_ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(kvm_interrupt) + mov r31=pr // prepare to save predicates + mov r19=12 + mov r29=cr.ipsr + ;; + tbit.z p6,p7=r29,IA64_PSR_VM_BIT + tbit.z p0,p15=r29,IA64_PSR_I_BIT + ;; +(p7) br.sptk kvm_dispatch_interrupt + ;; + mov r27=ar.rsc /* M */ + mov r20=r1 /* A */ + mov r25=ar.unat /* M */ + mov r26=ar.pfs /* I */ + mov r28=cr.iip /* M */ + cover /* B (or nothing) */ + ;; + mov r1=sp + ;; + invala /* M */ + mov r30=cr.ifs + ;; + addl r1=-VMM_PT_REGS_SIZE,r1 + ;; + adds r17=2*L1_CACHE_BYTES,r1 /* really: biggest cache-line size */ + adds r16=PT(CR_IPSR),r1 + ;; + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES + st8 [r16]=r29 /* save cr.ipsr */ + ;; + lfetch.fault.excl.nt1 [r17] + mov r29=b0 + ;; + adds r16=PT(R8),r1 /* initialize first base pointer */ + adds r17=PT(R9),r1 /* initialize second base pointer */ + mov r18=r0 /* make sure r18 isn't NaT */ + ;; +.mem.offset 0,0; st8.spill [r16]=r8,16 +.mem.offset 8,0; st8.spill [r17]=r9,16 + ;; +.mem.offset 0,0; st8.spill [r16]=r10,24 +.mem.offset 8,0; st8.spill [r17]=r11,24 + ;; + st8 [r16]=r28,16 /* save cr.iip */ + st8 [r17]=r30,16 /* save cr.ifs */ + mov r8=ar.fpsr /* M */ + mov r9=ar.csd + mov r10=ar.ssd + movl r11=FPSR_DEFAULT /* L-unit */ + ;; + st8 [r16]=r25,16 /* save ar.unat */ + st8 [r17]=r26,16 /* save ar.pfs */ + shl r18=r18,16 /* compute ar.rsc to be used for "loadrs" */ + ;; + st8 [r16]=r27,16 /* save ar.rsc */ + adds r17=16,r17 /* skip over ar_rnat field */ + ;; + st8 [r17]=r31,16 /* save predicates */ + adds r16=16,r16 /* skip over ar_bspstore field */ + ;; + st8 [r16]=r29,16 /* save b0 */ + st8 [r17]=r18,16 /* save ar.rsc value for "loadrs" */ + ;; +.mem.offset 0,0; st8.spill [r16]=r20,16 /* save original r1 */ +.mem.offset 8,0; st8.spill [r17]=r12,16 + adds r12=-16,r1 + /* switch to kernel memory stack (with 16 bytes of scratch) */ + ;; +.mem.offset 0,0; st8.spill [r16]=r13,16 +.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */ + ;; +.mem.offset 0,0; st8.spill [r16]=r15,16 +.mem.offset 8,0; st8.spill [r17]=r14,16 + dep r14=-1,r0,60,4 + ;; +.mem.offset 0,0; st8.spill [r16]=r2,16 +.mem.offset 8,0; st8.spill [r17]=r3,16 + adds r2=VMM_PT_REGS_R16_OFFSET,r1 + adds r14 = VMM_VCPU_GP_OFFSET,r13 + ;; + mov r8=ar.ccv + ld8 r14 = [r14] + ;; + mov r1=r14 /* establish kernel global pointer */ + ;; \ + bsw.1 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + mov out0=r13 + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + srlz.i // ensure everybody knows psr.ic is back on + ;; +.mem.offset 0,0; st8.spill [r2]=r16,16 +.mem.offset 8,0; st8.spill [r3]=r17,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r18,16 +.mem.offset 8,0; st8.spill [r3]=r19,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r20,16 +.mem.offset 8,0; st8.spill [r3]=r21,16 + mov r18=b6 + ;; +.mem.offset 0,0; st8.spill [r2]=r22,16 +.mem.offset 8,0; st8.spill [r3]=r23,16 + mov r19=b7 + ;; +.mem.offset 0,0; st8.spill [r2]=r24,16 +.mem.offset 8,0; st8.spill [r3]=r25,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r26,16 +.mem.offset 8,0; st8.spill [r3]=r27,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r28,16 +.mem.offset 8,0; st8.spill [r3]=r29,16 + ;; +.mem.offset 0,0; st8.spill [r2]=r30,16 +.mem.offset 8,0; st8.spill [r3]=r31,32 + ;; + mov ar.fpsr=r11 /* M-unit */ + st8 [r2]=r8,8 /* ar.ccv */ + adds r24=PT(B6)-PT(F7),r3 + ;; + stf.spill [r2]=f6,32 + stf.spill [r3]=f7,32 + ;; + stf.spill [r2]=f8,32 + stf.spill [r3]=f9,32 + ;; + stf.spill [r2]=f10 + stf.spill [r3]=f11 + adds r25=PT(B7)-PT(F11),r3 + ;; + st8 [r24]=r18,16 /* b6 */ + st8 [r25]=r19,16 /* b7 */ + ;; + st8 [r24]=r9 /* ar.csd */ + st8 [r25]=r10 /* ar.ssd */ + ;; + srlz.d // make sure we see the effect of cr.ivr + addl r14=@gprel(ia64_leave_nested),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_ia64_handle_irq + ;; +END(kvm_interrupt) + + .global kvm_dispatch_vexirq + .org kvm_ia64_ivt+0x3400 +////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved +ENTRY(kvm_virtual_exirq) + mov r31=pr + mov r19=13 + mov r30 =r0 + ;; +kvm_dispatch_vexirq: + cmp.eq p6,p0 = 1,r30 + ;; +(p6)add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; +(p6)ld8 r1 = [r29] + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,1,0 + mov out0=r13 + + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + KVM_SAVE_REST + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + mov rp=r14 + br.call.sptk.many b6=kvm_vexirq +END(kvm_virtual_exirq) + + .org kvm_ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + KVM_FAULT(14) + // this code segment is from 2.6.16.13 + + + .org kvm_ia64_ivt+0x3c00 +/////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + KVM_FAULT(15) + + + .org kvm_ia64_ivt+0x4000 +/////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + KVM_FAULT(16) + + .org kvm_ia64_ivt+0x4400 +////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + KVM_FAULT(17) + + .org kvm_ia64_ivt+0x4800 +////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + KVM_FAULT(18) + + .org kvm_ia64_ivt+0x4c00 +////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + KVM_FAULT(19) + + .org kvm_ia64_ivt+0x5000 +////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present +ENTRY(kvm_page_not_present) + KVM_REFLECT(20) +END(kvm_page_not_present) + + .org kvm_ia64_ivt+0x5100 +/////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission vector +ENTRY(kvm_key_permission) + KVM_REFLECT(21) +END(kvm_key_permission) + + .org kvm_ia64_ivt+0x5200 +////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(kvm_iaccess_rights) + KVM_REFLECT(22) +END(kvm_iaccess_rights) + + .org kvm_ia64_ivt+0x5300 +////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(kvm_daccess_rights) + KVM_REFLECT(23) +END(kvm_daccess_rights) + + .org kvm_ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(kvm_general_exception) + KVM_REFLECT(24) + KVM_FAULT(24) +END(kvm_general_exception) + + .org kvm_ia64_ivt+0x5500 +////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(kvm_disabled_fp_reg) + KVM_REFLECT(25) +END(kvm_disabled_fp_reg) + + .org kvm_ia64_ivt+0x5600 +//////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(kvm_nat_consumption) + KVM_REFLECT(26) +END(kvm_nat_consumption) + + .org kvm_ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(kvm_speculation_vector) + KVM_REFLECT(27) +END(kvm_speculation_vector) + + .org kvm_ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + KVM_FAULT(28) + + .org kvm_ia64_ivt+0x5900 +/////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(kvm_debug_vector) + KVM_FAULT(29) +END(kvm_debug_vector) + + .org kvm_ia64_ivt+0x5a00 +/////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(kvm_unaligned_access) + KVM_REFLECT(30) +END(kvm_unaligned_access) + + .org kvm_ia64_ivt+0x5b00 +////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(kvm_unsupported_data_reference) + KVM_REFLECT(31) +END(kvm_unsupported_data_reference) + + .org kvm_ia64_ivt+0x5c00 +//////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65) +ENTRY(kvm_floating_point_fault) + KVM_REFLECT(32) +END(kvm_floating_point_fault) + + .org kvm_ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(kvm_floating_point_trap) + KVM_REFLECT(33) +END(kvm_floating_point_trap) + + .org kvm_ia64_ivt+0x5e00 +////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(kvm_lower_privilege_trap) + KVM_REFLECT(34) +END(kvm_lower_privilege_trap) + + .org kvm_ia64_ivt+0x5f00 +////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(kvm_taken_branch_trap) + KVM_REFLECT(35) +END(kvm_taken_branch_trap) + + .org kvm_ia64_ivt+0x6000 +//////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(kvm_single_step_trap) + KVM_REFLECT(36) +END(kvm_single_step_trap) + .global kvm_virtualization_fault_back + .org kvm_ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault +ENTRY(kvm_virtualization_fault) + mov r31=pr + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + st8 [r16] = r1 + adds r17 = VMM_VCPU_GP_OFFSET, r21 + ;; + ld8 r1 = [r17] + cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24 + cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24 + cmp.eq p8,p0=EVENT_MOV_TO_RR,r24 + cmp.eq p9,p0=EVENT_RSM,r24 + cmp.eq p10,p0=EVENT_SSM,r24 + cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24 + cmp.eq p12,p0=EVENT_THASH,r24 + (p6) br.dptk.many kvm_asm_mov_from_ar + (p7) br.dptk.many kvm_asm_mov_from_rr + (p8) br.dptk.many kvm_asm_mov_to_rr + (p9) br.dptk.many kvm_asm_rsm + (p10) br.dptk.many kvm_asm_ssm + (p11) br.dptk.many kvm_asm_mov_to_psr + (p12) br.dptk.many kvm_asm_thash + ;; +kvm_virtualization_fault_back: + adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21 + ;; + ld8 r1 = [r16] + ;; + mov r19=37 + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + cmp.ne p6,p0=EVENT_RFI, r24 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] + ;; + adds r18=VMM_VPD_VIFS_OFFSET,r18 + ;; + ld8 r18=[r18] + ;; + tbit.z p6,p0=r18,63 + (p6) br.sptk kvm_dispatch_virtualization_fault + ;; + //if vifs.v=1 desert current register frame + alloc r18=ar.pfs,0,0,0,0 + br.sptk kvm_dispatch_virtualization_fault +END(kvm_virtualization_fault) + + .org kvm_ia64_ivt+0x6200 +////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + KVM_FAULT(38) + + .org kvm_ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + KVM_FAULT(39) + + .org kvm_ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + KVM_FAULT(40) + + .org kvm_ia64_ivt+0x6500 +////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + KVM_FAULT(41) + + .org kvm_ia64_ivt+0x6600 +////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + KVM_FAULT(42) + + .org kvm_ia64_ivt+0x6700 +////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + KVM_FAULT(43) + + .org kvm_ia64_ivt+0x6800 +////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + KVM_FAULT(44) + + .org kvm_ia64_ivt+0x6900 +/////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception +//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(kvm_ia32_exception) + KVM_FAULT(45) +END(kvm_ia32_exception) + + .org kvm_ia64_ivt+0x6a00 +//////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(kvm_ia32_intercept) + KVM_FAULT(47) +END(kvm_ia32_intercept) + + .org kvm_ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + KVM_FAULT(48) + + .org kvm_ia64_ivt+0x6d00 +////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + KVM_FAULT(49) + + .org kvm_ia64_ivt+0x6e00 +////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + KVM_FAULT(50) + + .org kvm_ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + KVM_FAULT(52) + + .org kvm_ia64_ivt+0x7100 +//////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + KVM_FAULT(53) + + .org kvm_ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + KVM_FAULT(54) + + .org kvm_ia64_ivt+0x7300 +//////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + KVM_FAULT(55) + + .org kvm_ia64_ivt+0x7400 +//////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + KVM_FAULT(56) + + .org kvm_ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + KVM_FAULT(57) + + .org kvm_ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + KVM_FAULT(58) + + .org kvm_ia64_ivt+0x7700 +//////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + KVM_FAULT(59) + + .org kvm_ia64_ivt+0x7800 +//////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + KVM_FAULT(60) + + .org kvm_ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + KVM_FAULT(61) + + .org kvm_ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + KVM_FAULT(62) + + .org kvm_ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + KVM_FAULT(63) + + .org kvm_ia64_ivt+0x7c00 +//////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + KVM_FAULT(64) + + .org kvm_ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + KVM_FAULT(65) + + .org kvm_ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + KVM_FAULT(66) + + .org kvm_ia64_ivt+0x7f00 +//////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + KVM_FAULT(67) + + .org kvm_ia64_ivt+0x8000 +// There is no particular reason for this code to be here, other than that +// there happens to be space here that would go unused otherwise. If this +// fault ever gets "unreserved", simply moved the following code to a more +// suitable spot... + + +ENTRY(kvm_dtlb_miss_dispatch) + mov r19 = 2 + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_dtlb_miss_dispatch) + +ENTRY(kvm_itlb_miss_dispatch) + + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,3,0 + mov out0=cr.ifa + mov out1=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 + br.call.sptk.many b6=kvm_page_fault +END(kvm_itlb_miss_dispatch) + +ENTRY(kvm_dispatch_reflection) + /* + * Input: + * psr.ic: off + * r19: intr type (offset into ivt, see ia64_int.h) + * r31: contains saved predicates (pr) + */ + KVM_SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + mov out0=cr.ifa + mov out1=cr.isr + mov out2=cr.iim + mov out3=r15 + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + adds out4=16,r12 + br.call.sptk.many b6=reflect_interruption +END(kvm_dispatch_reflection) + +ENTRY(kvm_dispatch_virtualization_fault) + adds r16 = VMM_VCPU_CAUSE_OFFSET,r21 + adds r17 = VMM_VCPU_OPCODE_OFFSET,r21 + ;; + st8 [r16] = r24 + st8 [r17] = r25 + ;; + KVM_SAVE_MIN_WITH_COVER_R19 + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + mov out0=r13 //vcpu + adds r3=8,r2 // set up second base pointer + ;; + ssm psr.ic + ;; + srlz.i // guarantee that interruption collection is on + ;; + //(p15) ssm psr.i // restore psr.i + addl r14=@gprel(ia64_leave_hypervisor_prepare),gp + ;; + KVM_SAVE_REST + KVM_SAVE_EXTRA + mov rp=r14 + ;; + adds out1=16,sp //regs + br.call.sptk.many b6=kvm_emulate +END(kvm_dispatch_virtualization_fault) + + +ENTRY(kvm_dispatch_interrupt) + KVM_SAVE_MIN_WITH_COVER_R19 // uses r31; defines r2 and r3 + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group + //mov out0=cr.ivr // pass cr.ivr as first arg + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + ssm psr.ic + ;; + srlz.i + ;; + //(p15) ssm psr.i + addl r14=@gprel(ia64_leave_hypervisor),gp + ;; + KVM_SAVE_REST + mov rp=r14 + ;; + mov out0=r13 // pass pointer to pt_regs as second arg + br.call.sptk.many b6=kvm_ia64_handle_irq +END(kvm_dispatch_interrupt) + + + + +GLOBAL_ENTRY(ia64_leave_nested) + rsm psr.i + ;; + adds r21=PT(PR)+16,r12 + ;; + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3] + adds r3=PT(AR_CSD)-PT(R16),r3 + adds r30=PT(AR_CCV)+16,r12 + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.i // ensure interruption collection is off + mov ar.ccv=r15 + ;; + bsw.0 // switch back to bank 0 (no stop bit required beforehand...) + ;; + ldf.fill f11=[r2] +// mov r18=r13 +// mov r21=r13 + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + ;; + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 + // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16// load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] + ;; + ld8.fill r3=[r16] + ;; + mov r16=ar.bsp // get existing backing store pointer + ;; + mov b0=r22 + mov ar.pfs=r26 + mov cr.ifs=r30 + mov cr.ipsr=r29 + mov ar.fpsr=r20 + mov cr.iip=r28 + ;; + mov ar.rsc=r27 + mov ar.unat=r25 + mov pr=r31,-1 + rfi +END(ia64_leave_nested) + + + +GLOBAL_ENTRY(ia64_leave_hypervisor_prepare) + /* + * work.need_resched etc. mustn't get changed + *by this CPU before it returns to + ;; + * user- or fsys-mode, hence we disable interrupts early on: + */ + adds r2 = PT(R4)+16,r12 + adds r3 = PT(R5)+16,r12 + adds r8 = PT(EML_UNAT)+16,r12 + ;; + ld8 r8 = [r8] + ;; + mov ar.unat=r8 + ;; + ld8.fill r4=[r2],16 //load r4 + ld8.fill r5=[r3],16 //load r5 + ;; + ld8.fill r6=[r2] //load r6 + ld8.fill r7=[r3] //load r7 + ;; +END(ia64_leave_hypervisor_prepare) +//fall through +GLOBAL_ENTRY(ia64_leave_hypervisor) + rsm psr.i + ;; + br.call.sptk.many b0=leave_hypervisor_tail + ;; + adds r20=PT(PR)+16,r12 + adds r8=PT(EML_UNAT)+16,r12 + ;; + ld8 r8=[r8] + ;; + mov ar.unat=r8 + ;; + lfetch [r20],PT(CR_IPSR)-PT(PR) + adds r2 = PT(B6)+16,r12 + adds r3 = PT(B7)+16,r12 + ;; + lfetch [r20] + ;; + ld8 r24=[r2],16 /* B6 */ + ld8 r25=[r3],16 /* B7 */ + ;; + ld8 r26=[r2],16 /* ar_csd */ + ld8 r27=[r3],16 /* ar_ssd */ + mov b6 = r24 + ;; + ld8.fill r8=[r2],16 + ld8.fill r9=[r3],16 + mov b7 = r25 + ;; + mov ar.csd = r26 + mov ar.ssd = r27 + ;; + ld8.fill r10=[r2],PT(R15)-PT(R10) + ld8.fill r11=[r3],PT(R14)-PT(R11) + ;; + ld8.fill r15=[r2],PT(R16)-PT(R15) + ld8.fill r14=[r3],PT(R17)-PT(R14) + ;; + ld8.fill r16=[r2],16 + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + ;; + ld8.fill r22=[r2],16 + ld8.fill r23=[r3],16 + ;; + ld8.fill r24=[r2],16 + ld8.fill r25=[r3],16 + ;; + ld8.fill r26=[r2],16 + ld8.fill r27=[r3],16 + ;; + ld8.fill r28=[r2],16 + ld8.fill r29=[r3],16 + ;; + ld8.fill r30=[r2],PT(F6)-PT(R30) + ld8.fill r31=[r3],PT(F7)-PT(R31) + ;; + rsm psr.i | psr.ic + // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + srlz.i // ensure interruption collection is off + ;; + bsw.0 + ;; + adds r16 = PT(CR_IPSR)+16,r12 + adds r17 = PT(CR_IIP)+16,r12 + mov r21=r13 // get current + ;; + ld8 r31=[r16],16 // load cr.ipsr + ld8 r30=[r17],16 // load cr.iip + ;; + ld8 r29=[r16],16 // load cr.ifs + ld8 r28=[r17],16 // load ar.unat + ;; + ld8 r27=[r16],16 // load ar.pfs + ld8 r26=[r17],16 // load ar.rsc + ;; + ld8 r25=[r16],16 // load ar.rnat + ld8 r24=[r17],16 // load ar.bspstore + ;; + ld8 r23=[r16],16 // load predicates + ld8 r22=[r17],16 // load b0 + ;; + ld8 r20=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 //load r1 + ;; + ld8.fill r12=[r16],16 //load r12 + ld8.fill r13=[r17],PT(R2)-PT(R13) //load r13 + ;; + ld8 r19=[r16],PT(R3)-PT(AR_FPSR) //load ar_fpsr + ld8.fill r2=[r17],PT(AR_CCV)-PT(R2) //load r2 + ;; + ld8.fill r3=[r16] //load r3 + ld8 r18=[r17] //load ar_ccv + ;; + mov ar.fpsr=r19 + mov ar.ccv=r18 + shr.u r18=r20,16 + ;; +kvm_rbs_switch: + mov r19=96 + +kvm_dont_preserve_current_frame: +/* + * To prevent leaking bits between the hypervisor and guest domain, + * we must clear the stacked registers in the "invalid" partition here. + * 5 registers/cycle on McKinley). + */ +# define pRecurse p6 +# define pReturn p7 +# define Nregs 14 + + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r19=r19,r18 // r19 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r20 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r19 + mov in1=0 + ;; + TEXT_ALIGN(32) +kvm_rse_clear_invalid: + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 + // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 + // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 + +# undef pRecurse +# undef pReturn + +// loadrs has already been shifted + alloc r16=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; + mov ar.bspstore=r24 + ;; + mov ar.unat=r28 + mov ar.rnat=r25 + mov ar.rsc=r26 + ;; + mov cr.ipsr=r31 + mov cr.iip=r30 + mov cr.ifs=r29 + mov ar.pfs=r27 + adds r18=VMM_VPD_BASE_OFFSET,r21 + ;; + ld8 r18=[r18] //vpd + adds r17=VMM_VCPU_ISR_OFFSET,r21 + ;; + ld8 r17=[r17] + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] //vpsr + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r21 + ;; + ld8 r20=[r20] + ;; +//vsa_sync_write_start + mov r25=r18 + adds r16= VMM_VCPU_GP_OFFSET,r21 + ;; + ld8 r16= [r16] // Put gp in r24 + movl r24=@gprel(ia64_vmm_entry) // calculate return address + ;; + add r24=r24,r16 + ;; + add r16=PAL_VPS_SYNC_WRITE,r20 + ;; + mov b0=r16 + br.cond.sptk b0 // call the service + ;; +END(ia64_leave_hypervisor) +// fall through +GLOBAL_ENTRY(ia64_vmm_entry) +/* + * must be at bank 0 + * parameter: + * r17:cr.isr + * r18:vpd + * r19:vpsr + * r20:__vsa_base + * r22:b0 + * r23:predicate + */ + mov r24=r22 + mov r25=r18 + tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT // p1=vpsr.ic + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p1) br.sptk.many ia64_vmm_entry_out + ;; + tbit.nz p1,p2 = r17,IA64_ISR_IR_BIT //p1=cr.isr.ir + ;; + (p1) add r29=PAL_VPS_RESUME_NORMAL,r20 + (p2) add r29=PAL_VPS_RESUME_HANDLER,r20 + (p2) ld8 r26=[r25] + ;; +ia64_vmm_entry_out: + mov pr=r23,-2 + mov b0=r29 + ;; + br.cond.sptk b0 // call pal service +END(ia64_vmm_entry) + + + +/* + * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, + * u64 arg3, u64 arg4, u64 arg5, + * u64 arg6, u64 arg7); + * + * XXX: The currently defined services use only 4 args at the max. The + * rest are not consumed. + */ +GLOBAL_ENTRY(ia64_call_vsa) + .regstk 4,4,0,0 + +rpsave = loc0 +pfssave = loc1 +psrsave = loc2 +entry = loc3 +hostret = r24 + + alloc pfssave=ar.pfs,4,4,0,0 + mov rpsave=rp + adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13 + ;; + ld8 entry=[entry] +1: mov hostret=ip + mov r25=in1 // copy arguments + mov r26=in2 + mov r27=in3 + mov psrsave=psr + ;; + tbit.nz p6,p0=psrsave,14 // IA64_PSR_I + tbit.nz p7,p0=psrsave,13 // IA64_PSR_IC + ;; + add hostret=2f-1b,hostret // calculate return address + add entry=entry,in0 + ;; + rsm psr.i | psr.ic + ;; + srlz.i + mov b6=entry + br.cond.sptk b6 // call the service +2: + // Architectural sequence for enabling interrupts if necessary +(p7) ssm psr.ic + ;; +(p7) srlz.i + ;; +//(p6) ssm psr.i + ;; + mov rp=rpsave + mov ar.pfs=pfssave + mov r8=r31 + ;; + srlz.d + br.ret.sptk rp + +END(ia64_call_vsa) + +#define INIT_BSPSTORE ((4<<30)-(12<<20)-0x100) + +GLOBAL_ENTRY(vmm_reset_entry) + //set up ipsr, iip, vpd.vpsr, dcr + // For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1 + // For DCR: all bits 0 + adds r14=-VMM_PT_REGS_SIZE, r12 + ;; + movl r6=0x501008826000 // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1 + movl r10=0x8000000000000000 + adds r16=PT(CR_IIP), r14 + adds r20=PT(R1), r14 + ;; + rsm psr.ic | psr.i + ;; + srlz.i + ;; + bsw.0 + ;; + mov r21 =r13 + ;; + bsw.1 + ;; + mov ar.rsc = 0 + ;; + flushrs + ;; + mov ar.bspstore = 0 + // clear BSPSTORE + ;; + mov cr.ipsr=r6 + mov cr.ifs=r10 + ld8 r4 = [r16] // Set init iip for first run. + ld8 r1 = [r20] + ;; + mov cr.iip=r4 + ;; + adds r16=VMM_VPD_BASE_OFFSET,r13 + adds r20=VMM_VCPU_VSA_BASE_OFFSET,r13 + ;; + ld8 r18=[r16] + ld8 r20=[r20] + ;; + adds r19=VMM_VPD_VPSR_OFFSET,r18 + ;; + ld8 r19=[r19] + mov r17=r0 + mov r22=r0 + mov r23=r0 + br.cond.sptk ia64_vmm_entry + br.ret.sptk b0 +END(vmm_reset_entry) diff --git a/trunk/arch/ia64/kvm/vti.h b/trunk/arch/ia64/kvm/vti.h new file mode 100644 index 000000000000..f6c5617e16af --- /dev/null +++ b/trunk/arch/ia64/kvm/vti.h @@ -0,0 +1,290 @@ +/* + * vti.h: prototype for generial vt related interface + * Copyright (c) 2004, Intel Corporation. + * + * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com) + * Fred Yang (fred.yang@intel.com) + * Kun Tian (Kevin Tian) (kevin.tian@intel.com) + * + * Copyright (c) 2007, Intel Corporation. + * Zhang xiantao + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + */ +#ifndef _KVM_VT_I_H +#define _KVM_VT_I_H + +#ifndef __ASSEMBLY__ +#include + +#include + +/* define itr.i and itr.d in ia64_itr function */ +#define ITR 0x01 +#define DTR 0x02 +#define IaDTR 0x03 + +#define IA64_TR_VMM 6 /*itr6, dtr6 : maps vmm code, vmbuffer*/ +#define IA64_TR_VM_DATA 7 /*dtr7 : maps current vm data*/ + +#define RR6 (6UL<<61) +#define RR7 (7UL<<61) + + +/* config_options in pal_vp_init_env */ +#define VP_INITIALIZE 1UL +#define VP_FR_PMC 1UL<<1 +#define VP_OPCODE 1UL<<8 +#define VP_CAUSE 1UL<<9 +#define VP_FW_ACC 1UL<<63 + +/* init vp env with initializing vm_buffer */ +#define VP_INIT_ENV_INITALIZE (VP_INITIALIZE | VP_FR_PMC |\ + VP_OPCODE | VP_CAUSE | VP_FW_ACC) +/* init vp env without initializing vm_buffer */ +#define VP_INIT_ENV VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC + +#define PAL_VP_CREATE 265 +/* Stacked Virt. Initializes a new VPD for the operation of + * a new virtual processor in the virtual environment. + */ +#define PAL_VP_ENV_INFO 266 +/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/ +#define PAL_VP_EXIT_ENV 267 +/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/ +#define PAL_VP_INIT_ENV 268 +/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/ +#define PAL_VP_REGISTER 269 +/*Stacked Virt. Register a different host IVT for the virtual processor.*/ +#define PAL_VP_RESUME 270 +/* Renamed from PAL_VP_RESUME */ +#define PAL_VP_RESTORE 270 +/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/ +#define PAL_VP_SUSPEND 271 +/* Renamed from PAL_VP_SUSPEND */ +#define PAL_VP_SAVE 271 +/* Stacked Virt. Suspends operation for the specified virtual processor on + * the logical processor. + */ +#define PAL_VP_TERMINATE 272 +/* Stacked Virt. Terminates operation for the specified virtual processor.*/ + +union vac { + unsigned long value; + struct { + int a_int:1; + int a_from_int_cr:1; + int a_to_int_cr:1; + int a_from_psr:1; + int a_from_cpuid:1; + int a_cover:1; + int a_bsw:1; + long reserved:57; + }; +}; + +union vdc { + unsigned long value; + struct { + int d_vmsw:1; + int d_extint:1; + int d_ibr_dbr:1; + int d_pmc:1; + int d_to_pmd:1; + int d_itm:1; + long reserved:58; + }; +}; + +struct vpd { + union vac vac; + union vdc vdc; + unsigned long virt_env_vaddr; + unsigned long reserved1[29]; + unsigned long vhpi; + unsigned long reserved2[95]; + unsigned long vgr[16]; + unsigned long vbgr[16]; + unsigned long vnat; + unsigned long vbnat; + unsigned long vcpuid[5]; + unsigned long reserved3[11]; + unsigned long vpsr; + unsigned long vpr; + unsigned long reserved4[76]; + union { + unsigned long vcr[128]; + struct { + unsigned long dcr; + unsigned long itm; + unsigned long iva; + unsigned long rsv1[5]; + unsigned long pta; + unsigned long rsv2[7]; + unsigned long ipsr; + unsigned long isr; + unsigned long rsv3; + unsigned long iip; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long ifs; + unsigned long iim; + unsigned long iha; + unsigned long rsv4[38]; + unsigned long lid; + unsigned long ivr; + unsigned long tpr; + unsigned long eoi; + unsigned long irr[4]; + unsigned long itv; + unsigned long pmv; + unsigned long cmcv; + unsigned long rsv5[5]; + unsigned long lrr0; + unsigned long lrr1; + unsigned long rsv6[46]; + }; + }; + unsigned long reserved5[128]; + unsigned long reserved6[3456]; + unsigned long vmm_avail[128]; + unsigned long reserved7[4096]; +}; + +#define PAL_PROC_VM_BIT (1UL << 40) +#define PAL_PROC_VMSW_BIT (1UL << 54) + +static inline s64 ia64_pal_vp_env_info(u64 *buffer_size, + u64 *vp_env_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0); + *buffer_size = iprv.v0; + *vp_env_info = iprv.v1; + return iprv.status; +} + +static inline s64 ia64_pal_vp_exit_env(u64 iva) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0); + return iprv.status; +} + +static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr, + u64 vbase_addr, u64 *vsa_base) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr, + vbase_addr); + *vsa_base = iprv.v0; + + return iprv.status; +} + +static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector) +{ + struct ia64_pal_retval iprv; + + PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0); + + return iprv.status; +} + +#endif + +/*VPD field offset*/ +#define VPD_VAC_START_OFFSET 0 +#define VPD_VDC_START_OFFSET 8 +#define VPD_VHPI_START_OFFSET 256 +#define VPD_VGR_START_OFFSET 1024 +#define VPD_VBGR_START_OFFSET 1152 +#define VPD_VNAT_START_OFFSET 1280 +#define VPD_VBNAT_START_OFFSET 1288 +#define VPD_VCPUID_START_OFFSET 1296 +#define VPD_VPSR_START_OFFSET 1424 +#define VPD_VPR_START_OFFSET 1432 +#define VPD_VRSE_CFLE_START_OFFSET 1440 +#define VPD_VCR_START_OFFSET 2048 +#define VPD_VTPR_START_OFFSET 2576 +#define VPD_VRR_START_OFFSET 3072 +#define VPD_VMM_VAIL_START_OFFSET 31744 + +/*Virtualization faults*/ + +#define EVENT_MOV_TO_AR 1 +#define EVENT_MOV_TO_AR_IMM 2 +#define EVENT_MOV_FROM_AR 3 +#define EVENT_MOV_TO_CR 4 +#define EVENT_MOV_FROM_CR 5 +#define EVENT_MOV_TO_PSR 6 +#define EVENT_MOV_FROM_PSR 7 +#define EVENT_ITC_D 8 +#define EVENT_ITC_I 9 +#define EVENT_MOV_TO_RR 10 +#define EVENT_MOV_TO_DBR 11 +#define EVENT_MOV_TO_IBR 12 +#define EVENT_MOV_TO_PKR 13 +#define EVENT_MOV_TO_PMC 14 +#define EVENT_MOV_TO_PMD 15 +#define EVENT_ITR_D 16 +#define EVENT_ITR_I 17 +#define EVENT_MOV_FROM_RR 18 +#define EVENT_MOV_FROM_DBR 19 +#define EVENT_MOV_FROM_IBR 20 +#define EVENT_MOV_FROM_PKR 21 +#define EVENT_MOV_FROM_PMC 22 +#define EVENT_MOV_FROM_CPUID 23 +#define EVENT_SSM 24 +#define EVENT_RSM 25 +#define EVENT_PTC_L 26 +#define EVENT_PTC_G 27 +#define EVENT_PTC_GA 28 +#define EVENT_PTR_D 29 +#define EVENT_PTR_I 30 +#define EVENT_THASH 31 +#define EVENT_TTAG 32 +#define EVENT_TPA 33 +#define EVENT_TAK 34 +#define EVENT_PTC_E 35 +#define EVENT_COVER 36 +#define EVENT_RFI 37 +#define EVENT_BSW_0 38 +#define EVENT_BSW_1 39 +#define EVENT_VMSW 40 + +/**PAL virtual services offsets */ +#define PAL_VPS_RESUME_NORMAL 0x0000 +#define PAL_VPS_RESUME_HANDLER 0x0400 +#define PAL_VPS_SYNC_READ 0x0800 +#define PAL_VPS_SYNC_WRITE 0x0c00 +#define PAL_VPS_SET_PENDING_INTERRUPT 0x1000 +#define PAL_VPS_THASH 0x1400 +#define PAL_VPS_TTAG 0x1800 +#define PAL_VPS_RESTORE 0x1c00 +#define PAL_VPS_SAVE 0x2000 + +#endif/* _VT_I_H*/ diff --git a/trunk/arch/ia64/kvm/vtlb.c b/trunk/arch/ia64/kvm/vtlb.c new file mode 100644 index 000000000000..def4576d22b1 --- /dev/null +++ b/trunk/arch/ia64/kvm/vtlb.c @@ -0,0 +1,636 @@ +/* + * vtlb.c: guest virtual tlb handling module. + * Copyright (c) 2004, Intel Corporation. + * Yaozu Dong (Eddie Dong) + * Xuefei Xu (Anthony Xu) + * + * Copyright (c) 2007, Intel Corporation. + * Xuefei Xu (Anthony Xu) + * Xiantao Zhang + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + */ + +#include "vcpu.h" + +#include + +#include + +/* + * Check to see if the address rid:va is translated by the TLB + */ + +static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va) +{ + return ((trp->p) && (trp->rid == rid) + && ((va-trp->vadr) < PSIZE(trp->ps))); +} + +/* + * Only for GUEST TR format. + */ +static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva) +{ + u64 sa1, ea1; + + if (!trp->p || trp->rid != rid) + return 0; + + sa1 = trp->vadr; + ea1 = sa1 + PSIZE(trp->ps) - 1; + eva -= 1; + if ((sva > ea1) || (sa1 > eva)) + return 0; + else + return 1; + +} + +void machine_tlb_purge(u64 va, u64 ps) +{ + ia64_ptcl(va, ps << 2); +} + +void local_flush_tlb_all(void) +{ + int i, j; + unsigned long flags, count0, count1; + unsigned long stride0, stride1, addr; + + addr = current_vcpu->arch.ptce_base; + count0 = current_vcpu->arch.ptce_count[0]; + count1 = current_vcpu->arch.ptce_count[1]; + stride0 = current_vcpu->arch.ptce_stride[0]; + stride1 = current_vcpu->arch.ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref) +{ + union ia64_rr vrr; + union ia64_pta vpta; + struct ia64_psr vpsr; + + vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr); + vrr.val = vcpu_get_rr(vcpu, vadr); + vpta.val = vcpu_get_pta(vcpu); + + if (vrr.ve & vpta.ve) { + switch (ref) { + case DATA_REF: + case NA_REF: + return vpsr.dt; + case INST_REF: + return vpsr.dt && vpsr.it && vpsr.ic; + case RSE_REF: + return vpsr.dt && vpsr.rt; + + } + } + return 0; +} + +struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag) +{ + u64 index, pfn, rid, pfn_bits; + + pfn_bits = vpta.size - 5 - 8; + pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr); + rid = _REGION_ID(vrr); + index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1)); + *tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16); + + return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) + + (index << 5)); +} + +struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type) +{ + + struct thash_data *trp; + int i; + u64 rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK;; + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_translated(trp, rid, va)) + return trp; + } + } + } + + return NULL; +} + +static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte) +{ + union ia64_rr rr; + struct thash_data *head; + unsigned long ps, gpaddr; + + ps = itir_ps(itir); + + gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) | + (ifa & ((1UL << ps) - 1)); + + rr.val = ia64_get_rr(ifa); + head = (struct thash_data *)ia64_thash(ifa); + head->etag = INVALID_TI_TAG; + ia64_mf(); + head->page_flags = pte & ~PAGE_FLAGS_RV_MASK; + head->itir = rr.ps << 2; + head->etag = ia64_ttag(ifa); + head->gpaddr = gpaddr; +} + +void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps) +{ + u64 i, dirty_pages = 1; + u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT; + spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa); + void *dirty_bitmap = (void *)v - (KVM_VCPU_OFS + v->vcpu_id * VCPU_SIZE) + + KVM_MEM_DIRTY_LOG_OFS; + dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT; + + vmm_spin_lock(lock); + for (i = 0; i < dirty_pages; i++) { + /* avoid RMW */ + if (!test_bit(base_gfn + i, dirty_bitmap)) + set_bit(base_gfn + i , dirty_bitmap); + } + vmm_spin_unlock(lock); +} + +void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type) +{ + u64 phy_pte, psr; + union ia64_rr mrr; + + mrr.val = ia64_get_rr(va); + phy_pte = translate_phy_pte(&pte, itir, va); + + if (itir_ps(itir) >= mrr.ps) { + vhpt_insert(phy_pte, itir, va, pte); + } else { + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, va, phy_pte, itir_ps(itir)); + ia64_set_psr(psr); + } + + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, itir_ps(itir)); +} + +/* + * vhpt lookup + */ +struct thash_data *vhpt_lookup(u64 va) +{ + struct thash_data *head; + u64 tag; + + head = (struct thash_data *)ia64_thash(va); + tag = ia64_ttag(va); + if (head->etag == tag) + return head; + return NULL; +} + +u64 guest_vhpt_lookup(u64 iha, u64 *pte) +{ + u64 ret; + struct thash_data *data; + + data = __vtr_lookup(current_vcpu, iha, D_TLB); + if (data != NULL) + thash_vhpt_insert(current_vcpu, data->page_flags, + data->itir, iha, D_TLB); + + asm volatile ("rsm psr.ic|psr.i;;" + "srlz.d;;" + "ld8.s r9=[%1];;" + "tnat.nz p6,p7=r9;;" + "(p6) mov %0=1;" + "(p6) mov r9=r0;" + "(p7) extr.u r9=r9,0,53;;" + "(p7) mov %0=r0;" + "(p7) st8 [%2]=r9;;" + "ssm psr.ic;;" + "srlz.d;;" + /* "ssm psr.i;;" Once interrupts in vmm open, need fix*/ + : "=r"(ret) : "r"(iha), "r"(pte):"memory"); + + return ret; +} + +/* + * purge software guest tlb + */ + +static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, curadr, size, psbits, tag, rr_ps, num; + union ia64_rr vrr; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + psbits = VMX(v, psbits[(va >> 61)]); + start = va & ~((1UL << ps) - 1); + while (psbits) { + curadr = start; + rr_ps = __ffs(psbits); + psbits &= ~(1UL << rr_ps); + num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps)); + size = PSIZE(rr_ps); + vrr.ps = rr_ps; + while (num) { + cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag); + if (cur->etag == tag && cur->ps == rr_ps) + cur->etag = INVALID_TI_TAG; + curadr += size; + num--; + } + } +} + + +/* + * purge VHPT and machine TLB + */ +static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps) +{ + struct thash_data *cur; + u64 start, size, tag, num; + union ia64_rr rr; + + start = va & ~((1UL << ps) - 1); + rr.val = ia64_get_rr(va); + size = PSIZE(rr.ps); + num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps)); + while (num) { + cur = (struct thash_data *)ia64_thash(start); + tag = ia64_ttag(start); + if (cur->etag == tag) + cur->etag = INVALID_TI_TAG; + start += size; + num--; + } + machine_tlb_purge(va, ps); +} + +/* + * Insert an entry into hash TLB or VHPT. + * NOTES: + * 1: When inserting VHPT to thash, "va" is a must covered + * address by the inserted machine VHPT entry. + * 2: The format of entry is always in TLB. + * 3: The caller need to make sure the new entry will not overlap + * with any existed entry. + */ +void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va) +{ + struct thash_data *head; + union ia64_rr vrr; + u64 tag; + struct thash_cb *hcb = &v->arch.vtlb; + + vrr.val = vcpu_get_rr(v, va); + vrr.ps = itir_ps(itir); + VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps); + head = vsa_thash(hcb->pta, va, vrr.val, &tag); + head->page_flags = pte; + head->itir = itir; + head->etag = tag; +} + +int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type) +{ + struct thash_data *trp; + int i; + u64 end, rid; + + rid = vcpu_get_rr(vcpu, va); + rid = rid & RR_RID_MASK; + end = va + PSIZE(ps); + if (type == D_TLB) { + if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0; + i < NDTRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } else { + if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) { + for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0; + i < NITRS; i++, trp++) { + if (__is_tr_overlap(trp, rid, va, end)) + return i; + } + } + } + return -1; +} + +/* + * Purge entries in VTLB and VHPT + */ +void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps) +{ + if (vcpu_quick_region_check(v->arch.tc_regions, va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps) +{ + u64 old_va = va; + va = REGION_OFFSET(va); + if (vcpu_quick_region_check(v->arch.tc_regions, old_va)) + vtlb_purge(v, va, ps); + vhpt_purge(v, va, ps); +} + +u64 translate_phy_pte(u64 *pte, u64 itir, u64 va) +{ + u64 ps, ps_mask, paddr, maddr; + union pte_flags phy_pte; + + ps = itir_ps(itir); + ps_mask = ~((1UL << ps) - 1); + phy_pte.val = *pte; + paddr = *pte; + paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask); + maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT); + if (maddr & GPFN_IO_MASK) { + *pte |= VTLB_PTE_IO; + return -1; + } + maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | + (paddr & ~PAGE_MASK); + phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT; + return phy_pte.val; +} + +/* + * Purge overlap TCs and then insert the new entry to emulate itc ops. + * Notes: Only TC entry can purge and insert. + * 1 indicates this is MMIO + */ +int thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir, + u64 ifa, int type) +{ + u64 ps; + u64 phy_pte; + union ia64_rr vrr, mrr; + int ret = 0; + + ps = itir_ps(itir); + vrr.val = vcpu_get_rr(v, ifa); + mrr.val = ia64_get_rr(ifa); + + phy_pte = translate_phy_pte(&pte, itir, ifa); + + /* Ensure WB attribute if pte is related to a normal mem page, + * which is required by vga acceleration since qemu maps shared + * vram buffer with WB. + */ + if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT)) { + pte &= ~_PAGE_MA_MASK; + phy_pte &= ~_PAGE_MA_MASK; + } + + if (pte & VTLB_PTE_IO) + ret = 1; + + vtlb_purge(v, ifa, ps); + vhpt_purge(v, ifa, ps); + + if (ps == mrr.ps) { + if (!(pte&VTLB_PTE_IO)) { + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + } + } else if (ps > mrr.ps) { + vtlb_insert(v, pte, itir, ifa); + vcpu_quick_region_set(VMX(v, tc_regions), ifa); + if (!(pte&VTLB_PTE_IO)) + vhpt_insert(phy_pte, itir, ifa, pte); + } else { + u64 psr; + phy_pte &= ~PAGE_FLAGS_RV_MASK; + psr = ia64_clear_ic(); + ia64_itc(type, ifa, phy_pte, ps); + ia64_set_psr(psr); + } + if (!(pte&VTLB_PTE_IO)) + mark_pages_dirty(v, pte, ps); + + return ret; +} + +/* + * Purge all TCs or VHPT entries including those in Hash table. + * + */ + +void thash_purge_all(struct kvm_vcpu *v) +{ + int i; + struct thash_data *head; + struct thash_cb *vtlb, *vhpt; + vtlb = &v->arch.vtlb; + vhpt = &v->arch.vhpt; + + for (i = 0; i < 8; i++) + VMX(v, psbits[i]) = 0; + + head = vtlb->hash; + for (i = 0; i < vtlb->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + head = vhpt->hash; + for (i = 0; i < vhpt->num; i++) { + head->page_flags = 0; + head->etag = INVALID_TI_TAG; + head->itir = 0; + head->next = 0; + head++; + }; + + local_flush_tlb_all(); +} + + +/* + * Lookup the hash table and its collision chain to find an entry + * covering this address rid:va or the entry. + * + * INPUT: + * in: TLB format for both VHPT & TLB. + */ + +struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data) +{ + struct thash_data *cch; + u64 psbits, ps, tag; + union ia64_rr vrr; + + struct thash_cb *hcb = &v->arch.vtlb; + + cch = __vtr_lookup(v, va, is_data);; + if (cch) + return cch; + + if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0) + return NULL; + + psbits = VMX(v, psbits[(va >> 61)]); + vrr.val = vcpu_get_rr(v, va); + while (psbits) { + ps = __ffs(psbits); + psbits &= ~(1UL << ps); + vrr.ps = ps; + cch = vsa_thash(hcb->pta, va, vrr.val, &tag); + if (cch->etag == tag && cch->ps == ps) + return cch; + } + + return NULL; +} + + +/* + * Initialize internal control data before service. + */ +void thash_init(struct thash_cb *hcb, u64 sz) +{ + int i; + struct thash_data *head; + + hcb->pta.val = (unsigned long)hcb->hash; + hcb->pta.vf = 1; + hcb->pta.ve = 1; + hcb->pta.size = sz; + head = hcb->hash; + for (i = 0; i < hcb->num; i++) { + head->page_flags = 0; + head->itir = 0; + head->etag = INVALID_TI_TAG; + head->next = 0; + head++; + } +} + +u64 kvm_lookup_mpa(u64 gpfn) +{ + u64 *base = (u64 *) KVM_P2M_BASE; + return *(base + gpfn); +} + +u64 kvm_gpa_to_mpa(u64 gpa) +{ + u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT); + return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK); +} + + +/* + * Fetch guest bundle code. + * INPUT: + * gip: guest ip + * pbundle: used to return fetched bundle. + */ +int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle) +{ + u64 gpip = 0; /* guest physical IP*/ + u64 *vpa; + struct thash_data *tlb; + u64 maddr; + + if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) { + /* I-side physical mode */ + gpip = gip; + } else { + tlb = vtlb_lookup(vcpu, gip, I_TLB); + if (tlb) + gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) | + (gip & (PSIZE(tlb->ps) - 1)); + } + if (gpip) { + maddr = kvm_gpa_to_mpa(gpip); + } else { + tlb = vhpt_lookup(gip); + if (tlb == NULL) { + ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2); + return IA64_FAULT; + } + maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) + | (gip & (PSIZE(tlb->ps) - 1)); + } + vpa = (u64 *)__kvm_va(maddr); + + pbundle->i64[0] = *vpa++; + pbundle->i64[1] = *vpa; + + return IA64_NO_FAULT; +} + + +void kvm_init_vhpt(struct kvm_vcpu *v) +{ + v->arch.vhpt.num = VHPT_NUM_ENTRIES; + thash_init(&v->arch.vhpt, VHPT_SHIFT); + ia64_set_pta(v->arch.vhpt.pta.val); + /*Enable VHPT here?*/ +} + +void kvm_init_vtlb(struct kvm_vcpu *v) +{ + v->arch.vtlb.num = VTLB_NUM_ENTRIES; + thash_init(&v->arch.vtlb, VTLB_SHIFT); +} diff --git a/trunk/arch/powerpc/Kconfig b/trunk/arch/powerpc/Kconfig index 4bb2e9310a56..4e40c122bf26 100644 --- a/trunk/arch/powerpc/Kconfig +++ b/trunk/arch/powerpc/Kconfig @@ -626,20 +626,6 @@ config ADVANCED_OPTIONS comment "Default settings for advanced configuration options are used" depends on !ADVANCED_OPTIONS -config HIGHMEM_START_BOOL - bool "Set high memory pool address" - depends on ADVANCED_OPTIONS && HIGHMEM - help - This option allows you to set the base address of the kernel virtual - area used to map high memory pages. This can be useful in - optimizing the layout of kernel virtual memory. - - Say N here unless you know what you are doing. - -config HIGHMEM_START - hex "Virtual start address of high memory pool" if HIGHMEM_START_BOOL - default "0xfe000000" - config LOWMEM_SIZE_BOOL bool "Set maximum low memory" depends on ADVANCED_OPTIONS @@ -656,21 +642,76 @@ config LOWMEM_SIZE hex "Maximum low memory size (in bytes)" if LOWMEM_SIZE_BOOL default "0x30000000" +config RELOCATABLE + bool "Build a relocatable kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL && ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE + help + This builds a kernel image that is capable of running at the + location the kernel is loaded at (some alignment restrictions may + exist). + + One use is for the kexec on panic case where the recovery kernel + must live at a different physical address than the primary + kernel. + + Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address + it has been loaded at and the compile time physical addresses + CONFIG_PHYSICAL_START is ignored. However CONFIG_PHYSICAL_START + setting can still be useful to bootwrappers that need to know the + load location of the kernel (eg. u-boot/mkimage). + +config PAGE_OFFSET_BOOL + bool "Set custom page offset address" + depends on ADVANCED_OPTIONS + help + This option allows you to set the kernel virtual address at which + the kernel will map low memory. This can be useful in optimizing + the virtual memory layout of the system. + + Say N here unless you know what you are doing. + +config PAGE_OFFSET + hex "Virtual address of memory base" if PAGE_OFFSET_BOOL + default "0xc0000000" + config KERNEL_START_BOOL bool "Set custom kernel base address" depends on ADVANCED_OPTIONS help This option allows you to set the kernel virtual address at which - the kernel will map low memory (the kernel image will be linked at - this address). This can be useful in optimizing the virtual memory - layout of the system. + the kernel will be loaded. Normally this should match PAGE_OFFSET + however there are times (like kdump) that one might not want them + to be the same. Say N here unless you know what you are doing. config KERNEL_START hex "Virtual address of kernel base" if KERNEL_START_BOOL + default PAGE_OFFSET if PAGE_OFFSET_BOOL + default "0xc2000000" if CRASH_DUMP default "0xc0000000" +config PHYSICAL_START_BOOL + bool "Set physical address where the kernel is loaded" + depends on ADVANCED_OPTIONS && FLATMEM && FSL_BOOKE + help + This gives the physical address where the kernel is loaded. + + Say N here unless you know what you are doing. + +config PHYSICAL_START + hex "Physical address where the kernel is loaded" if PHYSICAL_START_BOOL + default "0x02000000" if PPC_STD_MMU && CRASH_DUMP + default "0x00000000" + +config PHYSICAL_ALIGN + hex + default "0x10000000" if FSL_BOOKE + help + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. + config TASK_SIZE_BOOL bool "Set custom user task size" depends on ADVANCED_OPTIONS @@ -717,9 +758,17 @@ config PIN_TLB endmenu if PPC64 +config PAGE_OFFSET + hex + default "0xc000000000000000" config KERNEL_START hex + default "0xc000000002000000" if CRASH_DUMP default "0xc000000000000000" +config PHYSICAL_START + hex + default "0x02000000" if CRASH_DUMP + default "0x00000000" endif source "net/Kconfig" @@ -754,3 +803,4 @@ config PPC_CLOCK config PPC_LIB_RHEAP bool +source "arch/powerpc/kvm/Kconfig" diff --git a/trunk/arch/powerpc/Kconfig.debug b/trunk/arch/powerpc/Kconfig.debug index a86d8d853214..807a2dce6263 100644 --- a/trunk/arch/powerpc/Kconfig.debug +++ b/trunk/arch/powerpc/Kconfig.debug @@ -151,6 +151,9 @@ config BOOTX_TEXT config PPC_EARLY_DEBUG bool "Early debugging (dangerous)" + # PPC_EARLY_DEBUG on 440 leaves AS=1 mappings above the TLB high water + # mark, which doesn't work with current 440 KVM. + depends on !KVM help Say Y to enable some early debugging facilities that may be available for your processor/board combination. Those facilities are hacks diff --git a/trunk/arch/powerpc/Makefile b/trunk/arch/powerpc/Makefile index e2ec4a91ccef..9dcdc036cdf7 100644 --- a/trunk/arch/powerpc/Makefile +++ b/trunk/arch/powerpc/Makefile @@ -145,6 +145,7 @@ core-y += arch/powerpc/kernel/ \ arch/powerpc/platforms/ core-$(CONFIG_MATH_EMULATION) += arch/powerpc/math-emu/ core-$(CONFIG_XMON) += arch/powerpc/xmon/ +core-$(CONFIG_KVM) += arch/powerpc/kvm/ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/ diff --git a/trunk/arch/powerpc/boot/.gitignore b/trunk/arch/powerpc/boot/.gitignore index 5ef2bdf8d189..2347294ff35b 100644 --- a/trunk/arch/powerpc/boot/.gitignore +++ b/trunk/arch/powerpc/boot/.gitignore @@ -27,6 +27,7 @@ zImage.chrp zImage.coff zImage.coff.lds zImage.ep* +zImage.iseries zImage.*lds zImage.miboot zImage.pmac diff --git a/trunk/arch/powerpc/boot/Makefile b/trunk/arch/powerpc/boot/Makefile index 5ba50c673390..7822d25c9d31 100644 --- a/trunk/arch/powerpc/boot/Makefile +++ b/trunk/arch/powerpc/boot/Makefile @@ -40,7 +40,7 @@ $(obj)/ebony.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405 $(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405 $(obj)/treeboot-walnut.o: BOOTCFLAGS += -mcpu=405 -$(obj)/virtex405-head.o: BOOTCFLAGS += -mcpu=405 +$(obj)/virtex405-head.o: BOOTAFLAGS += -mcpu=405 zlib := inffast.c inflate.c inftrees.c diff --git a/trunk/arch/powerpc/boot/dts/canyonlands.dts b/trunk/arch/powerpc/boot/dts/canyonlands.dts index 6f3d38a1554f..39634124929b 100644 --- a/trunk/arch/powerpc/boot/dts/canyonlands.dts +++ b/trunk/arch/powerpc/boot/dts/canyonlands.dts @@ -142,8 +142,45 @@ #address-cells = <2>; #size-cells = <1>; clock-frequency = <0>; /* Filled in by U-Boot */ + /* ranges property is supplied by U-Boot */ interrupts = <6 4>; interrupt-parent = <&UIC1>; + + nor_flash@0,0 { + compatible = "amd,s29gl512n", "cfi-flash"; + bank-width = <2>; + reg = <0 000000 4000000>; + #address-cells = <1>; + #size-cells = <1>; + partition@0 { + label = "kernel"; + reg = <0 1e0000>; + }; + partition@1e0000 { + label = "dtb"; + reg = <1e0000 20000>; + }; + partition@200000 { + label = "ramdisk"; + reg = <200000 1400000>; + }; + partition@1600000 { + label = "jffs2"; + reg = <1600000 400000>; + }; + partition@1a00000 { + label = "user"; + reg = <1a00000 2560000>; + }; + partition@3f60000 { + label = "env"; + reg = <3f60000 40000>; + }; + partition@3fa0000 { + label = "u-boot"; + reg = <3fa0000 60000>; + }; + }; }; UART0: serial@ef600300 { diff --git a/trunk/arch/powerpc/boot/dts/glacier.dts b/trunk/arch/powerpc/boot/dts/glacier.dts index 958a5ca53d35..0f2fc077d8db 100644 --- a/trunk/arch/powerpc/boot/dts/glacier.dts +++ b/trunk/arch/powerpc/boot/dts/glacier.dts @@ -145,8 +145,45 @@ #address-cells = <2>; #size-cells = <1>; clock-frequency = <0>; /* Filled in by U-Boot */ + /* ranges property is supplied by U-Boot */ interrupts = <6 4>; interrupt-parent = <&UIC1>; + + nor_flash@0,0 { + compatible = "amd,s29gl512n", "cfi-flash"; + bank-width = <2>; + reg = <0 000000 4000000>; + #address-cells = <1>; + #size-cells = <1>; + partition@0 { + label = "kernel"; + reg = <0 1e0000>; + }; + partition@1e0000 { + label = "dtb"; + reg = <1e0000 20000>; + }; + partition@200000 { + label = "ramdisk"; + reg = <200000 1400000>; + }; + partition@1600000 { + label = "jffs2"; + reg = <1600000 400000>; + }; + partition@1a00000 { + label = "user"; + reg = <1a00000 2560000>; + }; + partition@3f60000 { + label = "env"; + reg = <3f60000 40000>; + }; + partition@3fa0000 { + label = "u-boot"; + reg = <3fa0000 60000>; + }; + }; }; UART0: serial@ef600300 { diff --git a/trunk/arch/powerpc/boot/ns16550.c b/trunk/arch/powerpc/boot/ns16550.c index aef3bdc89160..8c9ead94be06 100644 --- a/trunk/arch/powerpc/boot/ns16550.c +++ b/trunk/arch/powerpc/boot/ns16550.c @@ -55,10 +55,15 @@ static u8 ns16550_tstc(void) int ns16550_console_init(void *devp, struct serial_console_data *scdp) { int n; + u32 reg_offset; if (dt_get_virtual_reg(devp, (void **)®_base, 1) < 1) return -1; + n = getprop(devp, "reg-offset", ®_offset, sizeof(reg_offset)); + if (n == sizeof(reg_offset)) + reg_base += reg_offset; + n = getprop(devp, "reg-shift", ®_shift, sizeof(reg_shift)); if (n != sizeof(reg_shift)) reg_shift = 0; diff --git a/trunk/arch/powerpc/kernel/Makefile b/trunk/arch/powerpc/kernel/Makefile index ce1e8d24e747..9177b21b1a95 100644 --- a/trunk/arch/powerpc/kernel/Makefile +++ b/trunk/arch/powerpc/kernel/Makefile @@ -106,4 +106,13 @@ PHONY += systbl_chk systbl_chk: $(src)/systbl_chk.sh $(obj)/systbl_chk.i $(call cmd,systbl_chk) +$(obj)/built-in.o: prom_init_check + +quiet_cmd_prom_init_check = CALL $< + cmd_prom_init_check = $(CONFIG_SHELL) $< "$(NM)" "$(obj)/prom_init.o" + +PHONY += prom_init_check +prom_init_check: $(src)/prom_init_check.sh $(obj)/prom_init.o + $(call cmd,prom_init_check) + clean-files := vmlinux.lds diff --git a/trunk/arch/powerpc/kernel/asm-offsets.c b/trunk/arch/powerpc/kernel/asm-offsets.c index 292c6d8db0e1..62134845af08 100644 --- a/trunk/arch/powerpc/kernel/asm-offsets.c +++ b/trunk/arch/powerpc/kernel/asm-offsets.c @@ -23,6 +23,9 @@ #include #include #include +#ifdef CONFIG_KVM +#include +#endif #ifdef CONFIG_PPC64 #include #include @@ -93,10 +96,7 @@ int main(void) DEFINE(TI_LOCAL_FLAGS, offsetof(struct thread_info, local_flags)); DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count)); DEFINE(TI_TASK, offsetof(struct thread_info, task)); -#ifdef CONFIG_PPC32 - DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain)); DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); -#endif /* CONFIG_PPC32 */ #ifdef CONFIG_PPC64 DEFINE(DCACHEL1LINESIZE, offsetof(struct ppc64_caches, dline_size)); @@ -165,13 +165,9 @@ int main(void) /* Interrupt register frame */ DEFINE(STACK_FRAME_OVERHEAD, STACK_FRAME_OVERHEAD); -#ifndef CONFIG_PPC64 - DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); -#else /* CONFIG_PPC64 */ + DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); +#ifdef CONFIG_PPC64 DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); - /* 288 = # of volatile regs, int & fp, for leaf routines */ - /* which do not stack a frame. See the PPC64 ABI. */ - DEFINE(INT_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 288); /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */ DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16); @@ -331,5 +327,30 @@ int main(void) DEFINE(PGD_TABLE_SIZE, PGD_TABLE_SIZE); +#ifdef CONFIG_KVM + DEFINE(TLBE_BYTES, sizeof(struct tlbe)); + + DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); + DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); + DEFINE(VCPU_HOST_TLB, offsetof(struct kvm_vcpu, arch.host_tlb)); + DEFINE(VCPU_SHADOW_TLB, offsetof(struct kvm_vcpu, arch.shadow_tlb)); + DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); + DEFINE(VCPU_LR, offsetof(struct kvm_vcpu, arch.lr)); + DEFINE(VCPU_CR, offsetof(struct kvm_vcpu, arch.cr)); + DEFINE(VCPU_XER, offsetof(struct kvm_vcpu, arch.xer)); + DEFINE(VCPU_CTR, offsetof(struct kvm_vcpu, arch.ctr)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.pc)); + DEFINE(VCPU_MSR, offsetof(struct kvm_vcpu, arch.msr)); + DEFINE(VCPU_SPRG4, offsetof(struct kvm_vcpu, arch.sprg4)); + DEFINE(VCPU_SPRG5, offsetof(struct kvm_vcpu, arch.sprg5)); + DEFINE(VCPU_SPRG6, offsetof(struct kvm_vcpu, arch.sprg6)); + DEFINE(VCPU_SPRG7, offsetof(struct kvm_vcpu, arch.sprg7)); + DEFINE(VCPU_PID, offsetof(struct kvm_vcpu, arch.pid)); + + DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst)); + DEFINE(VCPU_FAULT_DEAR, offsetof(struct kvm_vcpu, arch.fault_dear)); + DEFINE(VCPU_FAULT_ESR, offsetof(struct kvm_vcpu, arch.fault_esr)); +#endif + return 0; } diff --git a/trunk/arch/powerpc/kernel/cpu_setup_44x.S b/trunk/arch/powerpc/kernel/cpu_setup_44x.S index 5465e8de0e61..e3623e3e3451 100644 --- a/trunk/arch/powerpc/kernel/cpu_setup_44x.S +++ b/trunk/arch/powerpc/kernel/cpu_setup_44x.S @@ -33,7 +33,6 @@ _GLOBAL(__setup_cpu_440grx) mtlr r4 blr _GLOBAL(__setup_cpu_460ex) -_GLOBAL(__setup_cpu_460gt) b __init_fpu_44x _GLOBAL(__setup_cpu_440gx) _GLOBAL(__setup_cpu_440spe) diff --git a/trunk/arch/powerpc/kernel/cpu_setup_6xx.S b/trunk/arch/powerpc/kernel/cpu_setup_6xx.S index f1ee0b3f78f2..72d1d7395254 100644 --- a/trunk/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/trunk/arch/powerpc/kernel/cpu_setup_6xx.S @@ -17,7 +17,13 @@ #include _GLOBAL(__setup_cpu_603) - b setup_common_caches + mflr r4 +BEGIN_FTR_SECTION + bl __init_fpu_registers +END_FTR_SECTION_IFCLR(CPU_FTR_FPU_UNAVAILABLE) + bl setup_common_caches + mtlr r4 + blr _GLOBAL(__setup_cpu_604) mflr r4 bl setup_common_caches diff --git a/trunk/arch/powerpc/kernel/cputable.c b/trunk/arch/powerpc/kernel/cputable.c index 26ffb44e2701..36080d4d1922 100644 --- a/trunk/arch/powerpc/kernel/cputable.c +++ b/trunk/arch/powerpc/kernel/cputable.c @@ -37,7 +37,6 @@ extern void __setup_cpu_440gx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440grx(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_440spe(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_460ex(unsigned long offset, struct cpu_spec* spec); -extern void __setup_cpu_460gt(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_603(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_604(unsigned long offset, struct cpu_spec* spec); extern void __setup_cpu_750(unsigned long offset, struct cpu_spec* spec); @@ -1416,10 +1415,9 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_value = 0x13020000, .cpu_name = "460GT", .cpu_features = CPU_FTRS_44X, - .cpu_user_features = COMMON_USER_BOOKE | PPC_FEATURE_HAS_FPU, + .cpu_user_features = COMMON_USER_BOOKE, .icache_bsize = 32, .dcache_bsize = 32, - .cpu_setup = __setup_cpu_460gt, .machine_check = machine_check_440A, .platform = "ppc440", }, diff --git a/trunk/arch/powerpc/kernel/head_fsl_booke.S b/trunk/arch/powerpc/kernel/head_fsl_booke.S index 4ff744143566..e581524d85bc 100644 --- a/trunk/arch/powerpc/kernel/head_fsl_booke.S +++ b/trunk/arch/powerpc/kernel/head_fsl_booke.S @@ -371,6 +371,17 @@ skpinv: addi r6,r6,1 /* Increment */ bl early_init +#ifdef CONFIG_RELOCATABLE + lis r3,kernstart_addr@ha + la r3,kernstart_addr@l(r3) +#ifdef CONFIG_PHYS_64BIT + stw r23,0(r3) + stw r25,4(r3) +#else + stw r25,0(r3) +#endif +#endif + mfspr r3,SPRN_TLB1CFG andi. r3,r3,0xfff lis r4,num_tlbcam_entries@ha diff --git a/trunk/arch/powerpc/kernel/misc_32.S b/trunk/arch/powerpc/kernel/misc_32.S index 9d2c56621f1e..92ccc6fcc5b0 100644 --- a/trunk/arch/powerpc/kernel/misc_32.S +++ b/trunk/arch/powerpc/kernel/misc_32.S @@ -152,7 +152,7 @@ _GLOBAL(low_choose_750fx_pll) mtspr SPRN_HID1,r4 /* Store new HID1 image */ - rlwinm r6,r1,0,0,18 + rlwinm r6,r1,0,0,(31-THREAD_SHIFT) lwz r6,TI_CPU(r6) slwi r6,r6,2 addis r6,r6,nap_save_hid1@ha @@ -281,7 +281,7 @@ _GLOBAL(_tlbia) #endif /* CONFIG_SMP */ #else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ #if defined(CONFIG_SMP) - rlwinm r8,r1,0,0,18 + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) lwz r8,TI_CPU(r8) oris r8,r8,10 mfmsr r10 @@ -377,7 +377,7 @@ _GLOBAL(_tlbie) #endif /* CONFIG_SMP */ #else /* !(CONFIG_40x || CONFIG_44x || CONFIG_FSL_BOOKE) */ #if defined(CONFIG_SMP) - rlwinm r8,r1,0,0,18 + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) lwz r8,TI_CPU(r8) oris r8,r8,11 mfmsr r10 diff --git a/trunk/arch/powerpc/kernel/misc_64.S b/trunk/arch/powerpc/kernel/misc_64.S index a3c491e88a72..942951e76586 100644 --- a/trunk/arch/powerpc/kernel/misc_64.S +++ b/trunk/arch/powerpc/kernel/misc_64.S @@ -27,23 +27,11 @@ .text -_GLOBAL(get_msr) - mfmsr r3 - blr - -_GLOBAL(get_srr0) - mfsrr0 r3 - blr - -_GLOBAL(get_srr1) - mfsrr1 r3 - blr - #ifdef CONFIG_IRQSTACKS _GLOBAL(call_do_softirq) mflr r0 std r0,16(r1) - stdu r1,THREAD_SIZE-112(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 bl .__do_softirq ld r1,0(r1) @@ -56,7 +44,7 @@ _GLOBAL(call_handle_irq) mflr r0 std r0,16(r1) mtctr r8 - stdu r1,THREAD_SIZE-112(r5) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r5) mr r1,r5 bctrl ld r1,0(r1) @@ -599,7 +587,7 @@ _GLOBAL(kexec_sequence) std r0,16(r1) /* switch stacks to newstack -- &kexec_stack.stack */ - stdu r1,THREAD_SIZE-112(r3) + stdu r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3) mr r1,r3 li r0,0 @@ -616,7 +604,7 @@ _GLOBAL(kexec_sequence) std r26,-48(r1) std r25,-56(r1) - stdu r1,-112-64(r1) + stdu r1,-STACK_FRAME_OVERHEAD-64(r1) /* save args into preserved regs */ mr r31,r3 /* newstack (both) */ diff --git a/trunk/arch/powerpc/kernel/of_platform.c b/trunk/arch/powerpc/kernel/of_platform.c index fb698d47082d..e79ad8afda07 100644 --- a/trunk/arch/powerpc/kernel/of_platform.c +++ b/trunk/arch/powerpc/kernel/of_platform.c @@ -275,6 +275,8 @@ static int __devinit of_pci_phb_probe(struct of_device *dev, /* Scan the bus */ scan_phb(phb); + if (phb->bus == NULL) + return -ENXIO; /* Claim resources. This might need some rework as well depending * wether we are doing probe-only or not, like assigning unassigned diff --git a/trunk/arch/powerpc/kernel/paca.c b/trunk/arch/powerpc/kernel/paca.c index ac163bd46cfd..c9bf17eec31b 100644 --- a/trunk/arch/powerpc/kernel/paca.c +++ b/trunk/arch/powerpc/kernel/paca.c @@ -7,17 +7,11 @@ * 2 of the License, or (at your option) any later version. */ -#include #include #include -#include -#include -#include #include #include -#include - /* This symbol is provided by the linker - let it fill in the paca * field correctly */ @@ -65,60 +59,29 @@ struct slb_shadow slb_shadow[] __cacheline_aligned = { * processors. The processor VPD array needs one entry per physical * processor (not thread). */ -#define PACA_INIT(number) \ -{ \ - .lppaca_ptr = &lppaca[number], \ - .lock_token = 0x8000, \ - .paca_index = (number), /* Paca Index */ \ - .kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL, \ - .hw_cpu_id = 0xffff, \ - .slb_shadow_ptr = &slb_shadow[number], \ - .__current = &init_task, \ -} - -struct paca_struct paca[] = { - PACA_INIT(0), -#if NR_CPUS > 1 - PACA_INIT( 1), PACA_INIT( 2), PACA_INIT( 3), -#if NR_CPUS > 4 - PACA_INIT( 4), PACA_INIT( 5), PACA_INIT( 6), PACA_INIT( 7), -#if NR_CPUS > 8 - PACA_INIT( 8), PACA_INIT( 9), PACA_INIT( 10), PACA_INIT( 11), - PACA_INIT( 12), PACA_INIT( 13), PACA_INIT( 14), PACA_INIT( 15), - PACA_INIT( 16), PACA_INIT( 17), PACA_INIT( 18), PACA_INIT( 19), - PACA_INIT( 20), PACA_INIT( 21), PACA_INIT( 22), PACA_INIT( 23), - PACA_INIT( 24), PACA_INIT( 25), PACA_INIT( 26), PACA_INIT( 27), - PACA_INIT( 28), PACA_INIT( 29), PACA_INIT( 30), PACA_INIT( 31), -#if NR_CPUS > 32 - PACA_INIT( 32), PACA_INIT( 33), PACA_INIT( 34), PACA_INIT( 35), - PACA_INIT( 36), PACA_INIT( 37), PACA_INIT( 38), PACA_INIT( 39), - PACA_INIT( 40), PACA_INIT( 41), PACA_INIT( 42), PACA_INIT( 43), - PACA_INIT( 44), PACA_INIT( 45), PACA_INIT( 46), PACA_INIT( 47), - PACA_INIT( 48), PACA_INIT( 49), PACA_INIT( 50), PACA_INIT( 51), - PACA_INIT( 52), PACA_INIT( 53), PACA_INIT( 54), PACA_INIT( 55), - PACA_INIT( 56), PACA_INIT( 57), PACA_INIT( 58), PACA_INIT( 59), - PACA_INIT( 60), PACA_INIT( 61), PACA_INIT( 62), PACA_INIT( 63), -#if NR_CPUS > 64 - PACA_INIT( 64), PACA_INIT( 65), PACA_INIT( 66), PACA_INIT( 67), - PACA_INIT( 68), PACA_INIT( 69), PACA_INIT( 70), PACA_INIT( 71), - PACA_INIT( 72), PACA_INIT( 73), PACA_INIT( 74), PACA_INIT( 75), - PACA_INIT( 76), PACA_INIT( 77), PACA_INIT( 78), PACA_INIT( 79), - PACA_INIT( 80), PACA_INIT( 81), PACA_INIT( 82), PACA_INIT( 83), - PACA_INIT( 84), PACA_INIT( 85), PACA_INIT( 86), PACA_INIT( 87), - PACA_INIT( 88), PACA_INIT( 89), PACA_INIT( 90), PACA_INIT( 91), - PACA_INIT( 92), PACA_INIT( 93), PACA_INIT( 94), PACA_INIT( 95), - PACA_INIT( 96), PACA_INIT( 97), PACA_INIT( 98), PACA_INIT( 99), - PACA_INIT(100), PACA_INIT(101), PACA_INIT(102), PACA_INIT(103), - PACA_INIT(104), PACA_INIT(105), PACA_INIT(106), PACA_INIT(107), - PACA_INIT(108), PACA_INIT(109), PACA_INIT(110), PACA_INIT(111), - PACA_INIT(112), PACA_INIT(113), PACA_INIT(114), PACA_INIT(115), - PACA_INIT(116), PACA_INIT(117), PACA_INIT(118), PACA_INIT(119), - PACA_INIT(120), PACA_INIT(121), PACA_INIT(122), PACA_INIT(123), - PACA_INIT(124), PACA_INIT(125), PACA_INIT(126), PACA_INIT(127), -#endif -#endif -#endif -#endif -#endif -}; +struct paca_struct paca[NR_CPUS]; EXPORT_SYMBOL(paca); + +void __init initialise_pacas(void) +{ + int cpu; + + /* The TOC register (GPR2) points 32kB into the TOC, so that 64kB + * of the TOC can be addressed using a single machine instruction. + */ + unsigned long kernel_toc = (unsigned long)(&__toc_start) + 0x8000UL; + + /* Can't use for_each_*_cpu, as they aren't functional yet */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct paca_struct *new_paca = &paca[cpu]; + + new_paca->lppaca_ptr = &lppaca[cpu]; + new_paca->lock_token = 0x8000; + new_paca->paca_index = cpu; + new_paca->kernel_toc = kernel_toc; + new_paca->hw_cpu_id = 0xffff; + new_paca->slb_shadow_ptr = &slb_shadow[cpu]; + new_paca->__current = &init_task; + + } +} diff --git a/trunk/arch/powerpc/kernel/ppc32.h b/trunk/arch/powerpc/kernel/ppc32.h index fda05e2211d6..90e562771791 100644 --- a/trunk/arch/powerpc/kernel/ppc32.h +++ b/trunk/arch/powerpc/kernel/ppc32.h @@ -135,6 +135,4 @@ struct ucontext32 { struct mcontext32 uc_mcontext; }; -extern int copy_siginfo_to_user32(struct compat_siginfo __user *d, siginfo_t *s); - #endif /* _PPC64_PPC32_H */ diff --git a/trunk/arch/powerpc/kernel/process.c b/trunk/arch/powerpc/kernel/process.c index 703100d5e458..6caad17ea72e 100644 --- a/trunk/arch/powerpc/kernel/process.c +++ b/trunk/arch/powerpc/kernel/process.c @@ -1033,3 +1033,34 @@ void ppc64_runlatch_off(void) } } #endif + +#if THREAD_SHIFT < PAGE_SHIFT + +static struct kmem_cache *thread_info_cache; + +struct thread_info *alloc_thread_info(struct task_struct *tsk) +{ + struct thread_info *ti; + + ti = kmem_cache_alloc(thread_info_cache, GFP_KERNEL); + if (unlikely(ti == NULL)) + return NULL; +#ifdef CONFIG_DEBUG_STACK_USAGE + memset(ti, 0, THREAD_SIZE); +#endif + return ti; +} + +void free_thread_info(struct thread_info *ti) +{ + kmem_cache_free(thread_info_cache, ti); +} + +void thread_info_cache_init(void) +{ + thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE, + THREAD_SIZE, 0, NULL); + BUG_ON(thread_info_cache == NULL); +} + +#endif /* THREAD_SHIFT < PAGE_SHIFT */ diff --git a/trunk/arch/powerpc/kernel/prom.c b/trunk/arch/powerpc/kernel/prom.c index 3bfe7837e820..2aefe2a4129a 100644 --- a/trunk/arch/powerpc/kernel/prom.c +++ b/trunk/arch/powerpc/kernel/prom.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef DEBUG #define DBG(fmt...) printk(KERN_ERR fmt) @@ -978,7 +979,10 @@ static int __init early_init_dt_scan_memory(unsigned long node, } #endif lmb_add(base, size); + + memstart_addr = min((u64)memstart_addr, base); } + return 0; } diff --git a/trunk/arch/powerpc/kernel/prom_init_check.sh b/trunk/arch/powerpc/kernel/prom_init_check.sh new file mode 100644 index 000000000000..8e24fc1821e8 --- /dev/null +++ b/trunk/arch/powerpc/kernel/prom_init_check.sh @@ -0,0 +1,58 @@ +#!/bin/sh +# +# Copyright © 2008 IBM Corporation +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. + +# This script checks prom_init.o to see what external symbols it +# is using, if it finds symbols not in the whitelist it returns +# an error. The point of this is to discourage people from +# intentionally or accidentally adding new code to prom_init.c +# which has side effects on other parts of the kernel. + +# If you really need to reference something from prom_init.o add +# it to the list below: + +WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush +_end enter_prom memcpy memset reloc_offset __secondary_hold +__secondary_hold_acknowledge __secondary_hold_spinloop __start +strcmp strcpy strlcpy strlen strncmp strstr logo_linux_clut224 +reloc_got2" + +NM="$1" +OBJ="$2" + +ERROR=0 + +for UNDEF in $($NM -u $OBJ | awk '{print $2}') +do + # On 64-bit nm gives us the function descriptors, which have + # a leading . on the name, so strip it off here. + UNDEF="${UNDEF#.}" + + if [ $KBUILD_VERBOSE ]; then + if [ $KBUILD_VERBOSE -ne 0 ]; then + echo "Checking prom_init.o symbol '$UNDEF'" + fi + fi + + OK=0 + for WHITE in $WHITELIST + do + if [ "$UNDEF" = "$WHITE" ]; then + OK=1 + break + fi + done + + if [ $OK -eq 0 ]; then + ERROR=1 + echo "Error: External symbol '$UNDEF' referenced" \ + "from prom_init.c" >&2 + fi +done + +exit $ERROR diff --git a/trunk/arch/powerpc/kernel/ptrace32.c b/trunk/arch/powerpc/kernel/ptrace32.c index 9d30e10970ac..4c1de6af4c09 100644 --- a/trunk/arch/powerpc/kernel/ptrace32.c +++ b/trunk/arch/powerpc/kernel/ptrace32.c @@ -29,15 +29,12 @@ #include #include #include -#include #include #include #include #include -#include "ppc32.h" - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -67,27 +64,6 @@ static long compat_ptrace_old(struct task_struct *child, long request, return -EPERM; } -static int compat_ptrace_getsiginfo(struct task_struct *child, compat_siginfo_t __user *data) -{ - siginfo_t lastinfo; - int error = -ESRCH; - - read_lock(&tasklist_lock); - if (likely(child->sighand != NULL)) { - error = -EINVAL; - spin_lock_irq(&child->sighand->siglock); - if (likely(child->last_siginfo != NULL)) { - lastinfo = *child->last_siginfo; - error = 0; - } - spin_unlock_irq(&child->sighand->siglock); - } - read_unlock(&tasklist_lock); - if (!error) - return copy_siginfo_to_user32(data, &lastinfo); - return error; -} - long compat_arch_ptrace(struct task_struct *child, compat_long_t request, compat_ulong_t caddr, compat_ulong_t cdata) { @@ -306,9 +282,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, 0, PT_REGS_COUNT * sizeof(compat_long_t), compat_ptr(data)); - case PTRACE_GETSIGINFO: - return compat_ptrace_getsiginfo(child, compat_ptr(data)); - case PTRACE_GETFPREGS: case PTRACE_SETFPREGS: case PTRACE_GETVRREGS: diff --git a/trunk/arch/powerpc/kernel/setup_64.c b/trunk/arch/powerpc/kernel/setup_64.c index 31ada9fdfc5c..dff6308d1b5e 100644 --- a/trunk/arch/powerpc/kernel/setup_64.c +++ b/trunk/arch/powerpc/kernel/setup_64.c @@ -170,6 +170,9 @@ void __init setup_paca(int cpu) void __init early_setup(unsigned long dt_ptr) { + /* Fill in any unititialised pacas */ + initialise_pacas(); + /* Identify CPU type */ identify_cpu(0, mfspr(SPRN_PVR)); @@ -435,7 +438,7 @@ void __init setup_system(void) printk("htab_address = 0x%p\n", htab_address); printk("htab_hash_mask = 0x%lx\n", htab_hash_mask); #if PHYSICAL_START > 0 - printk("physical_start = 0x%x\n", PHYSICAL_START); + printk("physical_start = 0x%lx\n", PHYSICAL_START); #endif printk("-----------------------------------------------------\n"); diff --git a/trunk/arch/powerpc/kernel/stacktrace.c b/trunk/arch/powerpc/kernel/stacktrace.c index e3638eeaaae7..962944038430 100644 --- a/trunk/arch/powerpc/kernel/stacktrace.c +++ b/trunk/arch/powerpc/kernel/stacktrace.c @@ -13,7 +13,6 @@ #include #include #include -#include /* * Save stack-backtrace addresses into a stack_trace buffer. diff --git a/trunk/arch/powerpc/kernel/udbg.c b/trunk/arch/powerpc/kernel/udbg.c index 7aad6203e411..7d6c9bb8c77f 100644 --- a/trunk/arch/powerpc/kernel/udbg.c +++ b/trunk/arch/powerpc/kernel/udbg.c @@ -154,8 +154,8 @@ static void udbg_console_write(struct console *con, const char *s, static struct console udbg_console = { .name = "udbg", .write = udbg_console_write, - .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT, - .index = -1, + .flags = CON_PRINTBUFFER | CON_ENABLED | CON_BOOT | CON_ANYTIME, + .index = 0, }; static int early_console_initialized; diff --git a/trunk/arch/powerpc/kvm/44x_tlb.c b/trunk/arch/powerpc/kvm/44x_tlb.c new file mode 100644 index 000000000000..f5d7a5eab96e --- /dev/null +++ b/trunk/arch/powerpc/kvm/44x_tlb.c @@ -0,0 +1,224 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#include + +#include "44x_tlb.h" + +#define PPC44x_TLB_USER_PERM_MASK (PPC44x_TLB_UX|PPC44x_TLB_UR|PPC44x_TLB_UW) +#define PPC44x_TLB_SUPER_PERM_MASK (PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW) + +static unsigned int kvmppc_tlb_44x_pos; + +static u32 kvmppc_44x_tlb_shadow_attrib(u32 attrib, int usermode) +{ + /* Mask off reserved bits. */ + attrib &= PPC44x_TLB_PERM_MASK|PPC44x_TLB_ATTR_MASK; + + if (!usermode) { + /* Guest is in supervisor mode, so we need to translate guest + * supervisor permissions into user permissions. */ + attrib &= ~PPC44x_TLB_USER_PERM_MASK; + attrib |= (attrib & PPC44x_TLB_SUPER_PERM_MASK) << 3; + } + + /* Make sure host can always access this memory. */ + attrib |= PPC44x_TLB_SX|PPC44x_TLB_SR|PPC44x_TLB_SW; + + return attrib; +} + +/* Search the guest TLB for a matching entry. */ +int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, + unsigned int as) +{ + int i; + + /* XXX Replace loop with fancy data structures. */ + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + struct tlbe *tlbe = &vcpu->arch.guest_tlb[i]; + unsigned int tid; + + if (eaddr < get_tlb_eaddr(tlbe)) + continue; + + if (eaddr > get_tlb_end(tlbe)) + continue; + + tid = get_tlb_tid(tlbe); + if (tid && (tid != pid)) + continue; + + if (!get_tlb_v(tlbe)) + continue; + + if (get_tlb_ts(tlbe) != as) + continue; + + return i; + } + + return -1; +} + +struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_IS); + unsigned int index; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; +} + +struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr) +{ + unsigned int as = !!(vcpu->arch.msr & MSR_DS); + unsigned int index; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, vcpu->arch.pid, as); + if (index == -1) + return NULL; + return &vcpu->arch.guest_tlb[index]; +} + +static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe) +{ + return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW); +} + +/* Must be called with mmap_sem locked for writing. */ +static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu, + unsigned int index) +{ + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[index]; + struct page *page = vcpu->arch.shadow_pages[index]; + + kunmap(vcpu->arch.shadow_pages[index]); + + if (get_tlb_v(stlbe)) { + if (kvmppc_44x_tlbe_is_writable(stlbe)) + kvm_release_page_dirty(page); + else + kvm_release_page_clean(page); + } +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, + u32 flags) +{ + struct page *new_page; + struct tlbe *stlbe; + hpa_t hpaddr; + unsigned int victim; + + /* Future optimization: don't overwrite the TLB entry containing the + * current PC (or stack?). */ + victim = kvmppc_tlb_44x_pos++; + if (kvmppc_tlb_44x_pos > tlb_44x_hwater) + kvmppc_tlb_44x_pos = 0; + stlbe = &vcpu->arch.shadow_tlb[victim]; + + /* Get reference to new page. */ + down_write(¤t->mm->mmap_sem); + new_page = gfn_to_page(vcpu->kvm, gfn); + if (is_error_page(new_page)) { + printk(KERN_ERR "Couldn't get guest page!\n"); + kvm_release_page_clean(new_page); + return; + } + hpaddr = page_to_phys(new_page); + + /* Drop reference to old page. */ + kvmppc_44x_shadow_release(vcpu, victim); + up_write(¤t->mm->mmap_sem); + + vcpu->arch.shadow_pages[victim] = new_page; + + /* XXX Make sure (va, size) doesn't overlap any other + * entries. 440x6 user manual says the result would be + * "undefined." */ + + /* XXX what about AS? */ + + stlbe->tid = asid & 0xff; + + /* Force TS=1 for all guest mappings. */ + /* For now we hardcode 4KB mappings, but it will be important to + * use host large pages in the future. */ + stlbe->word0 = (gvaddr & PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS + | PPC44x_TLB_4K; + + stlbe->word1 = (hpaddr & 0xfffffc00) | ((hpaddr >> 32) & 0xf); + stlbe->word2 = kvmppc_44x_tlb_shadow_attrib(flags, + vcpu->arch.msr & MSR_PR); +} + +void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid) +{ + unsigned int pid = asid & 0xff; + int i; + + /* XXX Replace loop with fancy data structures. */ + down_write(¤t->mm->mmap_sem); + for (i = 0; i <= tlb_44x_hwater; i++) { + struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i]; + unsigned int tid; + + if (!get_tlb_v(stlbe)) + continue; + + if (eaddr < get_tlb_eaddr(stlbe)) + continue; + + if (eaddr > get_tlb_end(stlbe)) + continue; + + tid = get_tlb_tid(stlbe); + if (tid && (tid != pid)) + continue; + + kvmppc_44x_shadow_release(vcpu, i); + stlbe->word0 = 0; + } + up_write(¤t->mm->mmap_sem); +} + +/* Invalidate all mappings, so that when they fault back in they will get the + * proper permission bits. */ +void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode) +{ + int i; + + /* XXX Replace loop with fancy data structures. */ + down_write(¤t->mm->mmap_sem); + for (i = 0; i <= tlb_44x_hwater; i++) { + kvmppc_44x_shadow_release(vcpu, i); + vcpu->arch.shadow_tlb[i].word0 = 0; + } + up_write(¤t->mm->mmap_sem); +} diff --git a/trunk/arch/powerpc/kvm/44x_tlb.h b/trunk/arch/powerpc/kvm/44x_tlb.h new file mode 100644 index 000000000000..2ccd46b6f6b7 --- /dev/null +++ b/trunk/arch/powerpc/kvm/44x_tlb.h @@ -0,0 +1,91 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard + */ + +#ifndef __KVM_POWERPC_TLB_H__ +#define __KVM_POWERPC_TLB_H__ + +#include +#include + +extern int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, + unsigned int pid, unsigned int as); +extern struct tlbe *kvmppc_44x_dtlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); +extern struct tlbe *kvmppc_44x_itlb_search(struct kvm_vcpu *vcpu, gva_t eaddr); + +/* TLB helper functions */ +static inline unsigned int get_tlb_size(const struct tlbe *tlbe) +{ + return (tlbe->word0 >> 4) & 0xf; +} + +static inline gva_t get_tlb_eaddr(const struct tlbe *tlbe) +{ + return tlbe->word0 & 0xfffffc00; +} + +static inline gva_t get_tlb_bytes(const struct tlbe *tlbe) +{ + unsigned int pgsize = get_tlb_size(tlbe); + return 1 << 10 << (pgsize << 1); +} + +static inline gva_t get_tlb_end(const struct tlbe *tlbe) +{ + return get_tlb_eaddr(tlbe) + get_tlb_bytes(tlbe) - 1; +} + +static inline u64 get_tlb_raddr(const struct tlbe *tlbe) +{ + u64 word1 = tlbe->word1; + return ((word1 & 0xf) << 32) | (word1 & 0xfffffc00); +} + +static inline unsigned int get_tlb_tid(const struct tlbe *tlbe) +{ + return tlbe->tid & 0xff; +} + +static inline unsigned int get_tlb_ts(const struct tlbe *tlbe) +{ + return (tlbe->word0 >> 8) & 0x1; +} + +static inline unsigned int get_tlb_v(const struct tlbe *tlbe) +{ + return (tlbe->word0 >> 9) & 0x1; +} + +static inline unsigned int get_mmucr_stid(const struct kvm_vcpu *vcpu) +{ + return vcpu->arch.mmucr & 0xff; +} + +static inline unsigned int get_mmucr_sts(const struct kvm_vcpu *vcpu) +{ + return (vcpu->arch.mmucr >> 16) & 0x1; +} + +static inline gpa_t tlb_xlate(struct tlbe *tlbe, gva_t eaddr) +{ + unsigned int pgmask = get_tlb_bytes(tlbe) - 1; + + return get_tlb_raddr(tlbe) | (eaddr & pgmask); +} + +#endif /* __KVM_POWERPC_TLB_H__ */ diff --git a/trunk/arch/powerpc/kvm/Kconfig b/trunk/arch/powerpc/kvm/Kconfig new file mode 100644 index 000000000000..6b076010213b --- /dev/null +++ b/trunk/arch/powerpc/kvm/Kconfig @@ -0,0 +1,42 @@ +# +# KVM configuration +# + +menuconfig VIRTUALIZATION + bool "Virtualization" + ---help--- + Say Y here to get to see options for using your Linux host to run + other operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and + disabled. + +if VIRTUALIZATION + +config KVM + bool "Kernel-based Virtual Machine (KVM) support" + depends on 44x && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + # We can only run on Book E hosts so far + select KVM_BOOKE_HOST + ---help--- + Support hosting virtualized guest machines. You will also + need to select one or more of the processor modules below. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + If unsure, say N. + +config KVM_BOOKE_HOST + bool "KVM host support for Book E PowerPC processors" + depends on KVM && 44x + ---help--- + Provides host support for KVM on Book E PowerPC processors. Currently + this works on 440 processors only. + +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/trunk/arch/powerpc/kvm/Makefile b/trunk/arch/powerpc/kvm/Makefile new file mode 100644 index 000000000000..d0d358d367ec --- /dev/null +++ b/trunk/arch/powerpc/kvm/Makefile @@ -0,0 +1,15 @@ +# +# Makefile for Kernel-based Virtual Machine module +# + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/powerpc/kvm + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) + +kvm-objs := $(common-objs) powerpc.o emulate.o booke_guest.o +obj-$(CONFIG_KVM) += kvm.o + +AFLAGS_booke_interrupts.o := -I$(obj) + +kvm-booke-host-objs := booke_host.o booke_interrupts.o 44x_tlb.o +obj-$(CONFIG_KVM_BOOKE_HOST) += kvm-booke-host.o diff --git a/trunk/arch/powerpc/kvm/booke_guest.c b/trunk/arch/powerpc/kvm/booke_guest.c new file mode 100644 index 000000000000..6d9884a6884a --- /dev/null +++ b/trunk/arch/powerpc/kvm/booke_guest.c @@ -0,0 +1,615 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard + * Christian Ehrhardt + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "44x_tlb.h" + +#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "exits", VCPU_STAT(sum_exits) }, + { "mmio", VCPU_STAT(mmio_exits) }, + { "dcr", VCPU_STAT(dcr_exits) }, + { "sig", VCPU_STAT(signal_exits) }, + { "light", VCPU_STAT(light_exits) }, + { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, + { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, + { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, + { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, + { "sysc", VCPU_STAT(syscall_exits) }, + { "isi", VCPU_STAT(isi_exits) }, + { "dsi", VCPU_STAT(dsi_exits) }, + { "inst_emu", VCPU_STAT(emulated_inst_exits) }, + { "dec", VCPU_STAT(dec_exits) }, + { "ext_intr", VCPU_STAT(ext_intr_exits) }, + { NULL } +}; + +static const u32 interrupt_msr_mask[16] = { + [BOOKE_INTERRUPT_CRITICAL] = MSR_ME, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 0, + [BOOKE_INTERRUPT_DATA_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_INST_STORAGE] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_EXTERNAL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ALIGNMENT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_PROGRAM] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_SYSCALL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_AP_UNAVAIL] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DECREMENTER] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_FIT] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_WATCHDOG] = MSR_ME, + [BOOKE_INTERRUPT_DTLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_ITLB_MISS] = MSR_CE|MSR_ME|MSR_DE, + [BOOKE_INTERRUPT_DEBUG] = MSR_ME, +}; + +const unsigned char exception_priority[] = { + [BOOKE_INTERRUPT_DATA_STORAGE] = 0, + [BOOKE_INTERRUPT_INST_STORAGE] = 1, + [BOOKE_INTERRUPT_ALIGNMENT] = 2, + [BOOKE_INTERRUPT_PROGRAM] = 3, + [BOOKE_INTERRUPT_FP_UNAVAIL] = 4, + [BOOKE_INTERRUPT_SYSCALL] = 5, + [BOOKE_INTERRUPT_AP_UNAVAIL] = 6, + [BOOKE_INTERRUPT_DTLB_MISS] = 7, + [BOOKE_INTERRUPT_ITLB_MISS] = 8, + [BOOKE_INTERRUPT_MACHINE_CHECK] = 9, + [BOOKE_INTERRUPT_DEBUG] = 10, + [BOOKE_INTERRUPT_CRITICAL] = 11, + [BOOKE_INTERRUPT_WATCHDOG] = 12, + [BOOKE_INTERRUPT_EXTERNAL] = 13, + [BOOKE_INTERRUPT_FIT] = 14, + [BOOKE_INTERRUPT_DECREMENTER] = 15, +}; + +const unsigned char priority_exception[] = { + BOOKE_INTERRUPT_DATA_STORAGE, + BOOKE_INTERRUPT_INST_STORAGE, + BOOKE_INTERRUPT_ALIGNMENT, + BOOKE_INTERRUPT_PROGRAM, + BOOKE_INTERRUPT_FP_UNAVAIL, + BOOKE_INTERRUPT_SYSCALL, + BOOKE_INTERRUPT_AP_UNAVAIL, + BOOKE_INTERRUPT_DTLB_MISS, + BOOKE_INTERRUPT_ITLB_MISS, + BOOKE_INTERRUPT_MACHINE_CHECK, + BOOKE_INTERRUPT_DEBUG, + BOOKE_INTERRUPT_CRITICAL, + BOOKE_INTERRUPT_WATCHDOG, + BOOKE_INTERRUPT_EXTERNAL, + BOOKE_INTERRUPT_FIT, + BOOKE_INTERRUPT_DECREMENTER, +}; + + +void kvmppc_dump_tlbs(struct kvm_vcpu *vcpu) +{ + struct tlbe *tlbe; + int i; + + printk("vcpu %d TLB dump:\n", vcpu->vcpu_id); + printk("| %2s | %3s | %8s | %8s | %8s |\n", + "nr", "tid", "word0", "word1", "word2"); + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.guest_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" G%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } + + for (i = 0; i < PPC44x_TLB_SIZE; i++) { + tlbe = &vcpu->arch.shadow_tlb[i]; + if (tlbe->word0 & PPC44x_TLB_VALID) + printk(" S%2d | %02X | %08X | %08X | %08X |\n", + i, tlbe->tid, tlbe->word0, tlbe->word1, + tlbe->word2); + } +} + +/* TODO: use vcpu_printf() */ +void kvmppc_dump_vcpu(struct kvm_vcpu *vcpu) +{ + int i; + + printk("pc: %08x msr: %08x\n", vcpu->arch.pc, vcpu->arch.msr); + printk("lr: %08x ctr: %08x\n", vcpu->arch.lr, vcpu->arch.ctr); + printk("srr0: %08x srr1: %08x\n", vcpu->arch.srr0, vcpu->arch.srr1); + + printk("exceptions: %08lx\n", vcpu->arch.pending_exceptions); + + for (i = 0; i < 32; i += 4) { + printk("gpr%02d: %08x %08x %08x %08x\n", i, + vcpu->arch.gpr[i], + vcpu->arch.gpr[i+1], + vcpu->arch.gpr[i+2], + vcpu->arch.gpr[i+3]); + } +} + +/* Check if we are ready to deliver the interrupt */ +static int kvmppc_can_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +{ + int r; + + switch (interrupt) { + case BOOKE_INTERRUPT_CRITICAL: + r = vcpu->arch.msr & MSR_CE; + break; + case BOOKE_INTERRUPT_MACHINE_CHECK: + r = vcpu->arch.msr & MSR_ME; + break; + case BOOKE_INTERRUPT_EXTERNAL: + r = vcpu->arch.msr & MSR_EE; + break; + case BOOKE_INTERRUPT_DECREMENTER: + r = vcpu->arch.msr & MSR_EE; + break; + case BOOKE_INTERRUPT_FIT: + r = vcpu->arch.msr & MSR_EE; + break; + case BOOKE_INTERRUPT_WATCHDOG: + r = vcpu->arch.msr & MSR_CE; + break; + case BOOKE_INTERRUPT_DEBUG: + r = vcpu->arch.msr & MSR_DE; + break; + default: + r = 1; + } + + return r; +} + +static void kvmppc_deliver_interrupt(struct kvm_vcpu *vcpu, int interrupt) +{ + switch (interrupt) { + case BOOKE_INTERRUPT_DECREMENTER: + vcpu->arch.tsr |= TSR_DIS; + break; + } + + vcpu->arch.srr0 = vcpu->arch.pc; + vcpu->arch.srr1 = vcpu->arch.msr; + vcpu->arch.pc = vcpu->arch.ivpr | vcpu->arch.ivor[interrupt]; + kvmppc_set_msr(vcpu, vcpu->arch.msr & interrupt_msr_mask[interrupt]); +} + +/* Check pending exceptions and deliver one, if possible. */ +void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu) +{ + unsigned long *pending = &vcpu->arch.pending_exceptions; + unsigned int exception; + unsigned int priority; + + priority = find_first_bit(pending, BITS_PER_BYTE * sizeof(*pending)); + while (priority <= BOOKE_MAX_INTERRUPT) { + exception = priority_exception[priority]; + if (kvmppc_can_deliver_interrupt(vcpu, exception)) { + kvmppc_clear_exception(vcpu, exception); + kvmppc_deliver_interrupt(vcpu, exception); + break; + } + + priority = find_next_bit(pending, + BITS_PER_BYTE * sizeof(*pending), + priority + 1); + } +} + +static int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + int r; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + /* We must reload nonvolatiles because "update" load/store + * instructions modify register state. */ + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_HOST_NV; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, + vcpu->arch.last_inst); + r = RESUME_HOST; + break; + default: + BUG(); + } + + return r; +} + +/** + * kvmppc_handle_exit + * + * Return value is in the form (errcode<<2 | RESUME_FLAG_HOST | RESUME_FLAG_NV) + */ +int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int exit_nr) +{ + enum emulation_result er; + int r = RESUME_HOST; + + local_irq_enable(); + + run->exit_reason = KVM_EXIT_UNKNOWN; + run->ready_for_interrupt_injection = 1; + + switch (exit_nr) { + case BOOKE_INTERRUPT_MACHINE_CHECK: + printk("MACHINE CHECK: %lx\n", mfspr(SPRN_MCSR)); + kvmppc_dump_vcpu(vcpu); + r = RESUME_HOST; + break; + + case BOOKE_INTERRUPT_EXTERNAL: + case BOOKE_INTERRUPT_DECREMENTER: + /* Since we switched IVPR back to the host's value, the host + * handled this interrupt the moment we enabled interrupts. + * Now we just offer it a chance to reschedule the guest. */ + + /* XXX At this point the TLB still holds our shadow TLB, so if + * we do reschedule the host will fault over it. Perhaps we + * should politely restore the host's entries to minimize + * misses before ceding control. */ + if (need_resched()) + cond_resched(); + if (exit_nr == BOOKE_INTERRUPT_DECREMENTER) + vcpu->stat.dec_exits++; + else + vcpu->stat.ext_intr_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_PROGRAM: + if (vcpu->arch.msr & MSR_PR) { + /* Program traps generated by user-level software must be handled + * by the guest kernel. */ + vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + r = RESUME_GUEST; + break; + } + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if + * they were actually modified by emulation. */ + vcpu->stat.emulated_inst_exits++; + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_DCR: + run->exit_reason = KVM_EXIT_DCR; + r = RESUME_HOST; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_CRIT "%s: emulation at %x failed (%08x)\n", + __func__, vcpu->arch.pc, vcpu->arch.last_inst); + /* For debugging, encode the failing instruction and + * report it to userspace. */ + run->hw.hardware_exit_reason = ~0ULL << 32; + run->hw.hardware_exit_reason |= vcpu->arch.last_inst; + r = RESUME_HOST; + break; + default: + BUG(); + } + break; + + case BOOKE_INTERRUPT_DATA_STORAGE: + vcpu->arch.dear = vcpu->arch.fault_dear; + vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.dsi_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_INST_STORAGE: + vcpu->arch.esr = vcpu->arch.fault_esr; + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.isi_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_SYSCALL: + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.syscall_exits++; + r = RESUME_GUEST; + break; + + case BOOKE_INTERRUPT_DTLB_MISS: { + struct tlbe *gtlbe; + unsigned long eaddr = vcpu->arch.fault_dear; + gfn_t gfn; + + /* Check the guest TLB. */ + gtlbe = kvmppc_44x_dtlb_search(vcpu, eaddr); + if (!gtlbe) { + /* The guest didn't have a mapping for it. */ + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->arch.dear = vcpu->arch.fault_dear; + vcpu->arch.esr = vcpu->arch.fault_esr; + vcpu->stat.dtlb_real_miss_exits++; + r = RESUME_GUEST; + break; + } + + vcpu->arch.paddr_accessed = tlb_xlate(gtlbe, eaddr); + gfn = vcpu->arch.paddr_accessed >> PAGE_SHIFT; + + if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* The guest TLB had a mapping, but the shadow TLB + * didn't, and it is RAM. This could be because: + * a) the entry is mapping the host kernel, or + * b) the guest used a large mapping which we're faking + * Either way, we need to satisfy the fault without + * invoking the guest. */ + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); + vcpu->stat.dtlb_virt_miss_exits++; + r = RESUME_GUEST; + } else { + /* Guest has mapped and accessed a page which is not + * actually RAM. */ + r = kvmppc_emulate_mmio(run, vcpu); + } + + break; + } + + case BOOKE_INTERRUPT_ITLB_MISS: { + struct tlbe *gtlbe; + unsigned long eaddr = vcpu->arch.pc; + gfn_t gfn; + + r = RESUME_GUEST; + + /* Check the guest TLB. */ + gtlbe = kvmppc_44x_itlb_search(vcpu, eaddr); + if (!gtlbe) { + /* The guest didn't have a mapping for it. */ + kvmppc_queue_exception(vcpu, exit_nr); + vcpu->stat.itlb_real_miss_exits++; + break; + } + + vcpu->stat.itlb_virt_miss_exits++; + + gfn = tlb_xlate(gtlbe, eaddr) >> PAGE_SHIFT; + + if (kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* The guest TLB had a mapping, but the shadow TLB + * didn't. This could be because: + * a) the entry is mapping the host kernel, or + * b) the guest used a large mapping which we're faking + * Either way, we need to satisfy the fault without + * invoking the guest. */ + kvmppc_mmu_map(vcpu, eaddr, gfn, gtlbe->tid, + gtlbe->word2); + } else { + /* Guest mapped and leaped at non-RAM! */ + kvmppc_queue_exception(vcpu, + BOOKE_INTERRUPT_MACHINE_CHECK); + } + + break; + } + + default: + printk(KERN_EMERG "exit_nr %d\n", exit_nr); + BUG(); + } + + local_irq_disable(); + + kvmppc_check_and_deliver_interrupts(vcpu); + + /* Do some exit accounting. */ + vcpu->stat.sum_exits++; + if (!(r & RESUME_HOST)) { + /* To avoid clobbering exit_reason, only check for signals if + * we aren't already exiting to userspace for some other + * reason. */ + if (signal_pending(current)) { + run->exit_reason = KVM_EXIT_INTR; + r = (-EINTR << 2) | RESUME_HOST | (r & RESUME_FLAG_NV); + + vcpu->stat.signal_exits++; + } else { + vcpu->stat.light_exits++; + } + } else { + switch (run->exit_reason) { + case KVM_EXIT_MMIO: + vcpu->stat.mmio_exits++; + break; + case KVM_EXIT_DCR: + vcpu->stat.dcr_exits++; + break; + case KVM_EXIT_INTR: + vcpu->stat.signal_exits++; + break; + } + } + + return r; +} + +/* Initial guest state: 16MB mapping 0 -> 0, PC = 0, MSR = 0, R1 = 16MB */ +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + struct tlbe *tlbe = &vcpu->arch.guest_tlb[0]; + + tlbe->tid = 0; + tlbe->word0 = PPC44x_TLB_16M | PPC44x_TLB_VALID; + tlbe->word1 = 0; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR; + + tlbe++; + tlbe->tid = 0; + tlbe->word0 = 0xef600000 | PPC44x_TLB_4K | PPC44x_TLB_VALID; + tlbe->word1 = 0xef600000; + tlbe->word2 = PPC44x_TLB_SX | PPC44x_TLB_SW | PPC44x_TLB_SR + | PPC44x_TLB_I | PPC44x_TLB_G; + + vcpu->arch.pc = 0; + vcpu->arch.msr = 0; + vcpu->arch.gpr[1] = (16<<20) - 8; /* -8 for the callee-save LR slot */ + + /* Eye-catching number so we know if the guest takes an interrupt + * before it's programmed its own IVPR. */ + vcpu->arch.ivpr = 0x55550000; + + /* Since the guest can directly access the timebase, it must know the + * real timebase frequency. Accordingly, it must see the state of + * CCR1[TCS]. */ + vcpu->arch.ccr1 = mfspr(SPRN_CCR1); + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + regs->pc = vcpu->arch.pc; + regs->cr = vcpu->arch.cr; + regs->ctr = vcpu->arch.ctr; + regs->lr = vcpu->arch.lr; + regs->xer = vcpu->arch.xer; + regs->msr = vcpu->arch.msr; + regs->srr0 = vcpu->arch.srr0; + regs->srr1 = vcpu->arch.srr1; + regs->pid = vcpu->arch.pid; + regs->sprg0 = vcpu->arch.sprg0; + regs->sprg1 = vcpu->arch.sprg1; + regs->sprg2 = vcpu->arch.sprg2; + regs->sprg3 = vcpu->arch.sprg3; + regs->sprg5 = vcpu->arch.sprg4; + regs->sprg6 = vcpu->arch.sprg5; + regs->sprg7 = vcpu->arch.sprg6; + + for (i = 0; i < ARRAY_SIZE(regs->gpr); i++) + regs->gpr[i] = vcpu->arch.gpr[i]; + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + int i; + + vcpu->arch.pc = regs->pc; + vcpu->arch.cr = regs->cr; + vcpu->arch.ctr = regs->ctr; + vcpu->arch.lr = regs->lr; + vcpu->arch.xer = regs->xer; + vcpu->arch.msr = regs->msr; + vcpu->arch.srr0 = regs->srr0; + vcpu->arch.srr1 = regs->srr1; + vcpu->arch.sprg0 = regs->sprg0; + vcpu->arch.sprg1 = regs->sprg1; + vcpu->arch.sprg2 = regs->sprg2; + vcpu->arch.sprg3 = regs->sprg3; + vcpu->arch.sprg5 = regs->sprg4; + vcpu->arch.sprg6 = regs->sprg5; + vcpu->arch.sprg7 = regs->sprg6; + + for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++) + vcpu->arch.gpr[i] = regs->gpr[i]; + + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + return -ENOTSUPP; +} + +/* 'linear_address' is actually an encoding of AS|PID|EADDR . */ +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + struct tlbe *gtlbe; + int index; + gva_t eaddr; + u8 pid; + u8 as; + + eaddr = tr->linear_address; + pid = (tr->linear_address >> 32) & 0xff; + as = (tr->linear_address >> 40) & 0x1; + + index = kvmppc_44x_tlb_index(vcpu, eaddr, pid, as); + if (index == -1) { + tr->valid = 0; + return 0; + } + + gtlbe = &vcpu->arch.guest_tlb[index]; + + tr->physical_address = tlb_xlate(gtlbe, eaddr); + /* XXX what does "writeable" and "usermode" even mean? */ + tr->valid = 1; + + return 0; +} diff --git a/trunk/arch/powerpc/kvm/booke_host.c b/trunk/arch/powerpc/kvm/booke_host.c new file mode 100644 index 000000000000..b480341bc31e --- /dev/null +++ b/trunk/arch/powerpc/kvm/booke_host.c @@ -0,0 +1,83 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2008 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include + +unsigned long kvmppc_booke_handlers; + +static int kvmppc_booke_init(void) +{ + unsigned long ivor[16]; + unsigned long max_ivor = 0; + int i; + + /* We install our own exception handlers by hijacking IVPR. IVPR must + * be 16-bit aligned, so we need a 64KB allocation. */ + kvmppc_booke_handlers = __get_free_pages(GFP_KERNEL | __GFP_ZERO, + VCPU_SIZE_ORDER); + if (!kvmppc_booke_handlers) + return -ENOMEM; + + /* XXX make sure our handlers are smaller than Linux's */ + + /* Copy our interrupt handlers to match host IVORs. That way we don't + * have to swap the IVORs on every guest/host transition. */ + ivor[0] = mfspr(SPRN_IVOR0); + ivor[1] = mfspr(SPRN_IVOR1); + ivor[2] = mfspr(SPRN_IVOR2); + ivor[3] = mfspr(SPRN_IVOR3); + ivor[4] = mfspr(SPRN_IVOR4); + ivor[5] = mfspr(SPRN_IVOR5); + ivor[6] = mfspr(SPRN_IVOR6); + ivor[7] = mfspr(SPRN_IVOR7); + ivor[8] = mfspr(SPRN_IVOR8); + ivor[9] = mfspr(SPRN_IVOR9); + ivor[10] = mfspr(SPRN_IVOR10); + ivor[11] = mfspr(SPRN_IVOR11); + ivor[12] = mfspr(SPRN_IVOR12); + ivor[13] = mfspr(SPRN_IVOR13); + ivor[14] = mfspr(SPRN_IVOR14); + ivor[15] = mfspr(SPRN_IVOR15); + + for (i = 0; i < 16; i++) { + if (ivor[i] > max_ivor) + max_ivor = ivor[i]; + + memcpy((void *)kvmppc_booke_handlers + ivor[i], + kvmppc_handlers_start + i * kvmppc_handler_len, + kvmppc_handler_len); + } + flush_icache_range(kvmppc_booke_handlers, + kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvmppc_booke_exit(void) +{ + free_pages(kvmppc_booke_handlers, VCPU_SIZE_ORDER); + kvm_exit(); +} + +module_init(kvmppc_booke_init) +module_exit(kvmppc_booke_exit) diff --git a/trunk/arch/powerpc/kvm/booke_interrupts.S b/trunk/arch/powerpc/kvm/booke_interrupts.S new file mode 100644 index 000000000000..3b653b5309b8 --- /dev/null +++ b/trunk/arch/powerpc/kvm/booke_interrupts.S @@ -0,0 +1,436 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard + */ + +#include +#include +#include +#include +#include +#include + +#define KVMPPC_MSR_MASK (MSR_CE|MSR_EE|MSR_PR|MSR_DE|MSR_ME|MSR_IS|MSR_DS) + +#define VCPU_GPR(n) (VCPU_GPRS + (n * 4)) + +/* The host stack layout: */ +#define HOST_R1 0 /* Implied by stwu. */ +#define HOST_CALLEE_LR 4 +#define HOST_RUN 8 +/* r2 is special: it holds 'current', and it made nonvolatile in the + * kernel with the -ffixed-r2 gcc option. */ +#define HOST_R2 12 +#define HOST_NV_GPRS 16 +#define HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * 4)) +#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(31) + 4) +#define HOST_STACK_SIZE (((HOST_MIN_STACK_SIZE + 15) / 16) * 16) /* Align. */ +#define HOST_STACK_LR (HOST_STACK_SIZE + 4) /* In caller stack frame. */ + +#define NEED_INST_MASK ((1< + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "44x_tlb.h" + +/* Instruction decoding */ +static inline unsigned int get_op(u32 inst) +{ + return inst >> 26; +} + +static inline unsigned int get_xop(u32 inst) +{ + return (inst >> 1) & 0x3ff; +} + +static inline unsigned int get_sprn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_dcrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + +static inline unsigned int get_rt(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_rs(u32 inst) +{ + return (inst >> 21) & 0x1f; +} + +static inline unsigned int get_ra(u32 inst) +{ + return (inst >> 16) & 0x1f; +} + +static inline unsigned int get_rb(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_rc(u32 inst) +{ + return inst & 0x1; +} + +static inline unsigned int get_ws(u32 inst) +{ + return (inst >> 11) & 0x1f; +} + +static inline unsigned int get_d(u32 inst) +{ + return inst & 0xffff; +} + +static int tlbe_is_host_safe(const struct kvm_vcpu *vcpu, + const struct tlbe *tlbe) +{ + gpa_t gpa; + + if (!get_tlb_v(tlbe)) + return 0; + + /* Does it match current guest AS? */ + /* XXX what about IS != DS? */ + if (get_tlb_ts(tlbe) != !!(vcpu->arch.msr & MSR_IS)) + return 0; + + gpa = get_tlb_raddr(tlbe); + if (!gfn_to_memslot(vcpu->kvm, gpa >> PAGE_SHIFT)) + /* Mapping is not for RAM. */ + return 0; + + return 1; +} + +static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) +{ + u64 eaddr; + u64 raddr; + u64 asid; + u32 flags; + struct tlbe *tlbe; + unsigned int ra; + unsigned int rs; + unsigned int ws; + unsigned int index; + + ra = get_ra(inst); + rs = get_rs(inst); + ws = get_ws(inst); + + index = vcpu->arch.gpr[ra]; + if (index > PPC44x_TLB_SIZE) { + printk("%s: index %d\n", __func__, index); + kvmppc_dump_vcpu(vcpu); + return EMULATE_FAIL; + } + + tlbe = &vcpu->arch.guest_tlb[index]; + + /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ + if (tlbe->word0 & PPC44x_TLB_VALID) { + eaddr = get_tlb_eaddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + kvmppc_mmu_invalidate(vcpu, eaddr, asid); + } + + switch (ws) { + case PPC44x_TLB_PAGEID: + tlbe->tid = vcpu->arch.mmucr & 0xff; + tlbe->word0 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_XLAT: + tlbe->word1 = vcpu->arch.gpr[rs]; + break; + + case PPC44x_TLB_ATTRIB: + tlbe->word2 = vcpu->arch.gpr[rs]; + break; + + default: + return EMULATE_FAIL; + } + + if (tlbe_is_host_safe(vcpu, tlbe)) { + eaddr = get_tlb_eaddr(tlbe); + raddr = get_tlb_raddr(tlbe); + asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; + flags = tlbe->word2 & 0xffff; + + /* Create a 4KB mapping on the host. If the guest wanted a + * large page, only the first 4KB is mapped here and the rest + * are mapped on the fly. */ + kvmppc_mmu_map(vcpu, eaddr, raddr >> PAGE_SHIFT, asid, flags); + } + + return EMULATE_DONE; +} + +static void kvmppc_emulate_dec(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.tcr & TCR_DIE) { + /* The decrementer ticks at the same rate as the timebase, so + * that's how we convert the guest DEC value to the number of + * host ticks. */ + unsigned long nr_jiffies; + + nr_jiffies = vcpu->arch.dec / tb_ticks_per_jiffy; + mod_timer(&vcpu->arch.dec_timer, + get_jiffies_64() + nr_jiffies); + } else { + del_timer(&vcpu->arch.dec_timer); + } +} + +static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.srr0; + kvmppc_set_msr(vcpu, vcpu->arch.srr1); +} + +/* XXX to do: + * lhax + * lhaux + * lswx + * lswi + * stswx + * stswi + * lha + * lhau + * lmw + * stmw + * + * XXX is_bigendian should depend on MMU mapping or MSR[LE] + */ +int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + u32 inst = vcpu->arch.last_inst; + u32 ea; + int ra; + int rb; + int rc; + int rs; + int rt; + int sprn; + int dcrn; + enum emulation_result emulated = EMULATE_DONE; + int advance = 1; + + switch (get_op(inst)) { + case 3: /* trap */ + printk("trap!\n"); + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_PROGRAM); + advance = 0; + break; + + case 19: + switch (get_xop(inst)) { + case 50: /* rfi */ + kvmppc_emul_rfi(vcpu); + advance = 0; + break; + + default: + emulated = EMULATE_FAIL; + break; + } + break; + + case 31: + switch (get_xop(inst)) { + + case 83: /* mfmsr */ + rt = get_rt(inst); + vcpu->arch.gpr[rt] = vcpu->arch.msr; + break; + + case 87: /* lbzx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case 131: /* wrtee */ + rs = get_rs(inst); + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (vcpu->arch.gpr[rs] & MSR_EE); + break; + + case 146: /* mtmsr */ + rs = get_rs(inst); + kvmppc_set_msr(vcpu, vcpu->arch.gpr[rs]); + break; + + case 163: /* wrteei */ + vcpu->arch.msr = (vcpu->arch.msr & ~MSR_EE) + | (inst & MSR_EE); + break; + + case 215: /* stbx */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 1, 1); + break; + + case 247: /* stbux */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 1, 1); + vcpu->arch.gpr[rs] = ea; + break; + + case 279: /* lhzx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case 311: /* lhzux */ + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + vcpu->arch.gpr[ra] = ea; + break; + + case 323: /* mfdcr */ + dcrn = get_dcrn(inst); + rt = get_rt(inst); + + /* The guest may access CPR0 registers to determine the timebase + * frequency, and it must know the real host frequency because it + * can directly access the timebase registers. + * + * It would be possible to emulate those accesses in userspace, + * but userspace can really only figure out the end frequency. + * We could decompose that into the factors that compute it, but + * that's tricky math, and it's easier to just report the real + * CPR0 values. + */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.gpr[rt] = vcpu->arch.cpr0_cfgaddr; + break; + case DCRN_CPR0_CONFIG_DATA: + local_irq_disable(); + mtdcr(DCRN_CPR0_CONFIG_ADDR, + vcpu->arch.cpr0_cfgaddr); + vcpu->arch.gpr[rt] = mfdcr(DCRN_CPR0_CONFIG_DATA); + local_irq_enable(); + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = 0; + run->dcr.is_write = 0; + vcpu->arch.io_gpr = rt; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + + case 339: /* mfspr */ + sprn = get_sprn(inst); + rt = get_rt(inst); + + switch (sprn) { + case SPRN_SRR0: + vcpu->arch.gpr[rt] = vcpu->arch.srr0; break; + case SPRN_SRR1: + vcpu->arch.gpr[rt] = vcpu->arch.srr1; break; + case SPRN_MMUCR: + vcpu->arch.gpr[rt] = vcpu->arch.mmucr; break; + case SPRN_PID: + vcpu->arch.gpr[rt] = vcpu->arch.pid; break; + case SPRN_IVPR: + vcpu->arch.gpr[rt] = vcpu->arch.ivpr; break; + case SPRN_CCR0: + vcpu->arch.gpr[rt] = vcpu->arch.ccr0; break; + case SPRN_CCR1: + vcpu->arch.gpr[rt] = vcpu->arch.ccr1; break; + case SPRN_PVR: + vcpu->arch.gpr[rt] = vcpu->arch.pvr; break; + case SPRN_DEAR: + vcpu->arch.gpr[rt] = vcpu->arch.dear; break; + case SPRN_ESR: + vcpu->arch.gpr[rt] = vcpu->arch.esr; break; + case SPRN_DBCR0: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr0; break; + case SPRN_DBCR1: + vcpu->arch.gpr[rt] = vcpu->arch.dbcr1; break; + + /* Note: mftb and TBRL/TBWL are user-accessible, so + * the guest can always access the real TB anyways. + * In fact, we probably will never see these traps. */ + case SPRN_TBWL: + vcpu->arch.gpr[rt] = mftbl(); break; + case SPRN_TBWU: + vcpu->arch.gpr[rt] = mftbu(); break; + + case SPRN_SPRG0: + vcpu->arch.gpr[rt] = vcpu->arch.sprg0; break; + case SPRN_SPRG1: + vcpu->arch.gpr[rt] = vcpu->arch.sprg1; break; + case SPRN_SPRG2: + vcpu->arch.gpr[rt] = vcpu->arch.sprg2; break; + case SPRN_SPRG3: + vcpu->arch.gpr[rt] = vcpu->arch.sprg3; break; + /* Note: SPRG4-7 are user-readable, so we don't get + * a trap. */ + + case SPRN_IVOR0: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[0]; break; + case SPRN_IVOR1: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[1]; break; + case SPRN_IVOR2: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[2]; break; + case SPRN_IVOR3: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[3]; break; + case SPRN_IVOR4: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[4]; break; + case SPRN_IVOR5: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[5]; break; + case SPRN_IVOR6: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[6]; break; + case SPRN_IVOR7: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[7]; break; + case SPRN_IVOR8: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[8]; break; + case SPRN_IVOR9: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[9]; break; + case SPRN_IVOR10: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[10]; break; + case SPRN_IVOR11: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[11]; break; + case SPRN_IVOR12: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[12]; break; + case SPRN_IVOR13: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[13]; break; + case SPRN_IVOR14: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[14]; break; + case SPRN_IVOR15: + vcpu->arch.gpr[rt] = vcpu->arch.ivor[15]; break; + + default: + printk("mfspr: unknown spr %x\n", sprn); + vcpu->arch.gpr[rt] = 0; + break; + } + break; + + case 407: /* sthx */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 2, 1); + break; + + case 439: /* sthux */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 2, 1); + vcpu->arch.gpr[ra] = ea; + break; + + case 451: /* mtdcr */ + dcrn = get_dcrn(inst); + rs = get_rs(inst); + + /* emulate some access in kernel */ + switch (dcrn) { + case DCRN_CPR0_CONFIG_ADDR: + vcpu->arch.cpr0_cfgaddr = vcpu->arch.gpr[rs]; + break; + default: + run->dcr.dcrn = dcrn; + run->dcr.data = vcpu->arch.gpr[rs]; + run->dcr.is_write = 1; + vcpu->arch.dcr_needed = 1; + emulated = EMULATE_DO_DCR; + } + + break; + + case 467: /* mtspr */ + sprn = get_sprn(inst); + rs = get_rs(inst); + switch (sprn) { + case SPRN_SRR0: + vcpu->arch.srr0 = vcpu->arch.gpr[rs]; break; + case SPRN_SRR1: + vcpu->arch.srr1 = vcpu->arch.gpr[rs]; break; + case SPRN_MMUCR: + vcpu->arch.mmucr = vcpu->arch.gpr[rs]; break; + case SPRN_PID: + vcpu->arch.pid = vcpu->arch.gpr[rs]; break; + case SPRN_CCR0: + vcpu->arch.ccr0 = vcpu->arch.gpr[rs]; break; + case SPRN_CCR1: + vcpu->arch.ccr1 = vcpu->arch.gpr[rs]; break; + case SPRN_DEAR: + vcpu->arch.dear = vcpu->arch.gpr[rs]; break; + case SPRN_ESR: + vcpu->arch.esr = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR0: + vcpu->arch.dbcr0 = vcpu->arch.gpr[rs]; break; + case SPRN_DBCR1: + vcpu->arch.dbcr1 = vcpu->arch.gpr[rs]; break; + + /* XXX We need to context-switch the timebase for + * watchdog and FIT. */ + case SPRN_TBWL: break; + case SPRN_TBWU: break; + + case SPRN_DEC: + vcpu->arch.dec = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + case SPRN_TSR: + vcpu->arch.tsr &= ~vcpu->arch.gpr[rs]; break; + + case SPRN_TCR: + vcpu->arch.tcr = vcpu->arch.gpr[rs]; + kvmppc_emulate_dec(vcpu); + break; + + case SPRN_SPRG0: + vcpu->arch.sprg0 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG1: + vcpu->arch.sprg1 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG2: + vcpu->arch.sprg2 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG3: + vcpu->arch.sprg3 = vcpu->arch.gpr[rs]; break; + + /* Note: SPRG4-7 are user-readable. These values are + * loaded into the real SPRGs when resuming the + * guest. */ + case SPRN_SPRG4: + vcpu->arch.sprg4 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG5: + vcpu->arch.sprg5 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG6: + vcpu->arch.sprg6 = vcpu->arch.gpr[rs]; break; + case SPRN_SPRG7: + vcpu->arch.sprg7 = vcpu->arch.gpr[rs]; break; + + case SPRN_IVPR: + vcpu->arch.ivpr = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR0: + vcpu->arch.ivor[0] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR1: + vcpu->arch.ivor[1] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR2: + vcpu->arch.ivor[2] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR3: + vcpu->arch.ivor[3] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR4: + vcpu->arch.ivor[4] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR5: + vcpu->arch.ivor[5] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR6: + vcpu->arch.ivor[6] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR7: + vcpu->arch.ivor[7] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR8: + vcpu->arch.ivor[8] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR9: + vcpu->arch.ivor[9] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR10: + vcpu->arch.ivor[10] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR11: + vcpu->arch.ivor[11] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR12: + vcpu->arch.ivor[12] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR13: + vcpu->arch.ivor[13] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR14: + vcpu->arch.ivor[14] = vcpu->arch.gpr[rs]; break; + case SPRN_IVOR15: + vcpu->arch.ivor[15] = vcpu->arch.gpr[rs]; break; + + default: + printk("mtspr: unknown spr %x\n", sprn); + emulated = EMULATE_FAIL; + break; + } + break; + + case 470: /* dcbi */ + /* Do nothing. The guest is performing dcbi because + * hardware DMA is not snooped by the dcache, but + * emulated DMA either goes through the dcache as + * normal writes, or the host kernel has handled dcache + * coherence. */ + break; + + case 534: /* lwbrx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 0); + break; + + case 566: /* tlbsync */ + break; + + case 662: /* stwbrx */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 4, 0); + break; + + case 978: /* tlbwe */ + emulated = kvmppc_emul_tlbwe(vcpu, inst); + break; + + case 914: { /* tlbsx */ + int index; + unsigned int as = get_mmucr_sts(vcpu); + unsigned int pid = get_mmucr_stid(vcpu); + + rt = get_rt(inst); + ra = get_ra(inst); + rb = get_rb(inst); + rc = get_rc(inst); + + ea = vcpu->arch.gpr[rb]; + if (ra) + ea += vcpu->arch.gpr[ra]; + + index = kvmppc_44x_tlb_index(vcpu, ea, pid, as); + if (rc) { + if (index < 0) + vcpu->arch.cr &= ~0x20000000; + else + vcpu->arch.cr |= 0x20000000; + } + vcpu->arch.gpr[rt] = index; + + } + break; + + case 790: /* lhbrx */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 0); + break; + + case 918: /* sthbrx */ + rs = get_rs(inst); + ra = get_ra(inst); + rb = get_rb(inst); + + emulated = kvmppc_handle_store(run, vcpu, + vcpu->arch.gpr[rs], + 2, 0); + break; + + case 966: /* iccci */ + break; + + default: + printk("unknown: op %d xop %d\n", get_op(inst), + get_xop(inst)); + emulated = EMULATE_FAIL; + break; + } + break; + + case 32: /* lwz */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + break; + + case 33: /* lwzu */ + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 34: /* lbz */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + break; + + case 35: /* lbzu */ + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 36: /* stw */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 4, 1); + break; + + case 37: /* stwu */ + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 4, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 38: /* stb */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 1, 1); + break; + + case 39: /* stbu */ + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 1, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 40: /* lhz */ + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + break; + + case 41: /* lhzu */ + ra = get_ra(inst); + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + case 44: /* sth */ + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 2, 1); + break; + + case 45: /* sthu */ + ra = get_ra(inst); + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, vcpu->arch.gpr[rs], + 2, 1); + vcpu->arch.gpr[ra] = vcpu->arch.paddr_accessed; + break; + + default: + printk("unknown op %d\n", get_op(inst)); + emulated = EMULATE_FAIL; + break; + } + + if (advance) + vcpu->arch.pc += 4; /* Advance past emulated instruction. */ + + return emulated; +} diff --git a/trunk/arch/powerpc/kvm/powerpc.c b/trunk/arch/powerpc/kvm/powerpc.c new file mode 100644 index 000000000000..bad40bd2d3ac --- /dev/null +++ b/trunk/arch/powerpc/kvm/powerpc.c @@ -0,0 +1,436 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright IBM Corp. 2007 + * + * Authors: Hollis Blanchard + * Christian Ehrhardt + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *v) +{ + /* XXX implement me */ + return 0; +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + return 1; +} + + +int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + int r; + + er = kvmppc_emulate_instruction(run, vcpu); + switch (er) { + case EMULATE_DONE: + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_GUEST_NV; + break; + case EMULATE_DO_MMIO: + run->exit_reason = KVM_EXIT_MMIO; + /* We must reload nonvolatiles because "update" load/store + * instructions modify register state. */ + /* Future optimization: only reload non-volatiles if they were + * actually modified. */ + r = RESUME_HOST_NV; + break; + case EMULATE_FAIL: + /* XXX Deliver Program interrupt to guest. */ + printk(KERN_EMERG "%s: emulation failed (%08x)\n", __func__, + vcpu->arch.last_inst); + r = RESUME_HOST; + break; + default: + BUG(); + } + + return r; +} + +void kvm_arch_hardware_enable(void *garbage) +{ +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ + int r; + + if (strcmp(cur_cpu_spec->platform, "ppc440") == 0) + r = 0; + else + r = -ENOTSUPP; + + *(int *)rtn = r; +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm; + + kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + if (!kvm) + return ERR_PTR(-ENOMEM); + + return kvm; +} + +static void kvmppc_free_vcpus(struct kvm *kvm) +{ + unsigned int i; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_arch_vcpu_free(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + kvmppc_free_vcpus(kvm); + kvm_free_physmem(kvm); + kfree(kvm); +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + int r; + + switch (ext) { + case KVM_CAP_USER_MEMORY: + r = 1; + break; + default: + r = 0; + break; + } + return r; + +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + return 0; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) +{ + struct kvm_vcpu *vcpu; + int err; + + vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL); + if (!vcpu) { + err = -ENOMEM; + goto out; + } + + err = kvm_vcpu_init(vcpu, kvm, id); + if (err) + goto free_vcpu; + + return vcpu; + +free_vcpu: + kmem_cache_free(kvm_vcpu_cache, vcpu); +out: + return ERR_PTR(err); +} + +void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_uninit(vcpu); + kmem_cache_free(kvm_vcpu_cache, vcpu); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_free(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + unsigned int priority = exception_priority[BOOKE_INTERRUPT_DECREMENTER]; + + return test_bit(priority, &vcpu->arch.pending_exceptions); +} + +static void kvmppc_decrementer_func(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER); +} + +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + setup_timer(&vcpu->arch.dec_timer, kvmppc_decrementer_func, + (unsigned long)vcpu); + + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ +} + +void decache_vcpus_on_cpu(int cpu) +{ +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -ENOTSUPP; +} + +static void kvmppc_complete_dcr_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + *gpr = run->dcr.data; +} + +static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, + struct kvm_run *run) +{ + u32 *gpr = &vcpu->arch.gpr[vcpu->arch.io_gpr]; + + if (run->mmio.len > sizeof(*gpr)) { + printk(KERN_ERR "bad MMIO length: %d\n", run->mmio.len); + return; + } + + if (vcpu->arch.mmio_is_bigendian) { + switch (run->mmio.len) { + case 4: *gpr = *(u32 *)run->mmio.data; break; + case 2: *gpr = *(u16 *)run->mmio.data; break; + case 1: *gpr = *(u8 *)run->mmio.data; break; + } + } else { + /* Convert BE data from userland back to LE. */ + switch (run->mmio.len) { + case 4: *gpr = ld_le32((u32 *)run->mmio.data); break; + case 2: *gpr = ld_le16((u16 *)run->mmio.data); break; + case 1: *gpr = *(u8 *)run->mmio.data; break; + } + } +} + +int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int rt, unsigned int bytes, int is_bigendian) +{ + if (bytes > sizeof(run->mmio.data)) { + printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, + run->mmio.len); + } + + run->mmio.phys_addr = vcpu->arch.paddr_accessed; + run->mmio.len = bytes; + run->mmio.is_write = 0; + + vcpu->arch.io_gpr = rt; + vcpu->arch.mmio_is_bigendian = is_bigendian; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 0; + + return EMULATE_DO_MMIO; +} + +int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu, + u32 val, unsigned int bytes, int is_bigendian) +{ + void *data = run->mmio.data; + + if (bytes > sizeof(run->mmio.data)) { + printk(KERN_ERR "%s: bad MMIO length: %d\n", __func__, + run->mmio.len); + } + + run->mmio.phys_addr = vcpu->arch.paddr_accessed; + run->mmio.len = bytes; + run->mmio.is_write = 1; + vcpu->mmio_needed = 1; + vcpu->mmio_is_write = 1; + + /* Store the value at the lowest bytes in 'data'. */ + if (is_bigendian) { + switch (bytes) { + case 4: *(u32 *)data = val; break; + case 2: *(u16 *)data = val; break; + case 1: *(u8 *)data = val; break; + } + } else { + /* Store LE value into 'data'. */ + switch (bytes) { + case 4: st_le32(data, val); break; + case 2: st_le16(data, val); break; + case 1: *(u8 *)data = val; break; + } + } + + return EMULATE_DO_MMIO; +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + int r; + sigset_t sigsaved; + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + if (vcpu->mmio_needed) { + if (!vcpu->mmio_is_write) + kvmppc_complete_mmio_load(vcpu, run); + vcpu->mmio_needed = 0; + } else if (vcpu->arch.dcr_needed) { + if (!vcpu->arch.dcr_is_write) + kvmppc_complete_dcr_load(vcpu, run); + vcpu->arch.dcr_needed = 0; + } + + kvmppc_check_and_deliver_interrupts(vcpu); + + local_irq_disable(); + kvm_guest_enter(); + r = __kvmppc_vcpu_run(run, vcpu); + kvm_guest_exit(); + local_irq_enable(); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + return r; +} + +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq) +{ + kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + long r; + + switch (ioctl) { + case KVM_INTERRUPT: { + struct kvm_interrupt irq; + r = -EFAULT; + if (copy_from_user(&irq, argp, sizeof(irq))) + goto out; + r = kvm_vcpu_ioctl_interrupt(vcpu, &irq); + break; + } + default: + r = -EINVAL; + } + +out: + return r; +} + +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) +{ + return -ENOTSUPP; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + long r; + + switch (ioctl) { + default: + r = -EINVAL; + } + + return r; +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} diff --git a/trunk/arch/powerpc/mm/fsl_booke_mmu.c b/trunk/arch/powerpc/mm/fsl_booke_mmu.c index ada249bf9779..ce10e2b1b902 100644 --- a/trunk/arch/powerpc/mm/fsl_booke_mmu.c +++ b/trunk/arch/powerpc/mm/fsl_booke_mmu.c @@ -202,7 +202,7 @@ adjust_total_lowmem(void) cam_max_size = max_lowmem_size; /* adjust lowmem size to max_lowmem_size */ - ram = min(max_lowmem_size, total_lowmem); + ram = min(max_lowmem_size, (phys_addr_t)total_lowmem); /* Calculate CAM values */ __cam0 = 1UL << 2 * (__ilog2(ram) / 2); diff --git a/trunk/arch/powerpc/mm/hash_low_32.S b/trunk/arch/powerpc/mm/hash_low_32.S index e10d76a860d3..ddeaf9e38ad5 100644 --- a/trunk/arch/powerpc/mm/hash_low_32.S +++ b/trunk/arch/powerpc/mm/hash_low_32.S @@ -191,7 +191,7 @@ _GLOBAL(add_hash_page) add r3,r3,r0 /* note create_hpte trims to 24 bits */ #ifdef CONFIG_SMP - rlwinm r8,r1,0,0,18 /* use cpu number to make tag */ + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) /* use cpu number to make tag */ lwz r8,TI_CPU(r8) /* to go in mmu_hash_lock */ oris r8,r8,12 #endif /* CONFIG_SMP */ @@ -526,7 +526,7 @@ _GLOBAL(flush_hash_pages) #ifdef CONFIG_SMP addis r9,r7,mmu_hash_lock@ha addi r9,r9,mmu_hash_lock@l - rlwinm r8,r1,0,0,18 + rlwinm r8,r1,0,0,(31-THREAD_SHIFT) add r8,r8,r7 lwz r8,TI_CPU(r8) oris r8,r8,9 diff --git a/trunk/arch/powerpc/mm/init_32.c b/trunk/arch/powerpc/mm/init_32.c index 47325f23c51f..1952b4d3fa7f 100644 --- a/trunk/arch/powerpc/mm/init_32.c +++ b/trunk/arch/powerpc/mm/init_32.c @@ -59,7 +59,10 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); unsigned long total_memory; unsigned long total_lowmem; -phys_addr_t memstart_addr; +phys_addr_t memstart_addr = (phys_addr_t)~0ull; +EXPORT_SYMBOL(memstart_addr); +phys_addr_t kernstart_addr; +EXPORT_SYMBOL(kernstart_addr); phys_addr_t lowmem_end_addr; int boot_mapsize; @@ -68,14 +71,6 @@ unsigned long agp_special_page; EXPORT_SYMBOL(agp_special_page); #endif -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; - -EXPORT_SYMBOL(kmap_prot); -EXPORT_SYMBOL(kmap_pte); -#endif - void MMU_init(void); /* XXX should be in current.h -- paulus */ diff --git a/trunk/arch/powerpc/mm/init_64.c b/trunk/arch/powerpc/mm/init_64.c index 698bd000f98b..c5ac532a0161 100644 --- a/trunk/arch/powerpc/mm/init_64.c +++ b/trunk/arch/powerpc/mm/init_64.c @@ -72,7 +72,8 @@ #warning TASK_SIZE is smaller than it needs to be. #endif -phys_addr_t memstart_addr; +phys_addr_t memstart_addr = ~0; +phys_addr_t kernstart_addr; void free_initmem(void) { diff --git a/trunk/arch/powerpc/mm/mem.c b/trunk/arch/powerpc/mm/mem.c index 16def4dcff6d..5ccb579b81e4 100644 --- a/trunk/arch/powerpc/mm/mem.c +++ b/trunk/arch/powerpc/mm/mem.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "mmu_decl.h" @@ -57,6 +58,20 @@ int init_bootmem_done; int mem_init_done; unsigned long memory_limit; +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + +EXPORT_SYMBOL(kmap_prot); +EXPORT_SYMBOL(kmap_pte); + +static inline pte_t *virt_to_kpte(unsigned long vaddr) +{ + return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), + vaddr), vaddr), vaddr); +} +#endif + int page_is_ram(unsigned long pfn) { unsigned long paddr = (pfn << PAGE_SHIFT); @@ -216,7 +231,7 @@ void __init do_init_bootmem(void) unsigned long total_pages; int boot_mapsize; - max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; + max_low_pfn = max_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT; total_pages = (lmb_end_of_DRAM() - memstart_addr) >> PAGE_SHIFT; #ifdef CONFIG_HIGHMEM total_pages = total_lowmem >> PAGE_SHIFT; @@ -232,7 +247,8 @@ void __init do_init_bootmem(void) start = lmb_alloc(bootmap_pages << PAGE_SHIFT, PAGE_SIZE); - boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages); + min_low_pfn = MEMORY_START >> PAGE_SHIFT; + boot_mapsize = init_bootmem_node(NODE_DATA(0), start >> PAGE_SHIFT, min_low_pfn, max_low_pfn); /* Add active regions with valid PFNs */ for (i = 0; i < lmb.memory.cnt; i++) { @@ -310,14 +326,19 @@ void __init paging_init(void) unsigned long top_of_ram = lmb_end_of_DRAM(); unsigned long max_zone_pfns[MAX_NR_ZONES]; +#ifdef CONFIG_PPC32 + unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1); + unsigned long end = __fix_to_virt(FIX_HOLE); + + for (; v < end; v += PAGE_SIZE) + map_page(v, 0, 0); /* XXX gross */ +#endif + #ifdef CONFIG_HIGHMEM map_page(PKMAP_BASE, 0, 0); /* XXX gross */ - pkmap_page_table = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k - (PKMAP_BASE), PKMAP_BASE), PKMAP_BASE), PKMAP_BASE); - map_page(KMAP_FIX_BEGIN, 0, 0); /* XXX gross */ - kmap_pte = pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k - (KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), - KMAP_FIX_BEGIN); + pkmap_page_table = virt_to_kpte(PKMAP_BASE); + + kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN)); kmap_prot = PAGE_KERNEL; #endif /* CONFIG_HIGHMEM */ diff --git a/trunk/arch/powerpc/mm/numa.c b/trunk/arch/powerpc/mm/numa.c index 1efd631211ef..dc704da363eb 100644 --- a/trunk/arch/powerpc/mm/numa.c +++ b/trunk/arch/powerpc/mm/numa.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/trunk/arch/powerpc/mm/pgtable_32.c b/trunk/arch/powerpc/mm/pgtable_32.c index 64c44bcc68de..80d1babb230d 100644 --- a/trunk/arch/powerpc/mm/pgtable_32.c +++ b/trunk/arch/powerpc/mm/pgtable_32.c @@ -29,6 +29,7 @@ #include #include +#include #include #include "mmu_decl.h" @@ -387,3 +388,25 @@ void kernel_map_pages(struct page *page, int numpages, int enable) change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); } #endif /* CONFIG_DEBUG_PAGEALLOC */ + +static int fixmaps; +unsigned long FIXADDR_TOP = 0xfffff000; +EXPORT_SYMBOL(FIXADDR_TOP); + +void __set_fixmap (enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) +{ + unsigned long address = __fix_to_virt(idx); + + if (idx >= __end_of_fixed_addresses) { + BUG(); + return; + } + + map_page(address, phys, flags); + fixmaps++; +} + +void __this_fixmap_does_not_exist(void) +{ + WARN_ON(1); +} diff --git a/trunk/arch/powerpc/platforms/Kconfig b/trunk/arch/powerpc/platforms/Kconfig index f38c50b4ce56..87454c526973 100644 --- a/trunk/arch/powerpc/platforms/Kconfig +++ b/trunk/arch/powerpc/platforms/Kconfig @@ -45,7 +45,6 @@ source "arch/powerpc/platforms/powermac/Kconfig" source "arch/powerpc/platforms/prep/Kconfig" source "arch/powerpc/platforms/maple/Kconfig" source "arch/powerpc/platforms/pasemi/Kconfig" -source "arch/powerpc/platforms/celleb/Kconfig" source "arch/powerpc/platforms/ps3/Kconfig" source "arch/powerpc/platforms/cell/Kconfig" source "arch/powerpc/platforms/8xx/Kconfig" diff --git a/trunk/arch/powerpc/platforms/Kconfig.cputype b/trunk/arch/powerpc/platforms/Kconfig.cputype index 5fc7fac10e93..f7efaa925a13 100644 --- a/trunk/arch/powerpc/platforms/Kconfig.cputype +++ b/trunk/arch/powerpc/platforms/Kconfig.cputype @@ -220,8 +220,8 @@ config SMP If you don't know what to do here, say N. config NR_CPUS - int "Maximum number of CPUs (2-128)" - range 2 128 + int "Maximum number of CPUs (2-1024)" + range 2 1024 depends on SMP default "32" if PPC64 default "4" diff --git a/trunk/arch/powerpc/platforms/Makefile b/trunk/arch/powerpc/platforms/Makefile index a984894466d9..423a0234dc31 100644 --- a/trunk/arch/powerpc/platforms/Makefile +++ b/trunk/arch/powerpc/platforms/Makefile @@ -24,5 +24,4 @@ obj-$(CONFIG_PPC_MAPLE) += maple/ obj-$(CONFIG_PPC_PASEMI) += pasemi/ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ -obj-$(CONFIG_PPC_CELLEB) += celleb/ obj-$(CONFIG_EMBEDDED6xx) += embedded6xx/ diff --git a/trunk/arch/powerpc/platforms/cell/Kconfig b/trunk/arch/powerpc/platforms/cell/Kconfig index 2f169991896d..3959fcfe731c 100644 --- a/trunk/arch/powerpc/platforms/cell/Kconfig +++ b/trunk/arch/powerpc/platforms/cell/Kconfig @@ -25,6 +25,19 @@ config PPC_IBM_CELL_BLADE select PPC_UDBG_16550 select UDBG_RTAS_CONSOLE +config PPC_CELLEB + bool "Toshiba's Cell Reference Set 'Celleb' Architecture" + depends on PPC_MULTIPLATFORM && PPC64 + select PPC_CELL + select PPC_CELL_NATIVE + select PPC_RTAS + select PPC_INDIRECT_IO + select PPC_OF_PLATFORM_PCI + select HAS_TXX9_SERIAL + select PPC_UDBG_BEAT + select USB_OHCI_BIG_ENDIAN_MMIO + select USB_EHCI_BIG_ENDIAN_MMIO + menu "Cell Broadband Engine options" depends on PPC_CELL diff --git a/trunk/arch/powerpc/platforms/cell/Makefile b/trunk/arch/powerpc/platforms/cell/Makefile index c89964c6fb1f..c2a7e4e5ddf9 100644 --- a/trunk/arch/powerpc/platforms/cell/Makefile +++ b/trunk/arch/powerpc/platforms/cell/Makefile @@ -1,6 +1,7 @@ obj-$(CONFIG_PPC_CELL_NATIVE) += interrupt.o iommu.o setup.o \ cbe_regs.o spider-pic.o \ - pervasive.o pmu.o io-workarounds.o + pervasive.o pmu.o io-workarounds.o \ + spider-pci.o obj-$(CONFIG_CBE_RAS) += ras.o obj-$(CONFIG_CBE_THERM) += cbe_thermal.o @@ -26,3 +27,20 @@ obj-$(CONFIG_SPU_BASE) += spu_callbacks.o spu_base.o \ spufs/ obj-$(CONFIG_PCI_MSI) += axon_msi.o + + +# celleb stuff +ifeq ($(CONFIG_PPC_CELLEB),y) +obj-y += celleb_setup.o \ + celleb_pci.o celleb_scc_epci.o \ + celleb_scc_pciex.o \ + celleb_scc_uhc.o \ + io-workarounds.o spider-pci.o \ + beat.o beat_htab.o beat_hvCall.o \ + beat_interrupt.o beat_iommu.o + +obj-$(CONFIG_SMP) += beat_smp.o +obj-$(CONFIG_PPC_UDBG_BEAT) += beat_udbg.o +obj-$(CONFIG_SERIAL_TXX9) += celleb_scc_sio.o +obj-$(CONFIG_SPU_BASE) += beat_spu_priv1.o +endif diff --git a/trunk/arch/powerpc/platforms/cell/axon_msi.c b/trunk/arch/powerpc/platforms/cell/axon_msi.c index d95e71dee91f..c39f5c225f2e 100644 --- a/trunk/arch/powerpc/platforms/cell/axon_msi.c +++ b/trunk/arch/powerpc/platforms/cell/axon_msi.c @@ -123,7 +123,7 @@ static struct axon_msic *find_msi_translator(struct pci_dev *dev) return NULL; } - for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { + for (; dn; dn = of_get_next_parent(dn)) { ph = of_get_property(dn, "msi-translator", NULL); if (ph) break; @@ -169,7 +169,7 @@ static int axon_msi_check_device(struct pci_dev *dev, int nvec, int type) static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) { - struct device_node *dn, *tmp; + struct device_node *dn; struct msi_desc *entry; int len; const u32 *prop; @@ -182,7 +182,7 @@ static int setup_msi_msg_address(struct pci_dev *dev, struct msi_msg *msg) entry = list_first_entry(&dev->msi_list, struct msi_desc, list); - for (; dn; tmp = of_get_parent(dn), of_node_put(dn), dn = tmp) { + for (; dn; dn = of_get_next_parent(dn)) { if (entry->msi_attrib.is_64) { prop = of_get_property(dn, "msi-address-64", &len); if (prop) diff --git a/trunk/arch/powerpc/platforms/celleb/beat.c b/trunk/arch/powerpc/platforms/cell/beat.c similarity index 99% rename from trunk/arch/powerpc/platforms/celleb/beat.c rename to trunk/arch/powerpc/platforms/cell/beat.c index b64b171f245b..48c690ea65da 100644 --- a/trunk/arch/powerpc/platforms/celleb/beat.c +++ b/trunk/arch/powerpc/platforms/cell/beat.c @@ -33,7 +33,7 @@ #include "beat_wrapper.h" #include "beat.h" -#include "interrupt.h" +#include "beat_interrupt.h" static int beat_pm_poweroff_flag; diff --git a/trunk/arch/powerpc/platforms/celleb/beat.h b/trunk/arch/powerpc/platforms/cell/beat.h similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/beat.h rename to trunk/arch/powerpc/platforms/cell/beat.h diff --git a/trunk/arch/powerpc/platforms/celleb/htab.c b/trunk/arch/powerpc/platforms/cell/beat_htab.c similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/htab.c rename to trunk/arch/powerpc/platforms/cell/beat_htab.c diff --git a/trunk/arch/powerpc/platforms/celleb/hvCall.S b/trunk/arch/powerpc/platforms/cell/beat_hvCall.S similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/hvCall.S rename to trunk/arch/powerpc/platforms/cell/beat_hvCall.S diff --git a/trunk/arch/powerpc/platforms/celleb/interrupt.c b/trunk/arch/powerpc/platforms/cell/beat_interrupt.c similarity index 99% rename from trunk/arch/powerpc/platforms/celleb/interrupt.c rename to trunk/arch/powerpc/platforms/cell/beat_interrupt.c index 69562a867876..192a93509372 100644 --- a/trunk/arch/powerpc/platforms/celleb/interrupt.c +++ b/trunk/arch/powerpc/platforms/cell/beat_interrupt.c @@ -26,7 +26,7 @@ #include -#include "interrupt.h" +#include "beat_interrupt.h" #include "beat_wrapper.h" #define MAX_IRQS NR_IRQS diff --git a/trunk/arch/powerpc/platforms/celleb/interrupt.h b/trunk/arch/powerpc/platforms/cell/beat_interrupt.h similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/interrupt.h rename to trunk/arch/powerpc/platforms/cell/beat_interrupt.h diff --git a/trunk/arch/powerpc/platforms/celleb/iommu.c b/trunk/arch/powerpc/platforms/cell/beat_iommu.c similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/iommu.c rename to trunk/arch/powerpc/platforms/cell/beat_iommu.c diff --git a/trunk/arch/powerpc/platforms/celleb/smp.c b/trunk/arch/powerpc/platforms/cell/beat_smp.c similarity index 99% rename from trunk/arch/powerpc/platforms/celleb/smp.c rename to trunk/arch/powerpc/platforms/cell/beat_smp.c index a7631250aeb4..26efc204c47f 100644 --- a/trunk/arch/powerpc/platforms/celleb/smp.c +++ b/trunk/arch/powerpc/platforms/cell/beat_smp.c @@ -37,7 +37,7 @@ #include #include -#include "interrupt.h" +#include "beat_interrupt.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) diff --git a/trunk/arch/powerpc/platforms/celleb/spu_priv1.c b/trunk/arch/powerpc/platforms/cell/beat_spu_priv1.c similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/spu_priv1.c rename to trunk/arch/powerpc/platforms/cell/beat_spu_priv1.c diff --git a/trunk/arch/powerpc/platforms/celleb/beat_syscall.h b/trunk/arch/powerpc/platforms/cell/beat_syscall.h similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/beat_syscall.h rename to trunk/arch/powerpc/platforms/cell/beat_syscall.h diff --git a/trunk/arch/powerpc/platforms/celleb/udbg_beat.c b/trunk/arch/powerpc/platforms/cell/beat_udbg.c similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/udbg_beat.c rename to trunk/arch/powerpc/platforms/cell/beat_udbg.c diff --git a/trunk/arch/powerpc/platforms/celleb/beat_wrapper.h b/trunk/arch/powerpc/platforms/cell/beat_wrapper.h similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/beat_wrapper.h rename to trunk/arch/powerpc/platforms/cell/beat_wrapper.h diff --git a/trunk/arch/powerpc/platforms/celleb/pci.c b/trunk/arch/powerpc/platforms/cell/celleb_pci.c similarity index 93% rename from trunk/arch/powerpc/platforms/celleb/pci.c rename to trunk/arch/powerpc/platforms/cell/celleb_pci.c index 51b390d34e4d..f39a3b2a1667 100644 --- a/trunk/arch/powerpc/platforms/celleb/pci.c +++ b/trunk/arch/powerpc/platforms/cell/celleb_pci.c @@ -37,12 +37,11 @@ #include #include #include -#include #include #include -#include "pci.h" -#include "interrupt.h" +#include "io-workarounds.h" +#include "celleb_pci.h" #define MAX_PCI_DEVICES 32 #define MAX_PCI_FUNCTIONS 8 @@ -190,7 +189,7 @@ static int celleb_fake_pci_read_config(struct pci_bus *bus, static int celleb_fake_pci_write_config(struct pci_bus *bus, - unsigned int devfn, int where, int size, u32 val) + unsigned int devfn, int where, int size, u32 val) { char *config; struct device_node *node; @@ -457,33 +456,42 @@ static int __init celleb_setup_fake_pci(struct device_node *dev, return 0; } -void __init fake_pci_workaround_init(struct pci_controller *phb) -{ - /** - * We will add fake pci bus to scc_pci_bus for the purpose to improve - * I/O Macro performance. But device-tree and device drivers - * are not ready to use address with a token. - */ - - /* celleb_pci_add_one(phb, NULL); */ -} +static struct celleb_phb_spec celleb_fake_pci_spec __initdata = { + .setup = celleb_setup_fake_pci, +}; static struct of_device_id celleb_phb_match[] __initdata = { { .name = "pci-pseudo", - .data = celleb_setup_fake_pci, + .data = &celleb_fake_pci_spec, }, { .name = "epci", - .data = celleb_setup_epci, + .data = &celleb_epci_spec, + }, { + .name = "pcie", + .data = &celleb_pciex_spec, }, { }, }; +static int __init celleb_io_workaround_init(struct pci_controller *phb, + struct celleb_phb_spec *phb_spec) +{ + if (phb_spec->ops) { + iowa_register_bus(phb, phb_spec->ops, phb_spec->iowa_init, + phb_spec->iowa_data); + io_workaround_init(); + } + + return 0; +} + int __init celleb_setup_phb(struct pci_controller *phb) { struct device_node *dev = phb->dn; const struct of_device_id *match; - int (*setup_func)(struct device_node *, struct pci_controller *); + struct celleb_phb_spec *phb_spec; + int rc; match = of_match_node(celleb_phb_match, dev); if (!match) @@ -492,8 +500,12 @@ int __init celleb_setup_phb(struct pci_controller *phb) phb_set_bus_ranges(dev, phb); phb->buid = 1; - setup_func = match->data; - return (*setup_func)(dev, phb); + phb_spec = match->data; + rc = (*phb_spec->setup)(dev, phb); + if (rc) + return 1; + + return celleb_io_workaround_init(phb, phb_spec); } int celleb_pci_probe_mode(struct pci_bus *bus) diff --git a/trunk/arch/powerpc/platforms/celleb/pci.h b/trunk/arch/powerpc/platforms/cell/celleb_pci.h similarity index 73% rename from trunk/arch/powerpc/platforms/celleb/pci.h rename to trunk/arch/powerpc/platforms/cell/celleb_pci.h index 5d5544ffeddb..4cba1523ec50 100644 --- a/trunk/arch/powerpc/platforms/celleb/pci.h +++ b/trunk/arch/powerpc/platforms/cell/celleb_pci.h @@ -27,16 +27,19 @@ #include #include +#include "io-workarounds.h" + +struct celleb_phb_spec { + int (*setup)(struct device_node *, struct pci_controller *); + struct ppc_pci_io *ops; + int (*iowa_init)(struct iowa_bus *, void *); + void *iowa_data; +}; + extern int celleb_setup_phb(struct pci_controller *); extern int celleb_pci_probe_mode(struct pci_bus *); -extern int celleb_setup_epci(struct device_node *, struct pci_controller *); - -extern void *celleb_dummy_page_va; -extern int __init celleb_pci_workaround_init(void); -extern void __init celleb_pci_add_one(struct pci_controller *, - void (*)(struct pci_controller *)); -extern void fake_pci_workaround_init(struct pci_controller *); -extern void epci_workaround_init(struct pci_controller *); +extern struct celleb_phb_spec celleb_epci_spec; +extern struct celleb_phb_spec celleb_pciex_spec; #endif /* _CELLEB_PCI_H */ diff --git a/trunk/arch/powerpc/platforms/celleb/scc.h b/trunk/arch/powerpc/platforms/cell/celleb_scc.h similarity index 68% rename from trunk/arch/powerpc/platforms/celleb/scc.h rename to trunk/arch/powerpc/platforms/cell/celleb_scc.h index 6be1542a6e66..b596a711c348 100644 --- a/trunk/arch/powerpc/platforms/celleb/scc.h +++ b/trunk/arch/powerpc/platforms/cell/celleb_scc.h @@ -125,6 +125,93 @@ /* bits for SCC_EPCI_CNTOPT */ #define SCC_EPCI_CNTOPT_O2PMB 0x00000002 +/* SCC PCIEXC SMMIO registers */ +#define PEXCADRS 0x000 +#define PEXCWDATA 0x004 +#define PEXCRDATA 0x008 +#define PEXDADRS 0x010 +#define PEXDCMND 0x014 +#define PEXDWDATA 0x018 +#define PEXDRDATA 0x01c +#define PEXREQID 0x020 +#define PEXTIDMAP 0x024 +#define PEXINTMASK 0x028 +#define PEXINTSTS 0x02c +#define PEXAERRMASK 0x030 +#define PEXAERRSTS 0x034 +#define PEXPRERRMASK 0x040 +#define PEXPRERRSTS 0x044 +#define PEXPRERRID01 0x048 +#define PEXPRERRID23 0x04c +#define PEXVDMASK 0x050 +#define PEXVDSTS 0x054 +#define PEXRCVCPLIDA 0x060 +#define PEXLENERRIDA 0x068 +#define PEXPHYPLLST 0x070 +#define PEXDMRDEN0 0x100 +#define PEXDMRDADR0 0x104 +#define PEXDMRDENX 0x110 +#define PEXDMRDADRX 0x114 +#define PEXECMODE 0xf00 +#define PEXMAEA(n) (0xf50 + (8 * n)) +#define PEXMAEC(n) (0xf54 + (8 * n)) +#define PEXCCRCTRL 0xff0 + +/* SCC PCIEXC bits and shifts for PEXCADRS */ +#define PEXCADRS_BYTE_EN_SHIFT 20 +#define PEXCADRS_CMD_SHIFT 16 +#define PEXCADRS_CMD_READ (0xa << PEXCADRS_CMD_SHIFT) +#define PEXCADRS_CMD_WRITE (0xb << PEXCADRS_CMD_SHIFT) + +/* SCC PCIEXC shifts for PEXDADRS */ +#define PEXDADRS_BUSNO_SHIFT 20 +#define PEXDADRS_DEVNO_SHIFT 15 +#define PEXDADRS_FUNCNO_SHIFT 12 + +/* SCC PCIEXC bits and shifts for PEXDCMND */ +#define PEXDCMND_BYTE_EN_SHIFT 4 +#define PEXDCMND_IO_READ 0x2 +#define PEXDCMND_IO_WRITE 0x3 +#define PEXDCMND_CONFIG_READ 0xa +#define PEXDCMND_CONFIG_WRITE 0xb + +/* SCC PCIEXC bits for PEXPHYPLLST */ +#define PEXPHYPLLST_PEXPHYAPLLST 0x00000001 + +/* SCC PCIEXC bits for PEXECMODE */ +#define PEXECMODE_ALL_THROUGH 0x00000000 +#define PEXECMODE_ALL_8BIT 0x00550155 +#define PEXECMODE_ALL_16BIT 0x00aa02aa + +/* SCC PCIEXC bits for PEXCCRCTRL */ +#define PEXCCRCTRL_PEXIPCOREEN 0x00040000 +#define PEXCCRCTRL_PEXIPCONTEN 0x00020000 +#define PEXCCRCTRL_PEXPHYPLLEN 0x00010000 +#define PEXCCRCTRL_PCIEXCAOCKEN 0x00000100 + +/* SCC PCIEXC port configuration registers */ +#define PEXTCERRCHK 0x21c +#define PEXTAMAPB0 0x220 +#define PEXTAMAPL0 0x224 +#define PEXTAMAPB(n) (PEXTAMAPB0 + 8 * (n)) +#define PEXTAMAPL(n) (PEXTAMAPL0 + 8 * (n)) +#define PEXCHVC0P 0x500 +#define PEXCHVC0NP 0x504 +#define PEXCHVC0C 0x508 +#define PEXCDVC0P 0x50c +#define PEXCDVC0NP 0x510 +#define PEXCDVC0C 0x514 +#define PEXCHVCXP 0x518 +#define PEXCHVCXNP 0x51c +#define PEXCHVCXC 0x520 +#define PEXCDVCXP 0x524 +#define PEXCDVCXNP 0x528 +#define PEXCDVCXC 0x52c +#define PEXCTTRG 0x530 +#define PEXTSCTRL 0x700 +#define PEXTSSTS 0x704 +#define PEXSKPCTRL 0x708 + /* UHC registers */ #define SCC_UHC_CKRCTRL 0xff0 #define SCC_UHC_ECMODE 0xf00 diff --git a/trunk/arch/powerpc/platforms/celleb/scc_epci.c b/trunk/arch/powerpc/platforms/cell/celleb_scc_epci.c similarity index 86% rename from trunk/arch/powerpc/platforms/celleb/scc_epci.c rename to trunk/arch/powerpc/platforms/cell/celleb_scc_epci.c index a999b393f6f6..08c285b10e30 100644 --- a/trunk/arch/powerpc/platforms/celleb/scc_epci.c +++ b/trunk/arch/powerpc/platforms/cell/celleb_scc_epci.c @@ -30,23 +30,17 @@ #include #include #include -#include #include #include -#include "scc.h" -#include "pci.h" -#include "interrupt.h" +#include "celleb_scc.h" +#include "celleb_pci.h" #define MAX_PCI_DEVICES 32 #define MAX_PCI_FUNCTIONS 8 #define iob() __asm__ __volatile__("eieio; sync":::"memory") -struct epci_private { - dma_addr_t dummy_page_da; -}; - static inline PCI_IO_ADDR celleb_epci_get_epci_base( struct pci_controller *hose) { @@ -71,42 +65,6 @@ static inline PCI_IO_ADDR celleb_epci_get_epci_cfg( return hose->cfg_data; } -static void scc_epci_dummy_read(struct pci_controller *hose) -{ - PCI_IO_ADDR epci_base; - u32 val; - - epci_base = celleb_epci_get_epci_base(hose); - - val = in_be32(epci_base + SCC_EPCI_WATRP); - iosync(); - - return; -} - -void __init epci_workaround_init(struct pci_controller *hose) -{ - PCI_IO_ADDR epci_base; - PCI_IO_ADDR reg; - struct epci_private *private = hose->private_data; - - BUG_ON(!private); - - private->dummy_page_da = dma_map_single(hose->parent, - celleb_dummy_page_va, PAGE_SIZE, DMA_FROM_DEVICE); - if (private->dummy_page_da == DMA_ERROR_CODE) { - printk(KERN_ERR "EPCI: dummy read disabled. " - "Map dummy page failed.\n"); - return; - } - - celleb_pci_add_one(hose, scc_epci_dummy_read); - epci_base = celleb_epci_get_epci_base(hose); - - reg = epci_base + SCC_EPCI_DUMYRADR; - out_be32(reg, private->dummy_page_da); -} - static inline void clear_and_disable_master_abort_interrupt( struct pci_controller *hose) { @@ -151,10 +109,8 @@ static int celleb_epci_check_abort(struct pci_controller *hose, return PCIBIOS_SUCCESSFUL; } -static PCI_IO_ADDR celleb_epci_make_config_addr( - struct pci_bus *bus, - struct pci_controller *hose, - unsigned int devfn, int where) +static PCI_IO_ADDR celleb_epci_make_config_addr(struct pci_bus *bus, + struct pci_controller *hose, unsigned int devfn, int where) { PCI_IO_ADDR addr; @@ -425,8 +381,8 @@ static int __init celleb_epci_init(struct pci_controller *hose) return 0; } -int __init celleb_setup_epci(struct device_node *node, - struct pci_controller *hose) +static int __init celleb_setup_epci(struct device_node *node, + struct pci_controller *hose) { struct resource r; @@ -450,8 +406,7 @@ int __init celleb_setup_epci(struct device_node *node, if (!hose->cfg_addr) goto error; pr_debug("EPCI: cfg_addr map 0x%016lx->0x%016lx + 0x%016lx\n", - r.start, (unsigned long)hose->cfg_addr, - (r.end - r.start + 1)); + r.start, (unsigned long)hose->cfg_addr, (r.end - r.start + 1)); if (of_address_to_resource(node, 2, &r)) goto error; @@ -459,14 +414,7 @@ int __init celleb_setup_epci(struct device_node *node, if (!hose->cfg_data) goto error; pr_debug("EPCI: cfg_data map 0x%016lx->0x%016lx + 0x%016lx\n", - r.start, (unsigned long)hose->cfg_data, - (r.end - r.start + 1)); - - hose->private_data = kzalloc(sizeof(struct epci_private), GFP_KERNEL); - if (hose->private_data == NULL) { - printk(KERN_ERR "EPCI: no memory for private data.\n"); - goto error; - } + r.start, (unsigned long)hose->cfg_data, (r.end - r.start + 1)); hose->ops = &celleb_epci_ops; celleb_epci_init(hose); @@ -474,8 +422,6 @@ int __init celleb_setup_epci(struct device_node *node, return 0; error: - kfree(hose->private_data); - if (hose->cfg_addr) iounmap(hose->cfg_addr); @@ -483,3 +429,10 @@ int __init celleb_setup_epci(struct device_node *node, iounmap(hose->cfg_data); return 1; } + +struct celleb_phb_spec celleb_epci_spec __initdata = { + .setup = celleb_setup_epci, + .ops = &spiderpci_ops, + .iowa_init = &spiderpci_iowa_init, + .iowa_data = (void *)0, +}; diff --git a/trunk/arch/powerpc/platforms/cell/celleb_scc_pciex.c b/trunk/arch/powerpc/platforms/cell/celleb_scc_pciex.c new file mode 100644 index 000000000000..ab24d94baab6 --- /dev/null +++ b/trunk/arch/powerpc/platforms/cell/celleb_scc_pciex.c @@ -0,0 +1,547 @@ +/* + * Support for Celleb PCI-Express. + * + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "celleb_scc.h" +#include "celleb_pci.h" + +#define PEX_IN(base, off) in_be32((void *)(base) + (off)) +#define PEX_OUT(base, off, data) out_be32((void *)(base) + (off), (data)) + +static void scc_pciex_io_flush(struct iowa_bus *bus) +{ + (void)PEX_IN(bus->phb->cfg_addr, PEXDMRDEN0); +} + +/* + * Memory space access to device on PCIEX + */ +#define PCIEX_MMIO_READ(name, ret) \ +static ret scc_pciex_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define PCIEX_MMIO_READ_STR(name) \ +static void scc_pciex_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + scc_pciex_io_flush(iowa_mem_find_bus(addr)); \ +} + +PCIEX_MMIO_READ(readb, u8) +PCIEX_MMIO_READ(readw, u16) +PCIEX_MMIO_READ(readl, u32) +PCIEX_MMIO_READ(readq, u64) +PCIEX_MMIO_READ(readw_be, u16) +PCIEX_MMIO_READ(readl_be, u32) +PCIEX_MMIO_READ(readq_be, u64) +PCIEX_MMIO_READ_STR(readsb) +PCIEX_MMIO_READ_STR(readsw) +PCIEX_MMIO_READ_STR(readsl) + +static void scc_pciex_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + scc_pciex_io_flush(iowa_mem_find_bus(src)); +} + +/* + * I/O port access to devices on PCIEX. + */ + +static inline unsigned long get_bus_address(struct pci_controller *phb, + unsigned long port) +{ + return port - ((unsigned long)(phb->io_base_virt) - _IO_BASE); +} + +static u32 scc_pciex_read_port(struct pci_controller *phb, + unsigned long port, int size) +{ + unsigned int byte_enable; + unsigned int cmd, shift; + unsigned long addr; + u32 data, ret; + + BUG_ON(((port & 0x3ul) + size) > 4); + + addr = get_bus_address(phb, port); + shift = addr & 0x3ul; + byte_enable = ((1 << size) - 1) << shift; + cmd = PEXDCMND_IO_READ | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); + PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); + PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); + data = PEX_IN(phb->cfg_addr, PEXDRDATA); + ret = (data >> (shift * 8)) & (0xFFFFFFFF >> ((4 - size) * 8)); + + pr_debug("PCIEX:PIO READ:port=0x%lx, addr=0x%lx, size=%d, be=%x," + " cmd=%x, data=%x, ret=%x\n", port, addr, size, byte_enable, + cmd, data, ret); + + return ret; +} + +static void scc_pciex_write_port(struct pci_controller *phb, + unsigned long port, int size, u32 val) +{ + unsigned int byte_enable; + unsigned int cmd, shift; + unsigned long addr; + u32 data; + + BUG_ON(((port & 0x3ul) + size) > 4); + + addr = get_bus_address(phb, port); + shift = addr & 0x3ul; + byte_enable = ((1 << size) - 1) << shift; + cmd = PEXDCMND_IO_WRITE | (byte_enable << PEXDCMND_BYTE_EN_SHIFT); + data = (val & (0xFFFFFFFF >> (4 - size) * 8)) << (shift * 8); + PEX_OUT(phb->cfg_addr, PEXDADRS, (addr & ~0x3ul)); + PEX_OUT(phb->cfg_addr, PEXDCMND, cmd); + PEX_OUT(phb->cfg_addr, PEXDWDATA, data); + + pr_debug("PCIEX:PIO WRITE:port=0x%lx, addr=%lx, size=%d, val=%x," + " be=%x, cmd=%x, data=%x\n", port, addr, size, val, + byte_enable, cmd, data); +} + +static u8 __scc_pciex_inb(struct pci_controller *phb, unsigned long port) +{ + return (u8)scc_pciex_read_port(phb, port, 1); +} + +static u16 __scc_pciex_inw(struct pci_controller *phb, unsigned long port) +{ + u32 data; + if ((port & 0x3ul) < 3) + data = scc_pciex_read_port(phb, port, 2); + else { + u32 d1 = scc_pciex_read_port(phb, port, 1); + u32 d2 = scc_pciex_read_port(phb, port + 1, 1); + data = d1 | (d2 << 8); + } + return (u16)data; +} + +static u32 __scc_pciex_inl(struct pci_controller *phb, unsigned long port) +{ + unsigned int mod = port & 0x3ul; + u32 data; + if (mod == 0) + data = scc_pciex_read_port(phb, port, 4); + else { + u32 d1 = scc_pciex_read_port(phb, port, 4 - mod); + u32 d2 = scc_pciex_read_port(phb, port + 1, mod); + data = d1 | (d2 << (mod * 8)); + } + return data; +} + +static void __scc_pciex_outb(struct pci_controller *phb, + u8 val, unsigned long port) +{ + scc_pciex_write_port(phb, port, 1, (u32)val); +} + +static void __scc_pciex_outw(struct pci_controller *phb, + u16 val, unsigned long port) +{ + if ((port & 0x3ul) < 3) + scc_pciex_write_port(phb, port, 2, (u32)val); + else { + u32 d1 = val & 0x000000FF; + u32 d2 = (val & 0x0000FF00) >> 8; + scc_pciex_write_port(phb, port, 1, d1); + scc_pciex_write_port(phb, port + 1, 1, d2); + } +} + +static void __scc_pciex_outl(struct pci_controller *phb, + u32 val, unsigned long port) +{ + unsigned int mod = port & 0x3ul; + if (mod == 0) + scc_pciex_write_port(phb, port, 4, val); + else { + u32 d1 = val & (0xFFFFFFFFul >> (mod * 8)); + u32 d2 = val >> ((4 - mod) * 8); + scc_pciex_write_port(phb, port, 4 - mod, d1); + scc_pciex_write_port(phb, port + 1, mod, d2); + } +} + +#define PCIEX_PIO_FUNC(size, name) \ +static u##size scc_pciex_in##name(unsigned long port) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(port); \ + u##size data = __scc_pciex_in##name(bus->phb, port); \ + scc_pciex_io_flush(bus); \ + return data; \ +} \ +static void scc_pciex_ins##name(unsigned long p, void *b, unsigned long c) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(p); \ + u##size *dst = b; \ + for (; c != 0; c--, dst++) \ + *dst = cpu_to_le##size(__scc_pciex_in##name(bus->phb, p)); \ + scc_pciex_io_flush(bus); \ +} \ +static void scc_pciex_out##name(u##size val, unsigned long port) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(port); \ + __scc_pciex_out##name(bus->phb, val, port); \ +} \ +static void scc_pciex_outs##name(unsigned long p, const void *b, \ + unsigned long c) \ +{ \ + struct iowa_bus *bus = iowa_pio_find_bus(p); \ + const u##size *src = b; \ + for (; c != 0; c--, src++) \ + __scc_pciex_out##name(bus->phb, le##size##_to_cpu(*src), p); \ +} +#define cpu_to_le8(x) (x) +#define le8_to_cpu(x) (x) +PCIEX_PIO_FUNC(8, b) +PCIEX_PIO_FUNC(16, w) +PCIEX_PIO_FUNC(32, l) + +static struct ppc_pci_io scc_pciex_ops = { + .readb = scc_pciex_readb, + .readw = scc_pciex_readw, + .readl = scc_pciex_readl, + .readq = scc_pciex_readq, + .readw_be = scc_pciex_readw_be, + .readl_be = scc_pciex_readl_be, + .readq_be = scc_pciex_readq_be, + .readsb = scc_pciex_readsb, + .readsw = scc_pciex_readsw, + .readsl = scc_pciex_readsl, + .memcpy_fromio = scc_pciex_memcpy_fromio, + .inb = scc_pciex_inb, + .inw = scc_pciex_inw, + .inl = scc_pciex_inl, + .outb = scc_pciex_outb, + .outw = scc_pciex_outw, + .outl = scc_pciex_outl, + .insb = scc_pciex_insb, + .insw = scc_pciex_insw, + .insl = scc_pciex_insl, + .outsb = scc_pciex_outsb, + .outsw = scc_pciex_outsw, + .outsl = scc_pciex_outsl, +}; + +static int __init scc_pciex_iowa_init(struct iowa_bus *bus, void *data) +{ + dma_addr_t dummy_page_da; + void *dummy_page_va; + + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("PCIEX:Alloc dummy_page_va failed\n"); + return -1; + } + + dummy_page_da = dma_map_single(bus->phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dummy_page_da)) { + pr_err("PCIEX:Map dummy page failed.\n"); + kfree(dummy_page_va); + return -1; + } + + PEX_OUT(bus->phb->cfg_addr, PEXDMRDADR0, dummy_page_da); + + return 0; +} + +/* + * config space access + */ +#define MK_PEXDADRS(bus_no, dev_no, func_no, addr) \ + ((uint32_t)(((addr) & ~0x3UL) | \ + ((bus_no) << PEXDADRS_BUSNO_SHIFT) | \ + ((dev_no) << PEXDADRS_DEVNO_SHIFT) | \ + ((func_no) << PEXDADRS_FUNCNO_SHIFT))) + +#define MK_PEXDCMND_BYTE_EN(addr, size) \ + ((((0x1 << (size))-1) << ((addr) & 0x3)) << PEXDCMND_BYTE_EN_SHIFT) +#define MK_PEXDCMND(cmd, addr, size) ((cmd) | MK_PEXDCMND_BYTE_EN(addr, size)) + +static uint32_t config_read_pciex_dev(unsigned int *base, + uint64_t bus_no, uint64_t dev_no, uint64_t func_no, + uint64_t off, uint64_t size) +{ + uint32_t ret; + uint32_t addr, cmd; + + addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); + cmd = MK_PEXDCMND(PEXDCMND_CONFIG_READ, off, size); + PEX_OUT(base, PEXDADRS, addr); + PEX_OUT(base, PEXDCMND, cmd); + ret = (PEX_IN(base, PEXDRDATA) + >> ((off & (4-size)) * 8)) & ((0x1 << (size * 8)) - 1); + return ret; +} + +static void config_write_pciex_dev(unsigned int *base, uint64_t bus_no, + uint64_t dev_no, uint64_t func_no, uint64_t off, uint64_t size, + uint32_t data) +{ + uint32_t addr, cmd; + + addr = MK_PEXDADRS(bus_no, dev_no, func_no, off); + cmd = MK_PEXDCMND(PEXDCMND_CONFIG_WRITE, off, size); + PEX_OUT(base, PEXDADRS, addr); + PEX_OUT(base, PEXDCMND, cmd); + PEX_OUT(base, PEXDWDATA, + (data & ((0x1 << (size * 8)) - 1)) << ((off & (4-size)) * 8)); +} + +#define MK_PEXCADRS_BYTE_EN(off, len) \ + ((((0x1 << (len)) - 1) << ((off) & 0x3)) << PEXCADRS_BYTE_EN_SHIFT) +#define MK_PEXCADRS(cmd, addr, size) \ + ((cmd) | MK_PEXCADRS_BYTE_EN(addr, size) | ((addr) & ~0x3)) +static uint32_t config_read_pciex_rc(unsigned int *base, + uint32_t where, uint32_t size) +{ + PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_READ, where, size)); + return (PEX_IN(base, PEXCRDATA) + >> ((where & (4 - size)) * 8)) & ((0x1 << (size * 8)) - 1); +} + +static void config_write_pciex_rc(unsigned int *base, uint32_t where, + uint32_t size, uint32_t val) +{ + uint32_t data; + + data = (val & ((0x1 << (size * 8)) - 1)) << ((where & (4 - size)) * 8); + PEX_OUT(base, PEXCADRS, MK_PEXCADRS(PEXCADRS_CMD_WRITE, where, size)); + PEX_OUT(base, PEXCWDATA, data); +} + +/* Interfaces */ +/* Note: Work-around + * On SCC PCIEXC, one device is seen on all 32 dev_no. + * As SCC PCIEXC can have only one device on the bus, we look only one dev_no. + * (dev_no = 1) + */ +static int scc_pciex_read_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, unsigned int *val) +{ + struct device_node *dn; + struct pci_controller *phb; + + dn = bus->sysdata; + phb = pci_find_hose_for_OF_device(dn); + + if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) { + *val = ~0; + return PCIBIOS_DEVICE_NOT_FOUND; + } + + if (bus->number == 0 && PCI_SLOT(devfn) == 0) + *val = config_read_pciex_rc(phb->cfg_addr, where, size); + else + *val = config_read_pciex_dev(phb->cfg_addr, bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size); + + return PCIBIOS_SUCCESSFUL; +} + +static int scc_pciex_write_config(struct pci_bus *bus, unsigned int devfn, + int where, int size, unsigned int val) +{ + struct device_node *dn; + struct pci_controller *phb; + + dn = bus->sysdata; + phb = pci_find_hose_for_OF_device(dn); + + if (bus->number == phb->first_busno && PCI_SLOT(devfn) != 1) + return PCIBIOS_DEVICE_NOT_FOUND; + + if (bus->number == 0 && PCI_SLOT(devfn) == 0) + config_write_pciex_rc(phb->cfg_addr, where, size, val); + else + config_write_pciex_dev(phb->cfg_addr, bus->number, + PCI_SLOT(devfn), PCI_FUNC(devfn), where, size, val); + return PCIBIOS_SUCCESSFUL; +} + +static struct pci_ops scc_pciex_pci_ops = { + scc_pciex_read_config, + scc_pciex_write_config, +}; + +static void pciex_clear_intr_all(unsigned int *base) +{ + PEX_OUT(base, PEXAERRSTS, 0xffffffff); + PEX_OUT(base, PEXPRERRSTS, 0xffffffff); + PEX_OUT(base, PEXINTSTS, 0xffffffff); +} + +#if 0 +static void pciex_disable_intr_all(unsigned int *base) +{ + PEX_OUT(base, PEXINTMASK, 0x0); + PEX_OUT(base, PEXAERRMASK, 0x0); + PEX_OUT(base, PEXPRERRMASK, 0x0); + PEX_OUT(base, PEXVDMASK, 0x0); +} +#endif + +static void pciex_enable_intr_all(unsigned int *base) +{ + PEX_OUT(base, PEXINTMASK, 0x0000e7f1); + PEX_OUT(base, PEXAERRMASK, 0x03ff01ff); + PEX_OUT(base, PEXPRERRMASK, 0x0001010f); + PEX_OUT(base, PEXVDMASK, 0x00000001); +} + +static void pciex_check_status(unsigned int *base) +{ + uint32_t err = 0; + uint32_t intsts, aerr, prerr, rcvcp, lenerr; + uint32_t maea, maec; + + intsts = PEX_IN(base, PEXINTSTS); + aerr = PEX_IN(base, PEXAERRSTS); + prerr = PEX_IN(base, PEXPRERRSTS); + rcvcp = PEX_IN(base, PEXRCVCPLIDA); + lenerr = PEX_IN(base, PEXLENERRIDA); + + if (intsts || aerr || prerr || rcvcp || lenerr) + err = 1; + + pr_info("PCEXC interrupt!!\n"); + pr_info("PEXINTSTS :0x%08x\n", intsts); + pr_info("PEXAERRSTS :0x%08x\n", aerr); + pr_info("PEXPRERRSTS :0x%08x\n", prerr); + pr_info("PEXRCVCPLIDA :0x%08x\n", rcvcp); + pr_info("PEXLENERRIDA :0x%08x\n", lenerr); + + /* print detail of Protection Error */ + if (intsts & 0x00004000) { + uint32_t i, n; + for (i = 0; i < 4; i++) { + n = 1 << i; + if (prerr & n) { + maea = PEX_IN(base, PEXMAEA(i)); + maec = PEX_IN(base, PEXMAEC(i)); + pr_info("PEXMAEC%d :0x%08x\n", i, maec); + pr_info("PEXMAEA%d :0x%08x\n", i, maea); + } + } + } + + if (err) + pciex_clear_intr_all(base); +} + +static irqreturn_t pciex_handle_internal_irq(int irq, void *dev_id) +{ + struct pci_controller *phb = dev_id; + + pr_debug("PCIEX:pciex_handle_internal_irq(irq=%d)\n", irq); + + BUG_ON(phb->cfg_addr == NULL); + + pciex_check_status(phb->cfg_addr); + + return IRQ_HANDLED; +} + +static __init int celleb_setup_pciex(struct device_node *node, + struct pci_controller *phb) +{ + struct resource r; + struct of_irq oirq; + int virq; + + /* SMMIO registers; used inside this file */ + if (of_address_to_resource(node, 0, &r)) { + pr_err("PCIEXC:Failed to get config resource.\n"); + return 1; + } + phb->cfg_addr = ioremap(r.start, r.end - r.start + 1); + if (!phb->cfg_addr) { + pr_err("PCIEXC:Failed to remap SMMIO region.\n"); + return 1; + } + + /* Not use cfg_data, cmd and data regs are near address reg */ + phb->cfg_data = NULL; + + /* set pci_ops */ + phb->ops = &scc_pciex_pci_ops; + + /* internal interrupt handler */ + if (of_irq_map_one(node, 1, &oirq)) { + pr_err("PCIEXC:Failed to map irq\n"); + goto error; + } + virq = irq_create_of_mapping(oirq.controller, oirq.specifier, + oirq.size); + if (request_irq(virq, pciex_handle_internal_irq, + IRQF_DISABLED, "pciex", (void *)phb)) { + pr_err("PCIEXC:Failed to request irq\n"); + goto error; + } + + /* enable all interrupts */ + pciex_clear_intr_all(phb->cfg_addr); + pciex_enable_intr_all(phb->cfg_addr); + /* MSI: TBD */ + + return 0; + +error: + phb->cfg_data = NULL; + if (phb->cfg_addr) + iounmap(phb->cfg_addr); + phb->cfg_addr = NULL; + return 1; +} + +struct celleb_phb_spec celleb_pciex_spec __initdata = { + .setup = celleb_setup_pciex, + .ops = &scc_pciex_ops, + .iowa_init = &scc_pciex_iowa_init, +}; diff --git a/trunk/arch/powerpc/platforms/celleb/scc_sio.c b/trunk/arch/powerpc/platforms/cell/celleb_scc_sio.c similarity index 100% rename from trunk/arch/powerpc/platforms/celleb/scc_sio.c rename to trunk/arch/powerpc/platforms/cell/celleb_scc_sio.c diff --git a/trunk/arch/powerpc/platforms/celleb/scc_uhc.c b/trunk/arch/powerpc/platforms/cell/celleb_scc_uhc.c similarity index 99% rename from trunk/arch/powerpc/platforms/celleb/scc_uhc.c rename to trunk/arch/powerpc/platforms/cell/celleb_scc_uhc.c index cb4307994087..d63b720bfe3a 100644 --- a/trunk/arch/powerpc/platforms/celleb/scc_uhc.c +++ b/trunk/arch/powerpc/platforms/cell/celleb_scc_uhc.c @@ -25,7 +25,7 @@ #include #include -#include "scc.h" +#include "celleb_scc.h" #define UHC_RESET_WAIT_MAX 10000 diff --git a/trunk/arch/powerpc/platforms/celleb/setup.c b/trunk/arch/powerpc/platforms/cell/celleb_setup.c similarity index 97% rename from trunk/arch/powerpc/platforms/celleb/setup.c rename to trunk/arch/powerpc/platforms/cell/celleb_setup.c index f27ae1e3fb58..b11cb30decb2 100644 --- a/trunk/arch/powerpc/platforms/celleb/setup.c +++ b/trunk/arch/powerpc/platforms/cell/celleb_setup.c @@ -56,13 +56,13 @@ #include #include -#include "interrupt.h" +#include "beat_interrupt.h" #include "beat_wrapper.h" #include "beat.h" -#include "pci.h" -#include "../cell/interrupt.h" -#include "../cell/pervasive.h" -#include "../cell/ras.h" +#include "celleb_pci.h" +#include "interrupt.h" +#include "pervasive.h" +#include "ras.h" static char celleb_machine_type[128] = "Celleb"; @@ -114,8 +114,6 @@ static int __init celleb_publish_devices(void) /* Publish OF platform devices for southbridge IOs */ of_platform_bus_probe(NULL, celleb_bus_ids, NULL); - celleb_pci_workaround_init(); - return 0; } machine_device_initcall(celleb_beat, celleb_publish_devices); diff --git a/trunk/arch/powerpc/platforms/cell/io-workarounds.c b/trunk/arch/powerpc/platforms/cell/io-workarounds.c index 979d4b67efb4..3b84e8be314c 100644 --- a/trunk/arch/powerpc/platforms/cell/io-workarounds.c +++ b/trunk/arch/powerpc/platforms/cell/io-workarounds.c @@ -1,6 +1,9 @@ /* + * Support PCI IO workaround + * * Copyright (C) 2006 Benjamin Herrenschmidt * IBM, Corp. + * (C) Copyright 2007-2008 TOSHIBA CORPORATION * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -9,335 +12,174 @@ #undef DEBUG #include -#include -#include + #include #include -#include +#include #include +#include "io-workarounds.h" -#define SPIDER_PCI_REG_BASE 0xd000 -#define SPIDER_PCI_VCI_CNTL_STAT 0x0110 -#define SPIDER_PCI_DUMMY_READ 0x0810 -#define SPIDER_PCI_DUMMY_READ_BASE 0x0814 +#define IOWA_MAX_BUS 8 -/* Undefine that to re-enable bogus prefetch - * - * Without that workaround, the chip will do bogus prefetch past - * page boundary from system memory. This setting will disable that, - * though the documentation is unclear as to the consequences of doing - * so, either purely performances, or possible misbehaviour... It's not - * clear wether the chip can handle unaligned accesses at all without - * prefetching enabled. - * - * For now, things appear to be behaving properly with that prefetching - * disabled and IDE, possibly because IDE isn't doing any unaligned - * access. - */ -#define SPIDER_DISABLE_PREFETCH +static struct iowa_bus iowa_busses[IOWA_MAX_BUS]; +static unsigned int iowa_bus_count; -#define MAX_SPIDERS 3 +static struct iowa_bus *iowa_pci_find(unsigned long vaddr, unsigned long paddr) +{ + int i, j; + struct resource *res; + unsigned long vstart, vend; -static struct spider_pci_bus { - void __iomem *regs; - unsigned long mmio_start; - unsigned long mmio_end; - unsigned long pio_vstart; - unsigned long pio_vend; -} spider_pci_busses[MAX_SPIDERS]; -static int spider_pci_count; + for (i = 0; i < iowa_bus_count; i++) { + struct iowa_bus *bus = &iowa_busses[i]; + struct pci_controller *phb = bus->phb; -static struct spider_pci_bus *spider_pci_find(unsigned long vaddr, - unsigned long paddr) -{ - int i; - - for (i = 0; i < spider_pci_count; i++) { - struct spider_pci_bus *bus = &spider_pci_busses[i]; - if (paddr && paddr >= bus->mmio_start && paddr < bus->mmio_end) - return bus; - if (vaddr && vaddr >= bus->pio_vstart && vaddr < bus->pio_vend) - return bus; + if (vaddr) { + vstart = (unsigned long)phb->io_base_virt; + vend = vstart + phb->pci_io_size - 1; + if ((vaddr >= vstart) && (vaddr <= vend)) + return bus; + } + + if (paddr) + for (j = 0; j < 3; j++) { + res = &phb->mem_resources[j]; + if (paddr >= res->start && paddr <= res->end) + return bus; + } } + return NULL; } -static void spider_io_flush(const volatile void __iomem *addr) +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR addr) { - struct spider_pci_bus *bus; + struct iowa_bus *bus; int token; - /* Get platform token (set by ioremap) from address */ token = PCI_GET_ADDR_TOKEN(addr); - /* Fast path if we have a non-0 token, it indicates which bus we - * are on. - * - * If the token is 0, that means either that the ioremap was done - * before we initialized this layer, or it's a PIO operation. We - * fallback to a low path in this case. Hopefully, internal devices - * which are ioremap'ed early should use in_XX/out_XX functions - * instead of the PCI ones and thus not suffer from the slowdown. - * - * Also note that currently, the workaround will not work for areas - * that are not mapped with PTEs (bolted in the hash table). This - * is the case for ioremaps done very early at boot (before - * mem_init_done) and includes the mapping of the ISA IO space. - * - * Fortunately, none of the affected devices is expected to do DMA - * and thus there should be no problem in practice. - * - * In order to improve performances, we only do the PTE search for - * addresses falling in the PHB IO space area. That means it will - * not work for hotplug'ed PHBs but those don't exist with Spider. - */ - if (token && token <= spider_pci_count) - bus = &spider_pci_busses[token - 1]; + if (token && token <= iowa_bus_count) + bus = &iowa_busses[token - 1]; else { unsigned long vaddr, paddr; pte_t *ptep; - /* Fixup physical address */ vaddr = (unsigned long)PCI_FIX_ADDR(addr); + if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) + return NULL; - /* Check if it's in allowed range for PIO */ - if (vaddr < PHB_IO_BASE || vaddr > PHB_IO_END) - return; - - /* Try to find a PTE. If not, clear the paddr, we'll do - * a vaddr only lookup (PIO only) - */ ptep = find_linux_pte(init_mm.pgd, vaddr); if (ptep == NULL) paddr = 0; else paddr = pte_pfn(*ptep) << PAGE_SHIFT; + bus = iowa_pci_find(vaddr, paddr); - bus = spider_pci_find(vaddr, paddr); if (bus == NULL) - return; + return NULL; } - /* Now do the workaround - */ - (void)in_be32(bus->regs + SPIDER_PCI_DUMMY_READ); + return bus; } -static u8 spider_readb(const volatile void __iomem *addr) +struct iowa_bus *iowa_pio_find_bus(unsigned long port) { - u8 val = __do_readb(addr); - spider_io_flush(addr); - return val; + unsigned long vaddr = (unsigned long)pci_io_base + port; + return iowa_pci_find(vaddr, 0); } -static u16 spider_readw(const volatile void __iomem *addr) -{ - u16 val = __do_readw(addr); - spider_io_flush(addr); - return val; -} -static u32 spider_readl(const volatile void __iomem *addr) -{ - u32 val = __do_readl(addr); - spider_io_flush(addr); - return val; +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) \ +static ret iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) \ + return bus->ops->name al; \ + return __do_##name al; \ } -static u64 spider_readq(const volatile void __iomem *addr) -{ - u64 val = __do_readq(addr); - spider_io_flush(addr); - return val; +#define DEF_PCI_AC_NORET(name, at, al, space, aa) \ +static void iowa_##name at \ +{ \ + struct iowa_bus *bus; \ + bus = iowa_##space##_find_bus(aa); \ + if (bus && bus->ops && bus->ops->name) { \ + bus->ops->name al; \ + return; \ + } \ + __do_##name al; \ } -static u16 spider_readw_be(const volatile void __iomem *addr) -{ - u16 val = __do_readw_be(addr); - spider_io_flush(addr); - return val; -} +#include -static u32 spider_readl_be(const volatile void __iomem *addr) -{ - u32 val = __do_readl_be(addr); - spider_io_flush(addr); - return val; -} +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET -static u64 spider_readq_be(const volatile void __iomem *addr) -{ - u64 val = __do_readq_be(addr); - spider_io_flush(addr); - return val; -} +static struct ppc_pci_io __initdata iowa_pci_io = { -static void spider_readsb(const volatile void __iomem *addr, void *buf, - unsigned long count) -{ - __do_readsb(addr, buf, count); - spider_io_flush(addr); -} +#define DEF_PCI_AC_RET(name, ret, at, al, space, aa) .name = iowa_##name, +#define DEF_PCI_AC_NORET(name, at, al, space, aa) .name = iowa_##name, -static void spider_readsw(const volatile void __iomem *addr, void *buf, - unsigned long count) -{ - __do_readsw(addr, buf, count); - spider_io_flush(addr); -} +#include -static void spider_readsl(const volatile void __iomem *addr, void *buf, - unsigned long count) -{ - __do_readsl(addr, buf, count); - spider_io_flush(addr); -} - -static void spider_memcpy_fromio(void *dest, const volatile void __iomem *src, - unsigned long n) -{ - __do_memcpy_fromio(dest, src, n); - spider_io_flush(src); -} +#undef DEF_PCI_AC_RET +#undef DEF_PCI_AC_NORET +}; -static void __iomem * spider_ioremap(unsigned long addr, unsigned long size, - unsigned long flags) +static void __iomem *iowa_ioremap(unsigned long addr, unsigned long size, + unsigned long flags) { - struct spider_pci_bus *bus; + struct iowa_bus *bus; void __iomem *res = __ioremap(addr, size, flags); int busno; - pr_debug("spider_ioremap(0x%lx, 0x%lx, 0x%lx) -> 0x%p\n", - addr, size, flags, res); - - bus = spider_pci_find(0, addr); + bus = iowa_pci_find(0, addr); if (bus != NULL) { - busno = bus - spider_pci_busses; - pr_debug(" found bus %d, setting token\n", busno); + busno = bus - iowa_busses; PCI_SET_ADDR_TOKEN(res, busno + 1); } - pr_debug(" result=0x%p\n", res); - return res; } -static void __init spider_pci_setup_chip(struct spider_pci_bus *bus) -{ -#ifdef SPIDER_DISABLE_PREFETCH - u32 val = in_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT); - pr_debug(" PVCI_Control_Status was 0x%08x\n", val); - out_be32(bus->regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); -#endif - - /* Configure the dummy address for the workaround */ - out_be32(bus->regs + SPIDER_PCI_DUMMY_READ_BASE, 0x80000000); -} - -static void __init spider_pci_add_one(struct pci_controller *phb) +/* Regist new bus to support workaround */ +void __init iowa_register_bus(struct pci_controller *phb, + struct ppc_pci_io *ops, + int (*initfunc)(struct iowa_bus *, void *), void *data) { - struct spider_pci_bus *bus = &spider_pci_busses[spider_pci_count]; + struct iowa_bus *bus; struct device_node *np = phb->dn; - struct resource rsrc; - void __iomem *regs; - if (spider_pci_count >= MAX_SPIDERS) { - printk(KERN_ERR "Too many spider bridges, workarounds" - " disabled for %s\n", np->full_name); + if (iowa_bus_count >= IOWA_MAX_BUS) { + pr_err("IOWA:Too many pci bridges, " + "workarounds disabled for %s\n", np->full_name); return; } - /* Get the registers for the beast */ - if (of_address_to_resource(np, 0, &rsrc)) { - printk(KERN_ERR "Failed to get registers for spider %s" - " workarounds disabled\n", np->full_name); - return; - } + bus = &iowa_busses[iowa_bus_count]; + bus->phb = phb; + bus->ops = ops; - /* Mask out some useless bits in there to get to the base of the - * spider chip - */ - rsrc.start &= ~0xfffffffful; - - /* Map them */ - regs = ioremap(rsrc.start + SPIDER_PCI_REG_BASE, 0x1000); - if (regs == NULL) { - printk(KERN_ERR "Failed to map registers for spider %s" - " workarounds disabled\n", np->full_name); - return; - } - - spider_pci_count++; - - /* We assume spiders only have one MMIO resource */ - bus->mmio_start = phb->mem_resources[0].start; - bus->mmio_end = phb->mem_resources[0].end + 1; - - bus->pio_vstart = (unsigned long)phb->io_base_virt; - bus->pio_vend = bus->pio_vstart + phb->pci_io_size; - - bus->regs = regs; - - printk(KERN_INFO "PCI: Spider MMIO workaround for %s\n",np->full_name); + if (initfunc) + if ((*initfunc)(bus, data)) + return; - pr_debug(" mmio (P) = 0x%016lx..0x%016lx\n", - bus->mmio_start, bus->mmio_end); - pr_debug(" pio (V) = 0x%016lx..0x%016lx\n", - bus->pio_vstart, bus->pio_vend); - pr_debug(" regs (P) = 0x%016lx (V) = 0x%p\n", - rsrc.start + SPIDER_PCI_REG_BASE, bus->regs); + iowa_bus_count++; - spider_pci_setup_chip(bus); + pr_debug("IOWA:[%d]Add bus, %s.\n", iowa_bus_count-1, np->full_name); } -static struct ppc_pci_io __initdata spider_pci_io = { - .readb = spider_readb, - .readw = spider_readw, - .readl = spider_readl, - .readq = spider_readq, - .readw_be = spider_readw_be, - .readl_be = spider_readl_be, - .readq_be = spider_readq_be, - .readsb = spider_readsb, - .readsw = spider_readsw, - .readsl = spider_readsl, - .memcpy_fromio = spider_memcpy_fromio, -}; - -static int __init spider_pci_workaround_init(void) +/* enable IO workaround */ +void __init io_workaround_init(void) { - struct pci_controller *phb; - - /* Find spider bridges. We assume they have been all probed - * in setup_arch(). If that was to change, we would need to - * update this code to cope with dynamically added busses - */ - list_for_each_entry(phb, &hose_list, list_node) { - struct device_node *np = phb->dn; - const char *model = of_get_property(np, "model", NULL); - - /* If no model property or name isn't exactly "pci", skip */ - if (model == NULL || strcmp(np->name, "pci")) - continue; - /* If model is not "Spider", skip */ - if (strcmp(model, "Spider")) - continue; - spider_pci_add_one(phb); - } - - /* No Spider PCI found, exit */ - if (spider_pci_count == 0) - return 0; + static int io_workaround_inited; - /* Setup IO callbacks. We only setup MMIO reads. PIO reads will - * fallback to MMIO reads (though without a token, thus slower) - */ - ppc_pci_io = spider_pci_io; - - /* Setup ioremap callback */ - ppc_md.ioremap = spider_ioremap; - - return 0; + if (io_workaround_inited) + return; + ppc_pci_io = iowa_pci_io; + ppc_md.ioremap = iowa_ioremap; + io_workaround_inited = 1; } -machine_arch_initcall(cell, spider_pci_workaround_init); diff --git a/trunk/arch/powerpc/platforms/cell/io-workarounds.h b/trunk/arch/powerpc/platforms/cell/io-workarounds.h new file mode 100644 index 000000000000..79d8ed3d510f --- /dev/null +++ b/trunk/arch/powerpc/platforms/cell/io-workarounds.h @@ -0,0 +1,49 @@ +/* + * Support PCI IO workaround + * + * (C) Copyright 2007-2008 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef _IO_WORKAROUNDS_H +#define _IO_WORKAROUNDS_H + +#include +#include + +/* Bus info */ +struct iowa_bus { + struct pci_controller *phb; + struct ppc_pci_io *ops; + void *private; +}; + +void __init io_workaround_init(void); +void __init iowa_register_bus(struct pci_controller *, struct ppc_pci_io *, + int (*)(struct iowa_bus *, void *), void *); +struct iowa_bus *iowa_mem_find_bus(const PCI_IO_ADDR); +struct iowa_bus *iowa_pio_find_bus(unsigned long); + +extern struct ppc_pci_io spiderpci_ops; +extern int spiderpci_iowa_init(struct iowa_bus *, void *); + +#define SPIDER_PCI_REG_BASE 0xd000 +#define SPIDER_PCI_REG_SIZE 0x1000 +#define SPIDER_PCI_VCI_CNTL_STAT 0x0110 +#define SPIDER_PCI_DUMMY_READ 0x0810 +#define SPIDER_PCI_DUMMY_READ_BASE 0x0814 + +#endif /* _IO_WORKAROUNDS_H */ diff --git a/trunk/arch/powerpc/platforms/cell/setup.c b/trunk/arch/powerpc/platforms/cell/setup.c index 5c531e8f9f6f..ab721b50fbba 100644 --- a/trunk/arch/powerpc/platforms/cell/setup.c +++ b/trunk/arch/powerpc/platforms/cell/setup.c @@ -57,6 +57,7 @@ #include "interrupt.h" #include "pervasive.h" #include "ras.h" +#include "io-workarounds.h" #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -117,13 +118,50 @@ static void cell_fixup_pcie_rootcomplex(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, cell_fixup_pcie_rootcomplex); +static int __devinit cell_setup_phb(struct pci_controller *phb) +{ + const char *model; + struct device_node *np; + + int rc = rtas_setup_phb(phb); + if (rc) + return rc; + + np = phb->dn; + model = of_get_property(np, "model", NULL); + if (model == NULL || strcmp(np->name, "pci")) + return 0; + + /* Setup workarounds for spider */ + if (strcmp(model, "Spider")) + return 0; + + iowa_register_bus(phb, &spiderpci_ops, &spiderpci_iowa_init, + (void *)SPIDER_PCI_REG_BASE); + io_workaround_init(); + + return 0; +} + static int __init cell_publish_devices(void) { + struct device_node *root = of_find_node_by_path("/"); + struct device_node *np; int node; /* Publish OF platform devices for southbridge IOs */ of_platform_bus_probe(NULL, NULL, NULL); + /* On spider based blades, we need to manually create the OF + * platform devices for the PCI host bridges + */ + for_each_child_of_node(root, np) { + if (np->type == NULL || (strcmp(np->type, "pci") != 0 && + strcmp(np->type, "pciex") != 0)) + continue; + of_platform_device_create(np, NULL, NULL); + } + /* There is no device for the MIC memory controller, thus we create * a platform device for it to attach the EDAC driver to. */ @@ -132,6 +170,7 @@ static int __init cell_publish_devices(void) continue; platform_device_register_simple("cbe-mic", node, NULL, 0); } + return 0; } machine_subsys_initcall(cell, cell_publish_devices); @@ -213,7 +252,7 @@ static void __init cell_setup_arch(void) /* Find and initialize PCI host bridges */ init_pci_config_tokens(); - find_and_init_phbs(); + cbe_pervasive_init(); #ifdef CONFIG_DUMMY_CONSOLE conswitchp = &dummy_con; @@ -249,7 +288,7 @@ define_machine(cell) { .calibrate_decr = generic_calibrate_decr, .progress = cell_progress, .init_IRQ = cell_init_irq, - .pci_setup_phb = rtas_setup_phb, + .pci_setup_phb = cell_setup_phb, #ifdef CONFIG_KEXEC .machine_kexec = default_machine_kexec, .machine_kexec_prepare = default_machine_kexec_prepare, diff --git a/trunk/arch/powerpc/platforms/cell/spider-pci.c b/trunk/arch/powerpc/platforms/cell/spider-pci.c new file mode 100644 index 000000000000..418b605ac35a --- /dev/null +++ b/trunk/arch/powerpc/platforms/cell/spider-pci.c @@ -0,0 +1,184 @@ +/* + * IO workarounds for PCI on Celleb/Cell platform + * + * (C) Copyright 2006-2007 TOSHIBA CORPORATION + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#undef DEBUG + +#include +#include +#include + +#include +#include + +#include "io-workarounds.h" + +#define SPIDER_PCI_DISABLE_PREFETCH + +struct spiderpci_iowa_private { + void __iomem *regs; +}; + +static void spiderpci_io_flush(struct iowa_bus *bus) +{ + struct spiderpci_iowa_private *priv; + u32 val; + + priv = bus->private; + val = in_be32(priv->regs + SPIDER_PCI_DUMMY_READ); + iosync(); +} + +#define SPIDER_PCI_MMIO_READ(name, ret) \ +static ret spiderpci_##name(const PCI_IO_ADDR addr) \ +{ \ + ret val = __do_##name(addr); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ + return val; \ +} + +#define SPIDER_PCI_MMIO_READ_STR(name) \ +static void spiderpci_##name(const PCI_IO_ADDR addr, void *buf, \ + unsigned long count) \ +{ \ + __do_##name(addr, buf, count); \ + spiderpci_io_flush(iowa_mem_find_bus(addr)); \ +} + +SPIDER_PCI_MMIO_READ(readb, u8) +SPIDER_PCI_MMIO_READ(readw, u16) +SPIDER_PCI_MMIO_READ(readl, u32) +SPIDER_PCI_MMIO_READ(readq, u64) +SPIDER_PCI_MMIO_READ(readw_be, u16) +SPIDER_PCI_MMIO_READ(readl_be, u32) +SPIDER_PCI_MMIO_READ(readq_be, u64) +SPIDER_PCI_MMIO_READ_STR(readsb) +SPIDER_PCI_MMIO_READ_STR(readsw) +SPIDER_PCI_MMIO_READ_STR(readsl) + +static void spiderpci_memcpy_fromio(void *dest, const PCI_IO_ADDR src, + unsigned long n) +{ + __do_memcpy_fromio(dest, src, n); + spiderpci_io_flush(iowa_mem_find_bus(src)); +} + +static int __init spiderpci_pci_setup_chip(struct pci_controller *phb, + void __iomem *regs) +{ + void *dummy_page_va; + dma_addr_t dummy_page_da; + +#ifdef SPIDER_PCI_DISABLE_PREFETCH + u32 val = in_be32(regs + SPIDER_PCI_VCI_CNTL_STAT); + pr_debug("SPIDER_IOWA:PVCI_Control_Status was 0x%08x\n", val); + out_be32(regs + SPIDER_PCI_VCI_CNTL_STAT, val | 0x8); +#endif /* SPIDER_PCI_DISABLE_PREFETCH */ + + /* setup dummy read */ + /* + * On CellBlade, we can't know that which XDR memory is used by + * kmalloc() to allocate dummy_page_va. + * In order to imporve the performance, the XDR which is used to + * allocate dummy_page_va is the nearest the spider-pci. + * We have to select the CBE which is the nearest the spider-pci + * to allocate memory from the best XDR, but I don't know that + * how to do. + * + * Celleb does not have this problem, because it has only one XDR. + */ + dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (!dummy_page_va) { + pr_err("SPIDERPCI-IOWA:Alloc dummy_page_va failed.\n"); + return -1; + } + + dummy_page_da = dma_map_single(phb->parent, dummy_page_va, + PAGE_SIZE, DMA_FROM_DEVICE); + if (dma_mapping_error(dummy_page_da)) { + pr_err("SPIDER-IOWA:Map dummy page filed.\n"); + kfree(dummy_page_va); + return -1; + } + + out_be32(regs + SPIDER_PCI_DUMMY_READ_BASE, dummy_page_da); + + return 0; +} + +int __init spiderpci_iowa_init(struct iowa_bus *bus, void *data) +{ + void __iomem *regs = NULL; + struct spiderpci_iowa_private *priv; + struct device_node *np = bus->phb->dn; + struct resource r; + unsigned long offset = (unsigned long)data; + + pr_debug("SPIDERPCI-IOWA:Bus initialize for spider(%s)\n", + np->full_name); + + priv = kzalloc(sizeof(struct spiderpci_iowa_private), GFP_KERNEL); + if (!priv) { + pr_err("SPIDERPCI-IOWA:" + "Can't allocate struct spiderpci_iowa_private"); + return -1; + } + + if (of_address_to_resource(np, 0, &r)) { + pr_err("SPIDERPCI-IOWA:Can't get resource.\n"); + goto error; + } + + regs = ioremap(r.start + offset, SPIDER_PCI_REG_SIZE); + if (!regs) { + pr_err("SPIDERPCI-IOWA:ioremap failed.\n"); + goto error; + } + priv->regs = regs; + bus->private = priv; + + if (spiderpci_pci_setup_chip(bus->phb, regs)) + goto error; + + return 0; + +error: + kfree(priv); + bus->private = NULL; + + if (regs) + iounmap(regs); + + return -1; +} + +struct ppc_pci_io spiderpci_ops = { + .readb = spiderpci_readb, + .readw = spiderpci_readw, + .readl = spiderpci_readl, + .readq = spiderpci_readq, + .readw_be = spiderpci_readw_be, + .readl_be = spiderpci_readl_be, + .readq_be = spiderpci_readq_be, + .readsb = spiderpci_readsb, + .readsw = spiderpci_readsw, + .readsl = spiderpci_readsl, + .memcpy_fromio = spiderpci_memcpy_fromio, +}; + diff --git a/trunk/arch/powerpc/platforms/celleb/Kconfig b/trunk/arch/powerpc/platforms/celleb/Kconfig deleted file mode 100644 index 372891edcdd2..000000000000 --- a/trunk/arch/powerpc/platforms/celleb/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -config PPC_CELLEB - bool "Toshiba's Cell Reference Set 'Celleb' Architecture" - depends on PPC_MULTIPLATFORM && PPC64 - select PPC_CELL - select PPC_CELL_NATIVE - select PPC_RTAS - select PPC_INDIRECT_IO - select PPC_OF_PLATFORM_PCI - select HAS_TXX9_SERIAL - select PPC_UDBG_BEAT - select USB_OHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_MMIO diff --git a/trunk/arch/powerpc/platforms/celleb/Makefile b/trunk/arch/powerpc/platforms/celleb/Makefile deleted file mode 100644 index 889d43f715ea..000000000000 --- a/trunk/arch/powerpc/platforms/celleb/Makefile +++ /dev/null @@ -1,9 +0,0 @@ -obj-y += interrupt.o iommu.o setup.o \ - htab.o beat.o hvCall.o pci.o \ - scc_epci.o scc_uhc.o \ - io-workarounds.o - -obj-$(CONFIG_SMP) += smp.o -obj-$(CONFIG_PPC_UDBG_BEAT) += udbg_beat.o -obj-$(CONFIG_SERIAL_TXX9) += scc_sio.o -obj-$(CONFIG_SPU_BASE) += spu_priv1.o diff --git a/trunk/arch/powerpc/platforms/celleb/io-workarounds.c b/trunk/arch/powerpc/platforms/celleb/io-workarounds.c deleted file mode 100644 index 423339be1bac..000000000000 --- a/trunk/arch/powerpc/platforms/celleb/io-workarounds.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Support for Celleb io workarounds - * - * (C) Copyright 2006-2007 TOSHIBA CORPORATION - * - * This file is based to arch/powerpc/platform/cell/io-workarounds.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ - -#undef DEBUG - -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "pci.h" - -#define MAX_CELLEB_PCI_BUS 4 - -void *celleb_dummy_page_va; - -static struct celleb_pci_bus { - struct pci_controller *phb; - void (*dummy_read)(struct pci_controller *); -} celleb_pci_busses[MAX_CELLEB_PCI_BUS]; - -static int celleb_pci_count = 0; - -static struct celleb_pci_bus *celleb_pci_find(unsigned long vaddr, - unsigned long paddr) -{ - int i, j; - struct resource *res; - - for (i = 0; i < celleb_pci_count; i++) { - struct celleb_pci_bus *bus = &celleb_pci_busses[i]; - struct pci_controller *phb = bus->phb; - if (paddr) - for (j = 0; j < 3; j++) { - res = &phb->mem_resources[j]; - if (paddr >= res->start && paddr <= res->end) - return bus; - } - res = &phb->io_resource; - if (vaddr && vaddr >= res->start && vaddr <= res->end) - return bus; - } - return NULL; -} - -static void celleb_io_flush(const PCI_IO_ADDR addr) -{ - struct celleb_pci_bus *bus; - int token; - - token = PCI_GET_ADDR_TOKEN(addr); - - if (token && token <= celleb_pci_count) - bus = &celleb_pci_busses[token - 1]; - else { - unsigned long vaddr, paddr; - pte_t *ptep; - - vaddr = (unsigned long)PCI_FIX_ADDR(addr); - if (vaddr < PHB_IO_BASE || vaddr >= PHB_IO_END) - return; - - ptep = find_linux_pte(init_mm.pgd, vaddr); - if (ptep == NULL) - paddr = 0; - else - paddr = pte_pfn(*ptep) << PAGE_SHIFT; - bus = celleb_pci_find(vaddr, paddr); - - if (bus == NULL) - return; - } - - if (bus->dummy_read) - bus->dummy_read(bus->phb); -} - -static u8 celleb_readb(const PCI_IO_ADDR addr) -{ - u8 val; - val = __do_readb(addr); - celleb_io_flush(addr); - return val; -} - -static u16 celleb_readw(const PCI_IO_ADDR addr) -{ - u16 val; - val = __do_readw(addr); - celleb_io_flush(addr); - return val; -} - -static u32 celleb_readl(const PCI_IO_ADDR addr) -{ - u32 val; - val = __do_readl(addr); - celleb_io_flush(addr); - return val; -} - -static u64 celleb_readq(const PCI_IO_ADDR addr) -{ - u64 val; - val = __do_readq(addr); - celleb_io_flush(addr); - return val; -} - -static u16 celleb_readw_be(const PCI_IO_ADDR addr) -{ - u16 val; - val = __do_readw_be(addr); - celleb_io_flush(addr); - return val; -} - -static u32 celleb_readl_be(const PCI_IO_ADDR addr) -{ - u32 val; - val = __do_readl_be(addr); - celleb_io_flush(addr); - return val; -} - -static u64 celleb_readq_be(const PCI_IO_ADDR addr) -{ - u64 val; - val = __do_readq_be(addr); - celleb_io_flush(addr); - return val; -} - -static void celleb_readsb(const PCI_IO_ADDR addr, - void *buf, unsigned long count) -{ - __do_readsb(addr, buf, count); - celleb_io_flush(addr); -} - -static void celleb_readsw(const PCI_IO_ADDR addr, - void *buf, unsigned long count) -{ - __do_readsw(addr, buf, count); - celleb_io_flush(addr); -} - -static void celleb_readsl(const PCI_IO_ADDR addr, - void *buf, unsigned long count) -{ - __do_readsl(addr, buf, count); - celleb_io_flush(addr); -} - -static void celleb_memcpy_fromio(void *dest, - const PCI_IO_ADDR src, - unsigned long n) -{ - __do_memcpy_fromio(dest, src, n); - celleb_io_flush(src); -} - -static void __iomem *celleb_ioremap(unsigned long addr, - unsigned long size, - unsigned long flags) -{ - struct celleb_pci_bus *bus; - void __iomem *res = __ioremap(addr, size, flags); - int busno; - - bus = celleb_pci_find(0, addr); - if (bus != NULL) { - busno = bus - celleb_pci_busses; - PCI_SET_ADDR_TOKEN(res, busno + 1); - } - return res; -} - -static void celleb_iounmap(volatile void __iomem *addr) -{ - return __iounmap(PCI_FIX_ADDR(addr)); -} - -static struct ppc_pci_io celleb_pci_io __initdata = { - .readb = celleb_readb, - .readw = celleb_readw, - .readl = celleb_readl, - .readq = celleb_readq, - .readw_be = celleb_readw_be, - .readl_be = celleb_readl_be, - .readq_be = celleb_readq_be, - .readsb = celleb_readsb, - .readsw = celleb_readsw, - .readsl = celleb_readsl, - .memcpy_fromio = celleb_memcpy_fromio, -}; - -void __init celleb_pci_add_one(struct pci_controller *phb, - void (*dummy_read)(struct pci_controller *)) -{ - struct celleb_pci_bus *bus = &celleb_pci_busses[celleb_pci_count]; - struct device_node *np = phb->dn; - - if (celleb_pci_count >= MAX_CELLEB_PCI_BUS) { - printk(KERN_ERR "Too many pci bridges, workarounds" - " disabled for %s\n", np->full_name); - return; - } - - celleb_pci_count++; - - bus->phb = phb; - bus->dummy_read = dummy_read; -} - -static struct of_device_id celleb_pci_workaround_match[] __initdata = { - { - .name = "pci-pseudo", - .data = fake_pci_workaround_init, - }, { - .name = "epci", - .data = epci_workaround_init, - }, { - }, -}; - -int __init celleb_pci_workaround_init(void) -{ - struct pci_controller *phb; - struct device_node *node; - const struct of_device_id *match; - void (*init_func)(struct pci_controller *); - - celleb_dummy_page_va = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!celleb_dummy_page_va) { - printk(KERN_ERR "Celleb: dummy read disabled. " - "Alloc celleb_dummy_page_va failed\n"); - return 1; - } - - list_for_each_entry(phb, &hose_list, list_node) { - node = phb->dn; - match = of_match_node(celleb_pci_workaround_match, node); - - if (match) { - init_func = match->data; - (*init_func)(phb); - } - } - - ppc_pci_io = celleb_pci_io; - ppc_md.ioremap = celleb_ioremap; - ppc_md.iounmap = celleb_iounmap; - - return 0; -} diff --git a/trunk/arch/powerpc/platforms/iseries/exception.S b/trunk/arch/powerpc/platforms/iseries/exception.S index c775cd4b3d6e..8ff330d026ca 100644 --- a/trunk/arch/powerpc/platforms/iseries/exception.S +++ b/trunk/arch/powerpc/platforms/iseries/exception.S @@ -59,8 +59,33 @@ system_reset_iSeries: andc r4,r4,r5 mtspr SPRN_CTRLT,r4 +/* Spin on __secondary_hold_spinloop until it is updated by the boot cpu. */ +/* In the UP case we'll yeild() later, and we will not access the paca anyway */ +#ifdef CONFIG_SMP 1: HMT_LOW + LOAD_REG_IMMEDIATE(r23, __secondary_hold_spinloop) + ld r23,0(r23) + sync + LOAD_REG_IMMEDIATE(r3,current_set) + sldi r28,r24,3 /* get current_set[cpu#] */ + ldx r3,r3,r28 + addi r1,r3,THREAD_SIZE + subi r1,r1,STACK_FRAME_OVERHEAD + + cmpwi 0,r23,0 /* Keep poking the Hypervisor until */ + bne 2f /* we're released */ + /* Let the Hypervisor know we are alive */ + /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ + lis r3,0x8002 + rldicr r3,r3,32,15 /* r0 = (r3 << 32) & 0xffff000000000000 */ + li r0,-1 /* r0=-1 indicates a Hypervisor call */ + sc /* Invoke the hypervisor via a system call */ + b 1b +#endif + +2: + HMT_LOW #ifdef CONFIG_SMP lbz r23,PACAPROCSTART(r13) /* Test if this processor * should start */ @@ -91,7 +116,7 @@ iSeries_secondary_smp_loop: li r0,-1 /* r0=-1 indicates a Hypervisor call */ sc /* Invoke the hypervisor via a system call */ mfspr r13,SPRN_SPRG3 /* Put r13 back ???? */ - b 1b /* If SMP not configured, secondaries + b 2b /* If SMP not configured, secondaries * loop forever */ /*** ISeries-LPAR interrupt handlers ***/ diff --git a/trunk/arch/powerpc/platforms/ps3/os-area.c b/trunk/arch/powerpc/platforms/ps3/os-area.c index c73379ec9141..1d201782d4e5 100644 --- a/trunk/arch/powerpc/platforms/ps3/os-area.c +++ b/trunk/arch/powerpc/platforms/ps3/os-area.c @@ -25,6 +25,7 @@ #include #include #include +#include #include diff --git a/trunk/arch/powerpc/platforms/pseries/Kconfig b/trunk/arch/powerpc/platforms/pseries/Kconfig index 306a9d07491d..07fe5b69b9e2 100644 --- a/trunk/arch/powerpc/platforms/pseries/Kconfig +++ b/trunk/arch/powerpc/platforms/pseries/Kconfig @@ -34,3 +34,8 @@ config LPARCFG help Provide system capacity information via human readable = pairs through a /proc/ppc64/lparcfg interface. + +config PPC_PSERIES_DEBUG + depends on PPC_PSERIES && PPC_EARLY_DEBUG + bool "Enable extra debug logging in platforms/pseries" + default y diff --git a/trunk/arch/powerpc/platforms/pseries/Makefile b/trunk/arch/powerpc/platforms/pseries/Makefile index bdae04bb7a01..bd2593ed28dd 100644 --- a/trunk/arch/powerpc/platforms/pseries/Makefile +++ b/trunk/arch/powerpc/platforms/pseries/Makefile @@ -2,6 +2,10 @@ ifeq ($(CONFIG_PPC64),y) EXTRA_CFLAGS += -mno-minimal-toc endif +ifeq ($(CONFIG_PPC_PSERIES_DEBUG),y) +EXTRA_CFLAGS += -DDEBUG +endif + obj-y := lpar.o hvCall.o nvram.o reconfig.o \ setup.o iommu.o ras.o rtasd.o \ firmware.o power.o diff --git a/trunk/arch/powerpc/platforms/pseries/eeh.c b/trunk/arch/powerpc/platforms/pseries/eeh.c index 550b2f7d2cc1..a3fd56b186e6 100644 --- a/trunk/arch/powerpc/platforms/pseries/eeh.c +++ b/trunk/arch/powerpc/platforms/pseries/eeh.c @@ -39,7 +39,6 @@ #include #include -#undef DEBUG /** Overview: * EEH, or "Extended Error Handling" is a PCI bridge technology for diff --git a/trunk/arch/powerpc/platforms/pseries/eeh_cache.c b/trunk/arch/powerpc/platforms/pseries/eeh_cache.c index 1e83fcd0df31..ce37040af870 100644 --- a/trunk/arch/powerpc/platforms/pseries/eeh_cache.c +++ b/trunk/arch/powerpc/platforms/pseries/eeh_cache.c @@ -28,7 +28,6 @@ #include #include -#undef DEBUG /** * The pci address cache subsystem. This subsystem places diff --git a/trunk/arch/powerpc/platforms/pseries/firmware.c b/trunk/arch/powerpc/platforms/pseries/firmware.c index b765b7c77b65..9d3a40f45974 100644 --- a/trunk/arch/powerpc/platforms/pseries/firmware.c +++ b/trunk/arch/powerpc/platforms/pseries/firmware.c @@ -21,17 +21,11 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG #include #include #include -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif typedef struct { unsigned long val; @@ -72,7 +66,7 @@ void __init fw_feature_init(const char *hypertas, unsigned long len) const char *s; int i; - DBG(" -> fw_feature_init()\n"); + pr_debug(" -> fw_feature_init()\n"); for (s = hypertas; s < hypertas + len; s += strlen(s) + 1) { for (i = 0; i < FIRMWARE_MAX_FEATURES; i++) { @@ -88,5 +82,5 @@ void __init fw_feature_init(const char *hypertas, unsigned long len) } } - DBG(" <- fw_feature_init()\n"); + pr_debug(" <- fw_feature_init()\n"); } diff --git a/trunk/arch/powerpc/platforms/pseries/iommu.c b/trunk/arch/powerpc/platforms/pseries/iommu.c index a65c76308201..176f1f39d2d5 100644 --- a/trunk/arch/powerpc/platforms/pseries/iommu.c +++ b/trunk/arch/powerpc/platforms/pseries/iommu.c @@ -47,7 +47,6 @@ #include "plpar_wrappers.h" -#define DBG(fmt...) static void tce_build_pSeries(struct iommu_table *tbl, long index, long npages, unsigned long uaddr, @@ -322,7 +321,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) dn = pci_bus_to_OF_node(bus); - DBG("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name); + pr_debug("pci_dma_bus_setup_pSeries: setting up bus %s\n", dn->full_name); if (bus->self) { /* This is not a root bus, any setup will be done for the @@ -347,7 +346,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) for (children = 0, tmp = dn->child; tmp; tmp = tmp->sibling) children++; - DBG("Children: %d\n", children); + pr_debug("Children: %d\n", children); /* Calculate amount of DMA window per slot. Each window must be * a power of two (due to pci_alloc_consistent requirements). @@ -361,8 +360,8 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (pci->phb->dma_window_size * children > 0x80000000ul) pci->phb->dma_window_size >>= 1; - DBG("No ISA/IDE, window size is 0x%lx\n", - pci->phb->dma_window_size); + pr_debug("No ISA/IDE, window size is 0x%lx\n", + pci->phb->dma_window_size); pci->phb->dma_window_base_cur = 0; return; @@ -387,8 +386,7 @@ static void pci_dma_bus_setup_pSeries(struct pci_bus *bus) while (pci->phb->dma_window_size * children > 0x70000000ul) pci->phb->dma_window_size >>= 1; - DBG("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); - + pr_debug("ISA/IDE, window size is 0x%lx\n", pci->phb->dma_window_size); } @@ -401,7 +399,8 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) dn = pci_bus_to_OF_node(bus); - DBG("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", dn->full_name); + pr_debug("pci_dma_bus_setup_pSeriesLP: setting up bus %s\n", + dn->full_name); /* Find nearest ibm,dma-window, walking up the device tree */ for (pdn = dn; pdn != NULL; pdn = pdn->parent) { @@ -411,14 +410,14 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) } if (dma_window == NULL) { - DBG(" no ibm,dma-window property !\n"); + pr_debug(" no ibm,dma-window property !\n"); return; } ppci = PCI_DN(pdn); - DBG(" parent is %s, iommu_table: 0x%p\n", - pdn->full_name, ppci->iommu_table); + pr_debug(" parent is %s, iommu_table: 0x%p\n", + pdn->full_name, ppci->iommu_table); if (!ppci->iommu_table) { tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, @@ -426,7 +425,7 @@ static void pci_dma_bus_setup_pSeriesLP(struct pci_bus *bus) iommu_table_setparms_lpar(ppci->phb, pdn, tbl, dma_window, bus->number); ppci->iommu_table = iommu_init_table(tbl, ppci->phb->node); - DBG(" created table: %p\n", ppci->iommu_table); + pr_debug(" created table: %p\n", ppci->iommu_table); } if (pdn != dn) @@ -439,7 +438,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) struct device_node *dn; struct iommu_table *tbl; - DBG("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); + pr_debug("pci_dma_dev_setup_pSeries: %s\n", pci_name(dev)); dn = dev->dev.archdata.of_node; @@ -450,7 +449,7 @@ static void pci_dma_dev_setup_pSeries(struct pci_dev *dev) if (!dev->bus->self) { struct pci_controller *phb = PCI_DN(dn)->phb; - DBG(" --> first child, no bridge. Allocating iommu table.\n"); + pr_debug(" --> first child, no bridge. Allocating iommu table.\n"); tbl = kmalloc_node(sizeof(struct iommu_table), GFP_KERNEL, phb->node); iommu_table_setparms(phb, dn, tbl); @@ -480,7 +479,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) const void *dma_window = NULL; struct pci_dn *pci; - DBG("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); + pr_debug("pci_dma_dev_setup_pSeriesLP: %s\n", pci_name(dev)); /* dev setup for LPAR is a little tricky, since the device tree might * contain the dma-window properties per-device and not neccesarily @@ -489,7 +488,7 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) * already allocated. */ dn = pci_device_to_OF_node(dev); - DBG(" node is %s\n", dn->full_name); + pr_debug(" node is %s\n", dn->full_name); for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { @@ -504,13 +503,13 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) pci_name(dev), dn? dn->full_name : ""); return; } - DBG(" parent is %s\n", pdn->full_name); + pr_debug(" parent is %s\n", pdn->full_name); /* Check for parent == NULL so we don't try to setup the empty EADS * slots on POWER4 machines. */ if (dma_window == NULL || pdn->parent == NULL) { - DBG(" no dma window for device, linking to parent\n"); + pr_debug(" no dma window for device, linking to parent\n"); dev->dev.archdata.dma_data = PCI_DN(pdn)->iommu_table; return; } @@ -522,9 +521,9 @@ static void pci_dma_dev_setup_pSeriesLP(struct pci_dev *dev) iommu_table_setparms_lpar(pci->phb, pdn, tbl, dma_window, pci->phb->bus->number); pci->iommu_table = iommu_init_table(tbl, pci->phb->node); - DBG(" created table: %p\n", pci->iommu_table); + pr_debug(" created table: %p\n", pci->iommu_table); } else { - DBG(" found DMA window, table: %p\n", pci->iommu_table); + pr_debug(" found DMA window, table: %p\n", pci->iommu_table); } dev->dev.archdata.dma_data = pci->iommu_table; diff --git a/trunk/arch/powerpc/platforms/pseries/lpar.c b/trunk/arch/powerpc/platforms/pseries/lpar.c index 9235c469449e..2cbaedb17f3e 100644 --- a/trunk/arch/powerpc/platforms/pseries/lpar.c +++ b/trunk/arch/powerpc/platforms/pseries/lpar.c @@ -19,7 +19,8 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#undef DEBUG_LOW +/* Enables debugging of low-level hash table routines - careful! */ +#undef DEBUG #include #include @@ -42,11 +43,6 @@ #include "plpar_wrappers.h" #include "pseries.h" -#ifdef DEBUG_LOW -#define DBG_LOW(fmt...) do { udbg_printf(fmt); } while(0) -#else -#define DBG_LOW(fmt...) do { } while(0) -#endif /* in hvCall.S */ EXPORT_SYMBOL(plpar_hcall); @@ -196,6 +192,8 @@ void __init udbg_init_debug_lpar(void) udbg_putc = udbg_putcLP; udbg_getc = udbg_getcLP; udbg_getc_poll = udbg_getc_pollLP; + + register_early_udbg_console(); } /* returns 0 if couldn't find or use /chosen/stdout as console */ @@ -288,15 +286,15 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long hpte_v, hpte_r; if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " - "rflags=%lx, vflags=%lx, psize=%d)\n", - hpte_group, va, pa, rflags, vflags, psize); + pr_debug("hpte_insert(group=%lx, va=%016lx, pa=%016lx, " + "rflags=%lx, vflags=%lx, psize=%d)\n", + hpte_group, va, pa, rflags, vflags, psize); hpte_v = hpte_encode_v(va, psize, ssize) | vflags | HPTE_V_VALID; hpte_r = hpte_encode_r(pa, psize) | rflags; if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); + pr_debug(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r); /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ @@ -313,7 +311,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, lpar_rc = plpar_pte_enter(flags, hpte_group, hpte_v, hpte_r, &slot); if (unlikely(lpar_rc == H_PTEG_FULL)) { if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" full\n"); + pr_debug(" full\n"); return -1; } @@ -324,11 +322,11 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group, */ if (unlikely(lpar_rc != H_SUCCESS)) { if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" lpar err %d\n", lpar_rc); + pr_debug(" lpar err %lu\n", lpar_rc); return -2; } if (!(vflags & HPTE_V_BOLTED)) - DBG_LOW(" -> slot: %d\n", slot & 7); + pr_debug(" -> slot: %lu\n", slot & 7); /* Because of iSeries, we have to pass down the secondary * bucket bit here as well @@ -420,17 +418,17 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, want_v = hpte_encode_avpn(va, psize, ssize); - DBG_LOW(" update: avpnv=%016lx, hash=%016lx, f=%x, psize: %d ... ", - want_v, slot, flags, psize); + pr_debug(" update: avpnv=%016lx, hash=%016lx, f=%lx, psize: %d ...", + want_v, slot, flags, psize); lpar_rc = plpar_pte_protect(flags, slot, want_v); if (lpar_rc == H_NOT_FOUND) { - DBG_LOW("not found !\n"); + pr_debug("not found !\n"); return -1; } - DBG_LOW("ok\n"); + pr_debug("ok\n"); BUG_ON(lpar_rc != H_SUCCESS); @@ -505,8 +503,8 @@ static void pSeries_lpar_hpte_invalidate(unsigned long slot, unsigned long va, unsigned long lpar_rc; unsigned long dummy1, dummy2; - DBG_LOW(" inval : slot=%lx, va=%016lx, psize: %d, local: %d", - slot, va, psize, local); + pr_debug(" inval : slot=%lx, va=%016lx, psize: %d, local: %d\n", + slot, va, psize, local); want_v = hpte_encode_avpn(va, psize, ssize); lpar_rc = plpar_pte_remove(H_AVPN, slot, want_v, &dummy1, &dummy2); diff --git a/trunk/arch/powerpc/platforms/pseries/ras.c b/trunk/arch/powerpc/platforms/pseries/ras.c index a1ab25c7082f..2b548afd1003 100644 --- a/trunk/arch/powerpc/platforms/pseries/ras.c +++ b/trunk/arch/powerpc/platforms/pseries/ras.c @@ -67,8 +67,6 @@ static int ras_check_exception_token; static irqreturn_t ras_epow_interrupt(int irq, void *dev_id); static irqreturn_t ras_error_interrupt(int irq, void *dev_id); -/* #define DEBUG */ - static void request_ras_irqs(struct device_node *np, irq_handler_t handler, @@ -237,7 +235,7 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) printk(KERN_EMERG "Error: Fatal hardware error <0x%lx 0x%x>\n", *((unsigned long *)&ras_log_buf), status); -#ifndef DEBUG +#ifndef DEBUG_RTAS_POWER_OFF /* Don't actually power off when debugging so we can test * without actually failing while injecting errors. * Error data will not be logged to syslog. diff --git a/trunk/arch/powerpc/platforms/pseries/rtasd.c b/trunk/arch/powerpc/platforms/pseries/rtasd.c index e3078ce41518..befadd4f9524 100644 --- a/trunk/arch/powerpc/platforms/pseries/rtasd.c +++ b/trunk/arch/powerpc/platforms/pseries/rtasd.c @@ -29,11 +29,6 @@ #include #include -#if 0 -#define DEBUG(A...) printk(KERN_ERR A) -#else -#define DEBUG(A...) -#endif static DEFINE_SPINLOCK(rtasd_log_lock); @@ -198,7 +193,7 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) unsigned long s; int len = 0; - DEBUG("logging event\n"); + pr_debug("rtasd: logging event\n"); if (buf == NULL) return; @@ -409,7 +404,8 @@ static int rtasd(void *unused) daemonize("rtasd"); printk(KERN_DEBUG "RTAS daemon started\n"); - DEBUG("will sleep for %d milliseconds\n", (30000/rtas_event_scan_rate)); + pr_debug("rtasd: will sleep for %d milliseconds\n", + (30000 / rtas_event_scan_rate)); /* See if we have any error stored in NVRAM */ memset(logdata, 0, rtas_error_log_max); @@ -428,9 +424,9 @@ static int rtasd(void *unused) do_event_scan_all_cpus(1000); if (surveillance_timeout != -1) { - DEBUG("enabling surveillance\n"); + pr_debug("rtasd: enabling surveillance\n"); enable_surveillance(surveillance_timeout); - DEBUG("surveillance enabled\n"); + pr_debug("rtasd: surveillance enabled\n"); } /* Delay should be at least one second since some diff --git a/trunk/arch/powerpc/platforms/pseries/scanlog.c b/trunk/arch/powerpc/platforms/pseries/scanlog.c index e5b0ea870164..bec3803f0618 100644 --- a/trunk/arch/powerpc/platforms/pseries/scanlog.c +++ b/trunk/arch/powerpc/platforms/pseries/scanlog.c @@ -38,9 +38,7 @@ #define SCANLOG_HWERROR -1 #define SCANLOG_CONTINUE 1 -#define DEBUG(A...) do { if (scanlog_debug) printk(KERN_ERR "scanlog: " A); } while (0) -static int scanlog_debug; static unsigned int ibm_scan_log_dump; /* RTAS token */ static struct proc_dir_entry *proc_ppc64_scan_log_dump; /* The proc file */ @@ -86,14 +84,14 @@ static ssize_t scanlog_read(struct file *file, char __user *buf, memcpy(data, rtas_data_buf, RTAS_DATA_BUF_SIZE); spin_unlock(&rtas_data_buf_lock); - DEBUG("status=%d, data[0]=%x, data[1]=%x, data[2]=%x\n", - status, data[0], data[1], data[2]); + pr_debug("scanlog: status=%d, data[0]=%x, data[1]=%x, " \ + "data[2]=%x\n", status, data[0], data[1], data[2]); switch (status) { case SCANLOG_COMPLETE: - DEBUG("hit eof\n"); + pr_debug("scanlog: hit eof\n"); return 0; case SCANLOG_HWERROR: - DEBUG("hardware error reading scan log data\n"); + pr_debug("scanlog: hardware error reading data\n"); return -EIO; case SCANLOG_CONTINUE: /* We may or may not have data yet */ @@ -110,7 +108,8 @@ static ssize_t scanlog_read(struct file *file, char __user *buf, /* Assume extended busy */ wait_time = rtas_busy_delay_time(status); if (!wait_time) { - printk(KERN_ERR "scanlog: unknown error from rtas: %d\n", status); + printk(KERN_ERR "scanlog: unknown error " \ + "from rtas: %d\n", status); return -EIO; } } @@ -134,15 +133,9 @@ static ssize_t scanlog_write(struct file * file, const char __user * buf, if (buf) { if (strncmp(stkbuf, "reset", 5) == 0) { - DEBUG("reset scanlog\n"); + pr_debug("scanlog: reset scanlog\n"); status = rtas_call(ibm_scan_log_dump, 2, 1, NULL, 0, 0); - DEBUG("rtas returns %d\n", status); - } else if (strncmp(stkbuf, "debugon", 7) == 0) { - printk(KERN_ERR "scanlog: debug on\n"); - scanlog_debug = 1; - } else if (strncmp(stkbuf, "debugoff", 8) == 0) { - printk(KERN_ERR "scanlog: debug off\n"); - scanlog_debug = 0; + pr_debug("scanlog: rtas returns %d\n", status); } } return count; diff --git a/trunk/arch/powerpc/platforms/pseries/setup.c b/trunk/arch/powerpc/platforms/pseries/setup.c index f66aa9c3b135..f5d29f5b13c1 100644 --- a/trunk/arch/powerpc/platforms/pseries/setup.c +++ b/trunk/arch/powerpc/platforms/pseries/setup.c @@ -16,8 +16,6 @@ * bootup setup stuff.. */ -#undef DEBUG - #include #include #include @@ -70,11 +68,6 @@ #include "plpar_wrappers.h" #include "pseries.h" -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif int fwnmi_active; /* TRUE if an FWNMI handler is present */ @@ -326,7 +319,7 @@ static int pseries_set_xdabr(unsigned long dabr) */ static void __init pSeries_init_early(void) { - DBG(" -> pSeries_init_early()\n"); + pr_debug(" -> pSeries_init_early()\n"); if (firmware_has_feature(FW_FEATURE_LPAR)) find_udbg_vterm(); @@ -338,7 +331,7 @@ static void __init pSeries_init_early(void) iommu_init_early_pSeries(); - DBG(" <- pSeries_init_early()\n"); + pr_debug(" <- pSeries_init_early()\n"); } /* @@ -383,7 +376,7 @@ static int __init pSeries_probe(void) of_flat_dt_is_compatible(root, "IBM,CBEA")) return 0; - DBG("pSeries detected, looking for LPAR capability...\n"); + pr_debug("pSeries detected, looking for LPAR capability...\n"); /* Now try to figure out if we are running on LPAR */ of_scan_flat_dt(pSeries_probe_hypertas, NULL); @@ -393,8 +386,8 @@ static int __init pSeries_probe(void) else hpte_init_native(); - DBG("Machine is%s LPAR !\n", - (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); + pr_debug("Machine is%s LPAR !\n", + (powerpc_firmware_features & FW_FEATURE_LPAR) ? "" : " not"); return 1; } diff --git a/trunk/arch/powerpc/platforms/pseries/smp.c b/trunk/arch/powerpc/platforms/pseries/smp.c index ea4c65917a64..9d8f8c84ab89 100644 --- a/trunk/arch/powerpc/platforms/pseries/smp.c +++ b/trunk/arch/powerpc/platforms/pseries/smp.c @@ -12,7 +12,6 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG #include #include @@ -51,12 +50,6 @@ #include "plpar_wrappers.h" #include "pseries.h" -#ifdef DEBUG -#include -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif /* * The primary thread of each non-boot processor is recorded here before @@ -231,7 +224,7 @@ static void __init smp_init_pseries(void) { int i; - DBG(" -> smp_init_pSeries()\n"); + pr_debug(" -> smp_init_pSeries()\n"); /* Mark threads which are still spinning in hold loops. */ if (cpu_has_feature(CPU_FTR_SMT)) { @@ -255,7 +248,7 @@ static void __init smp_init_pseries(void) smp_ops->take_timebase = pSeries_take_timebase; } - DBG(" <- smp_init_pSeries()\n"); + pr_debug(" <- smp_init_pSeries()\n"); } #ifdef CONFIG_MPIC diff --git a/trunk/arch/powerpc/platforms/pseries/xics.c b/trunk/arch/powerpc/platforms/pseries/xics.c index 43df53c30aa0..ebebc28fe895 100644 --- a/trunk/arch/powerpc/platforms/pseries/xics.c +++ b/trunk/arch/powerpc/platforms/pseries/xics.c @@ -9,7 +9,6 @@ * 2 of the License, or (at your option) any later version. */ -#undef DEBUG #include #include diff --git a/trunk/arch/powerpc/sysdev/mv64x60_dev.c b/trunk/arch/powerpc/sysdev/mv64x60_dev.c index 047b31027fa6..41af1223e2a0 100644 --- a/trunk/arch/powerpc/sysdev/mv64x60_dev.c +++ b/trunk/arch/powerpc/sysdev/mv64x60_dev.c @@ -338,15 +338,13 @@ static int __init mv64x60_i2c_device_setup(struct device_node *np, int id) pdata.freq_m = 8; /* default */ prop = of_get_property(np, "freq_m", NULL); - if (!prop) - return -ENODEV; - pdata.freq_m = *prop; + if (prop) + pdata.freq_m = *prop; pdata.freq_m = 3; /* default */ prop = of_get_property(np, "freq_n", NULL); - if (!prop) - return -ENODEV; - pdata.freq_n = *prop; + if (prop) + pdata.freq_n = *prop; pdata.timeout = 1000; /* default: 1 second */ @@ -433,9 +431,13 @@ static int __init mv64x60_device_setup(void) int err; id = 0; - for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") - if ((err = mv64x60_mpsc_device_setup(np, id++))) - goto error; + for_each_compatible_node(np, "serial", "marvell,mv64360-mpsc") { + err = mv64x60_mpsc_device_setup(np, id++); + if (err) + printk(KERN_ERR "Failed to initialize MV64x60 " + "serial device %s: error %d.\n", + np->full_name, err); + } id = 0; id2 = 0; @@ -443,38 +445,44 @@ static int __init mv64x60_device_setup(void) pdev = mv64x60_eth_register_shared_pdev(np, id++); if (IS_ERR(pdev)) { err = PTR_ERR(pdev); - goto error; + printk(KERN_ERR "Failed to initialize MV64x60 " + "network block %s: error %d.\n", + np->full_name, err); + continue; } for_each_child_of_node(np, np2) { if (!of_device_is_compatible(np2, "marvell,mv64360-eth")) continue; err = mv64x60_eth_device_setup(np2, id2++, pdev); - if (err) { - of_node_put(np2); - goto error; - } + if (err) + printk(KERN_ERR "Failed to initialize " + "MV64x60 network device %s: " + "error %d.\n", + np2->full_name, err); } } id = 0; - for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") - if ((err = mv64x60_i2c_device_setup(np, id++))) - goto error; + for_each_compatible_node(np, "i2c", "marvell,mv64360-i2c") { + err = mv64x60_i2c_device_setup(np, id++); + if (err) + printk(KERN_ERR "Failed to initialize MV64x60 I2C " + "bus %s: error %d.\n", + np->full_name, err); + } /* support up to one watchdog timer */ np = of_find_compatible_node(np, NULL, "marvell,mv64360-wdt"); if (np) { if ((err = mv64x60_wdt_device_setup(np, id))) - goto error; + printk(KERN_ERR "Failed to initialize MV64x60 " + "Watchdog %s: error %d.\n", + np->full_name, err); of_node_put(np); } return 0; - -error: - of_node_put(np); - return err; } arch_initcall(mv64x60_device_setup); diff --git a/trunk/arch/powerpc/sysdev/mv64x60_udbg.c b/trunk/arch/powerpc/sysdev/mv64x60_udbg.c index ccdb3b0418fc..2792dc8b038c 100644 --- a/trunk/arch/powerpc/sysdev/mv64x60_udbg.c +++ b/trunk/arch/powerpc/sysdev/mv64x60_udbg.c @@ -94,7 +94,7 @@ static void mv64x60_udbg_init(void) if (!np) return; - block_index = of_get_property(np, "block-index", NULL); + block_index = of_get_property(np, "cell-index", NULL); if (!block_index) goto error; diff --git a/trunk/arch/ppc/8260_io/fcc_enet.c b/trunk/arch/ppc/8260_io/fcc_enet.c index bcc3aa9d04f3..d38b57e24cee 100644 --- a/trunk/arch/ppc/8260_io/fcc_enet.c +++ b/trunk/arch/ppc/8260_io/fcc_enet.c @@ -165,9 +165,6 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); #ifdef CONFIG_SBC82xx #define F1_RXCLK 9 #define F1_TXCLK 10 -#elif defined(CONFIG_ADS8272) -#define F1_RXCLK 11 -#define F1_TXCLK 10 #else #define F1_RXCLK 12 #define F1_TXCLK 11 @@ -175,13 +172,8 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); /* FCC2 Clock Source Configuration. There are board specific. Can only choose from CLK13-16 */ -#ifdef CONFIG_ADS8272 -#define F2_RXCLK 15 -#define F2_TXCLK 16 -#else #define F2_RXCLK 13 #define F2_TXCLK 14 -#endif /* FCC3 Clock Source Configuration. There are board specific. Can only choose from CLK13-16 */ @@ -289,10 +281,7 @@ static int fcc_enet_set_mac_address(struct net_device *dev, void *addr); /* TQM8260 has MDIO and MDCK on PC30 and PC31 respectively */ #define PC_MDIO ((uint)0x00000002) #define PC_MDCK ((uint)0x00000001) -#elif defined(CONFIG_ADS8272) -#define PC_MDIO ((uint)0x00002000) -#define PC_MDCK ((uint)0x00001000) -#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) || defined(CONFIG_PQ2FADS) +#elif defined(CONFIG_EST8260) || defined(CONFIG_ADS8260) #define PC_MDIO ((uint)0x00400000) #define PC_MDCK ((uint)0x00200000) #else @@ -2118,11 +2107,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev) printk("Can't get FCC IRQ %d\n", fip->fc_interrupt); #ifdef PHY_INTERRUPT -#ifdef CONFIG_ADS8272 - if (request_irq(PHY_INTERRUPT, mii_link_interrupt, IRQF_SHARED, - "mii", dev) < 0) - printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT); -#else /* Make IRQn edge triggered. This does not work if PHY_INTERRUPT is * on Port C. */ @@ -2132,7 +2116,6 @@ init_fcc_startup(fcc_info_t *fip, struct net_device *dev) if (request_irq(PHY_INTERRUPT, mii_link_interrupt, 0, "mii", dev) < 0) printk(KERN_CRIT "Can't get MII IRQ %d\n", PHY_INTERRUPT); -#endif #endif /* PHY_INTERRUPT */ /* Set GFMR to enable Ethernet operating mode. diff --git a/trunk/arch/ppc/8xx_io/enet.c b/trunk/arch/ppc/8xx_io/enet.c index c6d047ae77ac..5899aea1644b 100644 --- a/trunk/arch/ppc/8xx_io/enet.c +++ b/trunk/arch/ppc/8xx_io/enet.c @@ -946,29 +946,6 @@ static int __init scc_enet_init(void) *((volatile uint *)BCSR1) &= ~BCSR1_ETHEN; #endif -#ifdef CONFIG_MPC885ADS - - /* Deassert PHY reset and enable the PHY. - */ - { - volatile uint __iomem *bcsr = ioremap(BCSR_ADDR, BCSR_SIZE); - uint tmp; - - tmp = in_be32(bcsr + 1 /* BCSR1 */); - tmp |= BCSR1_ETHEN; - out_be32(bcsr + 1, tmp); - tmp = in_be32(bcsr + 4 /* BCSR4 */); - tmp |= BCSR4_ETH10_RST; - out_be32(bcsr + 4, tmp); - iounmap(bcsr); - } - - /* On MPC885ADS SCC ethernet PHY defaults to the full duplex mode - * upon reset. SCC is set to half duplex by default. So this - * inconsistency should be better fixed by the software. - */ -#endif - dev->base_addr = (unsigned long)ep; #if 0 dev->name = "CPM_ENET"; diff --git a/trunk/arch/ppc/Kconfig b/trunk/arch/ppc/Kconfig index abc877faf123..0f1863ed9c1c 100644 --- a/trunk/arch/ppc/Kconfig +++ b/trunk/arch/ppc/Kconfig @@ -372,22 +372,6 @@ config MPC8XXFADS bool "FADS" select FADS -config MPC86XADS - bool "MPC86XADS" - help - MPC86x Application Development System by Freescale Semiconductor. - The MPC86xADS is meant to serve as a platform for s/w and h/w - development around the MPC86X processor families. - select FADS - -config MPC885ADS - bool "MPC885ADS" - help - Freescale Semiconductor MPC885 Application Development System (ADS). - Also known as DUET. - The MPC885ADS is meant to serve as a platform for s/w and h/w - development around the MPC885 processor family. - config TQM823L bool "TQM823L" help @@ -479,53 +463,6 @@ config WINCEPT endchoice -menu "Freescale Ethernet driver platform-specific options" - depends on FS_ENET - - config MPC8xx_SECOND_ETH - bool "Second Ethernet channel" - depends on (MPC885ADS || MPC86XADS) - default y - help - This enables support for second Ethernet on MPC885ADS and MPC86xADS boards. - The latter will use SCC1, for 885ADS you can select it below. - - choice - prompt "Second Ethernet channel" - depends on MPC8xx_SECOND_ETH - default MPC8xx_SECOND_ETH_FEC2 - - config MPC8xx_SECOND_ETH_FEC2 - bool "FEC2" - depends on MPC885ADS - help - Enable FEC2 to serve as 2-nd Ethernet channel. Note that SMC2 - (often 2-nd UART) will not work if this is enabled. - - config MPC8xx_SECOND_ETH_SCC1 - bool "SCC1" - depends on MPC86XADS - select MPC8xx_SCC_ENET_FIXED - help - Enable SCC1 to serve as 2-nd Ethernet channel. Note that SMC1 - (often 1-nd UART) will not work if this is enabled. - - config MPC8xx_SECOND_ETH_SCC3 - bool "SCC3" - depends on MPC885ADS - help - Enable SCC3 to serve as 2-nd Ethernet channel. Note that SMC1 - (often 1-nd UART) will not work if this is enabled. - - endchoice - - config MPC8xx_SCC_ENET_FIXED - depends on MPC8xx_SECOND_ETH_SCC - default n - bool "Use fixed MII-less mode for SCC Ethernet" - -endmenu - choice prompt "Machine Type" depends on 6xx @@ -666,9 +603,6 @@ config TQM8260 End of Life: not yet :-) URL: -config ADS8272 - bool "ADS8272" - config PQ2FADS bool "Freescale-PQ2FADS" help @@ -698,11 +632,6 @@ config EV64360 platform. endchoice -config PQ2ADS - bool - depends on ADS8272 - default y - config TQM8xxL bool depends on 8xx && (TQM823L || TQM850L || FPS850L || TQM855L || TQM860L) @@ -725,15 +654,6 @@ config 8260 this option means that you wish to build a kernel for a machine with an 8260 class CPU. -config 8272 - bool - depends on 6xx - default y if ADS8272 - select 8260 - help - The MPC8272 CPM has a different internal dpram setup than other CPM2 - devices - config CPM1 bool depends on 8xx @@ -1069,7 +989,7 @@ config PCI_8260 config 8260_PCI9 bool "Enable workaround for MPC826x erratum PCI 9" - depends on PCI_8260 && !ADS8272 + depends on PCI_8260 default y choice diff --git a/trunk/arch/ppc/configs/ads8272_defconfig b/trunk/arch/ppc/configs/ads8272_defconfig deleted file mode 100644 index 6619f9118b00..000000000000 --- a/trunk/arch/ppc/configs/ads8272_defconfig +++ /dev/null @@ -1,930 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.21-rc5 -# Wed Apr 4 20:55:16 2007 -# -CONFIG_MMU=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_ARCH_HAS_ILOG2_U32=y -# CONFIG_ARCH_HAS_ILOG2_U64 is not set -CONFIG_GENERIC_HWEIGHT=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_GENERIC_NVRAM=y -CONFIG_GENERIC_FIND_NEXT_BIT=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y -CONFIG_ARCH_MAY_HAVE_PC_FDC=y -CONFIG_GENERIC_BUG=y -CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -CONFIG_LOCALVERSION_AUTO=y -CONFIG_SWAP=y -CONFIG_SYSVIPC=y -# CONFIG_IPC_NS is not set -CONFIG_SYSVIPC_SYSCTL=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -# CONFIG_TASKSTATS is not set -# CONFIG_UTS_NS is not set -# CONFIG_AUDIT is not set -# CONFIG_IKCONFIG is not set -CONFIG_SYSFS_DEPRECATED=y -# CONFIG_RELAY is not set -CONFIG_BLK_DEV_INITRD=y -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SYSCTL=y -CONFIG_EMBEDDED=y -CONFIG_SYSCTL_SYSCALL=y -# CONFIG_KALLSYMS is not set -# CONFIG_HOTPLUG is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_ELF_CORE=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set -CONFIG_SHMEM=y -CONFIG_SLAB=y -CONFIG_VM_EVENT_COUNTERS=y -CONFIG_RT_MUTEXES=y -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 -# CONFIG_SLOB is not set - -# -# Loadable module support -# -# CONFIG_MODULES is not set - -# -# Block layer -# -CONFIG_BLOCK=y -# CONFIG_LBD is not set -# CONFIG_BLK_DEV_IO_TRACE is not set -# CONFIG_LSF is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_DEFAULT_AS is not set -# CONFIG_DEFAULT_DEADLINE is not set -CONFIG_DEFAULT_CFQ=y -# CONFIG_DEFAULT_NOOP is not set -CONFIG_DEFAULT_IOSCHED="cfq" - -# -# Processor -# -CONFIG_6xx=y -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_8xx is not set -# CONFIG_E200 is not set -# CONFIG_E500 is not set -CONFIG_PPC_FPU=y -# CONFIG_PPC_DCR_NATIVE is not set -# CONFIG_KEXEC is not set -# CONFIG_CPU_FREQ is not set -# CONFIG_WANT_EARLY_SERIAL is not set -CONFIG_EMBEDDEDBOOT=y -CONFIG_PPC_STD_MMU=y - -# -# Platform options -# - -# -# Freescale Ethernet driver platform-specific options -# -# CONFIG_PPC_PREP is not set -# CONFIG_APUS is not set -# CONFIG_KATANA is not set -# CONFIG_WILLOW is not set -# CONFIG_CPCI690 is not set -# CONFIG_POWERPMC250 is not set -# CONFIG_CHESTNUT is not set -# CONFIG_SPRUCE is not set -# CONFIG_HDPU is not set -# CONFIG_EV64260 is not set -# CONFIG_LOPEC is not set -# CONFIG_MVME5100 is not set -# CONFIG_PPLUS is not set -# CONFIG_PRPMC750 is not set -# CONFIG_PRPMC800 is not set -# CONFIG_SANDPOINT is not set -# CONFIG_RADSTONE_PPC7D is not set -# CONFIG_PAL4 is not set -# CONFIG_EST8260 is not set -# CONFIG_SBC82xx is not set -# CONFIG_SBS8260 is not set -# CONFIG_RPX8260 is not set -# CONFIG_TQM8260 is not set -CONFIG_ADS8272=y -# CONFIG_PQ2FADS is not set -# CONFIG_LITE5200 is not set -# CONFIG_MPC834x_SYS is not set -# CONFIG_EV64360 is not set -CONFIG_PQ2ADS=y -CONFIG_8260=y -CONFIG_8272=y -CONFIG_CPM2=y -# CONFIG_PC_KEYBOARD is not set -# CONFIG_SMP is not set -# CONFIG_HIGHMEM is not set -CONFIG_ARCH_POPULATES_NODE_MAP=y -# CONFIG_HZ_100 is not set -CONFIG_HZ_250=y -# CONFIG_HZ_300 is not set -# CONFIG_HZ_1000 is not set -CONFIG_HZ=250 -CONFIG_PREEMPT_NONE=y -# CONFIG_PREEMPT_VOLUNTARY is not set -# CONFIG_PREEMPT is not set -CONFIG_SELECT_MEMORY_MODEL=y -CONFIG_FLATMEM_MANUAL=y -# CONFIG_DISCONTIGMEM_MANUAL is not set -# CONFIG_SPARSEMEM_MANUAL is not set -CONFIG_FLATMEM=y -CONFIG_FLAT_NODE_MEM_MAP=y -# CONFIG_SPARSEMEM_STATIC is not set -CONFIG_SPLIT_PTLOCK_CPUS=4 -# CONFIG_RESOURCES_64BIT is not set -CONFIG_ZONE_DMA_FLAG=1 -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_CMDLINE_BOOL is not set -# CONFIG_PM is not set -CONFIG_SECCOMP=y -CONFIG_ISA_DMA_API=y - -# -# Bus options -# -CONFIG_ZONE_DMA=y -# CONFIG_PPC_I8259 is not set -CONFIG_PPC_INDIRECT_PCI=y -CONFIG_PCI=y -CONFIG_PCI_DOMAINS=y -CONFIG_PCI_8260=y - -# -# PCCARD (PCMCIA/CardBus) support -# - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_BOOT_LOAD=0x00400000 - -# -# Networking -# -CONFIG_NET=y - -# -# Networking options -# -# CONFIG_NETDEBUG is not set -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -CONFIG_XFRM=y -# CONFIG_XFRM_USER is not set -# CONFIG_XFRM_SUB_POLICY is not set -# CONFIG_XFRM_MIGRATE is not set -# CONFIG_NET_KEY is not set -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_FIB_HASH=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_IP_MROUTE is not set -# CONFIG_ARPD is not set -CONFIG_SYN_COOKIES=y -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_XFRM_TUNNEL is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_INET_XFRM_MODE_TRANSPORT=y -CONFIG_INET_XFRM_MODE_TUNNEL=y -CONFIG_INET_XFRM_MODE_BEET=y -CONFIG_INET_DIAG=y -CONFIG_INET_TCP_DIAG=y -# CONFIG_TCP_CONG_ADVANCED is not set -CONFIG_TCP_CONG_CUBIC=y -CONFIG_DEFAULT_TCP_CONG="cubic" -# CONFIG_TCP_MD5SIG is not set -# CONFIG_IPV6 is not set -# CONFIG_INET6_XFRM_TUNNEL is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_NETWORK_SECMARK is not set -# CONFIG_NETFILTER is not set - -# -# DCCP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_DCCP is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set - -# -# TIPC Configuration (EXPERIMENTAL) -# -# CONFIG_TIPC is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -# CONFIG_IEEE80211 is not set - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_SYS_HYPERVISOR is not set - -# -# Connector - unified userspace <-> kernelspace linker -# -# CONFIG_CONNECTOR is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# -# CONFIG_PNPACPI is not set - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_CPQ_DA is not set -# CONFIG_BLK_CPQ_CISS_DA is not set -# CONFIG_BLK_DEV_DAC960 is not set -# CONFIG_BLK_DEV_UMEM is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_SX8 is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_BLK_DEV_RAM_SIZE=32768 -CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 -# CONFIG_CDROM_PKTCDVD is not set -# CONFIG_ATA_OVER_ETH is not set - -# -# Misc devices -# -# CONFIG_SGI_IOC4 is not set -# CONFIG_TIFM_CORE is not set - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_RAID_ATTRS is not set -# CONFIG_SCSI is not set -# CONFIG_SCSI_NETLINK is not set - -# -# Serial ATA (prod) and Parallel ATA (experimental) drivers -# -# CONFIG_ATA is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# -# CONFIG_FUSION is not set - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# -# CONFIG_I2O is not set - -# -# Macintosh device drivers -# -# CONFIG_MAC_EMUMOUSEBTN is not set -# CONFIG_WINDFARM is not set - -# -# Network device support -# -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# ARCnet devices -# -# CONFIG_ARCNET is not set - -# -# PHY device support -# -CONFIG_PHYLIB=y - -# -# MII PHY device drivers -# -# CONFIG_MARVELL_PHY is not set -CONFIG_DAVICOM_PHY=y -# CONFIG_QSEMI_PHY is not set -# CONFIG_LXT_PHY is not set -# CONFIG_CICADA_PHY is not set -# CONFIG_VITESSE_PHY is not set -# CONFIG_SMSC_PHY is not set -# CONFIG_BROADCOM_PHY is not set -# CONFIG_FIXED_PHY is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_HAPPYMEAL is not set -# CONFIG_SUNGEM is not set -# CONFIG_CASSINI is not set -# CONFIG_NET_VENDOR_3COM is not set - -# -# Tulip family network device support -# -# CONFIG_NET_TULIP is not set -# CONFIG_HP100 is not set -# CONFIG_NET_PCI is not set -CONFIG_FS_ENET=y -# CONFIG_FS_ENET_HAS_SCC is not set -CONFIG_FS_ENET_HAS_FCC=y - -# -# Ethernet (1000 Mbit) -# -# CONFIG_ACENIC is not set -# CONFIG_DL2K is not set -# CONFIG_E1000 is not set -# CONFIG_NS83820 is not set -# CONFIG_HAMACHI is not set -# CONFIG_YELLOWFIN is not set -# CONFIG_R8169 is not set -# CONFIG_SIS190 is not set -# CONFIG_SKGE is not set -# CONFIG_SKY2 is not set -# CONFIG_SK98LIN is not set -# CONFIG_TIGON3 is not set -# CONFIG_BNX2 is not set -# CONFIG_QLA3XXX is not set -# CONFIG_ATL1 is not set - -# -# Ethernet (10000 Mbit) -# -# CONFIG_CHELSIO_T1 is not set -# CONFIG_CHELSIO_T3 is not set -# CONFIG_IXGB is not set -# CONFIG_S2IO is not set -# CONFIG_MYRI10GE is not set -# CONFIG_NETXEN_NIC is not set - -# -# Token Ring devices -# -# CONFIG_TR is not set - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -# CONFIG_FDDI is not set -# CONFIG_HIPPI is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -CONFIG_INPUT=y -# CONFIG_INPUT_FF_MEMLESS is not set - -# -# Userland interfaces -# -# CONFIG_INPUT_MOUSEDEV is not set -# CONFIG_INPUT_JOYDEV is not set -# CONFIG_INPUT_TSDEV is not set -# CONFIG_INPUT_EVDEV is not set -# CONFIG_INPUT_EVBUG is not set - -# -# Input Device Drivers -# -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_INPUT_JOYSTICK is not set -# CONFIG_INPUT_TOUCHSCREEN is not set -# CONFIG_INPUT_MISC is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -# CONFIG_SERIAL_UARTLITE is not set -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_CPM=y -CONFIG_SERIAL_CPM_CONSOLE=y -CONFIG_SERIAL_CPM_SCC1=y -# CONFIG_SERIAL_CPM_SCC2 is not set -# CONFIG_SERIAL_CPM_SCC3 is not set -CONFIG_SERIAL_CPM_SCC4=y -# CONFIG_SERIAL_CPM_SMC1 is not set -# CONFIG_SERIAL_CPM_SMC2 is not set -# CONFIG_SERIAL_JSM is not set -CONFIG_UNIX98_PTYS=y -CONFIG_LEGACY_PTYS=y -CONFIG_LEGACY_PTY_COUNT=256 - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -CONFIG_HW_RANDOM=y -# CONFIG_NVRAM is not set -CONFIG_GEN_RTC=y -# CONFIG_GEN_RTC_X is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set -# CONFIG_APPLICOM is not set -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# -# CONFIG_TCG_TPM is not set - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# SPI support -# -# CONFIG_SPI is not set -# CONFIG_SPI_MASTER is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Hardware Monitoring support -# -CONFIG_HWMON=y -# CONFIG_HWMON_VID is not set -# CONFIG_SENSORS_ABITUGURU is not set -# CONFIG_SENSORS_F71805F is not set -# CONFIG_SENSORS_PC87427 is not set -# CONFIG_SENSORS_VT1211 is not set -# CONFIG_HWMON_DEBUG_CHIP is not set - -# -# Multifunction device drivers -# -# CONFIG_MFD_SM501 is not set - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_BACKLIGHT_LCD_SUPPORT is not set -# CONFIG_FB is not set -# CONFIG_FB_IBM_GXT4500 is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# HID Devices -# -CONFIG_HID=y -# CONFIG_HID_DEBUG is not set - -# -# USB support -# -CONFIG_USB_ARCH_HAS_HCD=y -CONFIG_USB_ARCH_HAS_OHCI=y -CONFIG_USB_ARCH_HAS_EHCI=y -# CONFIG_USB is not set - -# -# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' -# - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# LED devices -# -# CONFIG_NEW_LEDS is not set - -# -# LED drivers -# - -# -# LED Triggers -# - -# -# InfiniBand support -# -# CONFIG_INFINIBAND is not set - -# -# EDAC - error detection and reporting (RAS) (EXPERIMENTAL) -# - -# -# Real Time Clock -# -# CONFIG_RTC_CLASS is not set - -# -# DMA Engine support -# -# CONFIG_DMA_ENGINE is not set - -# -# DMA Clients -# - -# -# DMA Devices -# - -# -# Auxiliary Display support -# - -# -# Virtualization -# - -# -# File systems -# -CONFIG_EXT2_FS=y -# CONFIG_EXT2_FS_XATTR is not set -# CONFIG_EXT2_FS_XIP is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -# CONFIG_EXT4DEV_FS is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set -CONFIG_FS_POSIX_ACL=y -# CONFIG_XFS_FS is not set -# CONFIG_GFS2_FS is not set -# CONFIG_OCFS2_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -CONFIG_INOTIFY=y -CONFIG_INOTIFY_USER=y -# CONFIG_QUOTA is not set -CONFIG_DNOTIFY=y -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set -# CONFIG_FUSE_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_PROC_SYSCTL=y -CONFIG_SYSFS=y -CONFIG_TMPFS=y -# CONFIG_TMPFS_POSIX_ACL is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y -# CONFIG_CONFIGFS_FS is not set - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_NFS_ACL_SUPPORT=y -CONFIG_NFS_COMMON=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set -# CONFIG_9P_FS is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -# CONFIG_MSDOS_PARTITION is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_KARMA_PARTITION is not set -# CONFIG_EFI_PARTITION is not set - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# Distributed Lock Manager -# -# CONFIG_DLM is not set -# CONFIG_SCC_ENET is not set -# CONFIG_FEC_ENET is not set - -# -# CPM2 Options -# - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC16 is not set -# CONFIG_CRC32 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_PLIST=y -CONFIG_HAS_IOMEM=y -CONFIG_HAS_IOPORT=y -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -CONFIG_ENABLE_MUST_CHECK=y -# CONFIG_MAGIC_SYSRQ is not set -# CONFIG_UNUSED_SYMBOLS is not set -# CONFIG_DEBUG_FS is not set -# CONFIG_HEADERS_CHECK is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 -# CONFIG_DEBUG_BUGVERBOSE is not set -# CONFIG_KGDB_CONSOLE is not set - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -CONFIG_CRYPTO_ALGAPI=y -CONFIG_CRYPTO_BLKCIPHER=y -CONFIG_CRYPTO_MANAGER=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_XCBC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -# CONFIG_CRYPTO_GF128MUL is not set -CONFIG_CRYPTO_ECB=y -CONFIG_CRYPTO_CBC=y -CONFIG_CRYPTO_PCBC=y -# CONFIG_CRYPTO_LRW is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_FCRYPT is not set -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_CAMELLIA is not set - -# -# Hardware crypto devices -# diff --git a/trunk/arch/ppc/configs/mpc86x_ads_defconfig b/trunk/arch/ppc/configs/mpc86x_ads_defconfig deleted file mode 100644 index f63c6f59d68a..000000000000 --- a/trunk/arch/ppc/configs/mpc86x_ads_defconfig +++ /dev/null @@ -1,633 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc4 -# Tue Jun 14 13:36:35 2005 -# -CONFIG_MMU=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_HAVE_DEC_LOCK=y -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_GENERIC_NVRAM=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -# CONFIG_CLEAN_COMPILE is not set -CONFIG_BROKEN=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set -# CONFIG_HOTPLUG is not set -CONFIG_KOBJECT_UEVENT=y -# CONFIG_IKCONFIG is not set -CONFIG_EMBEDDED=y -# CONFIG_KALLSYMS is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -# CONFIG_BASE_FULL is not set -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -# CONFIG_SHMEM is not set -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -CONFIG_TINY_SHMEM=y -CONFIG_BASE_SMALL=1 - -# -# Loadable module support -# -CONFIG_MODULES=y -# CONFIG_MODULE_UNLOAD is not set -CONFIG_OBSOLETE_MODPARM=y -# CONFIG_MODVERSIONS is not set -# CONFIG_MODULE_SRCVERSION_ALL is not set -# CONFIG_KMOD is not set - -# -# Processor -# -# CONFIG_6xx is not set -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_POWER3 is not set -# CONFIG_POWER4 is not set -CONFIG_8xx=y -# CONFIG_E500 is not set -# CONFIG_MATH_EMULATION is not set -# CONFIG_CPU_FREQ is not set -CONFIG_EMBEDDEDBOOT=y -# CONFIG_PM is not set -CONFIG_NOT_COHERENT_CACHE=y - -# -# Platform options -# -CONFIG_FADS=y -# CONFIG_RPXLITE is not set -# CONFIG_RPXCLASSIC is not set -# CONFIG_BSEIP is not set -# CONFIG_MPC8XXFADS is not set -CONFIG_MPC86XADS=y -# CONFIG_TQM823L is not set -# CONFIG_TQM850L is not set -# CONFIG_TQM855L is not set -# CONFIG_TQM860L is not set -# CONFIG_FPS850L is not set -# CONFIG_SPD823TS is not set -# CONFIG_IVMS8 is not set -# CONFIG_IVML24 is not set -# CONFIG_SM850 is not set -# CONFIG_HERMES_PRO is not set -# CONFIG_IP860 is not set -# CONFIG_LWMON is not set -# CONFIG_PCU_E is not set -# CONFIG_CCM is not set -# CONFIG_LANTEC is not set -# CONFIG_MBX is not set -# CONFIG_WINCEPT is not set -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_HIGHMEM is not set -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ISA_DMA_API=y - -# -# Bus options -# -# CONFIG_PCI is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_PCI_QSPAN is not set - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_CONSISTENT_START=0xff100000 -CONFIG_CONSISTENT_SIZE=0x00200000 -CONFIG_BOOT_LOAD=0x00400000 - -# -# Device Drivers -# - -# -# Generic Driver Options -# -# CONFIG_STANDALONE is not set -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -CONFIG_BLK_DEV_LOOP=y -# CONFIG_BLK_DEV_CRYPTOLOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_LBD is not set -# CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -CONFIG_IOSCHED_AS=y -CONFIG_IOSCHED_DEADLINE=y -CONFIG_IOSCHED_CFQ=y -# CONFIG_ATA_OVER_ETH is not set - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# - -# -# Macintosh device drivers -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IP_PNP_BOOTP is not set -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -# CONFIG_IP_TCPDIAG_IPV6 is not set -CONFIG_IPV6=m -# CONFIG_IPV6_PRIVACY is not set -# CONFIG_INET6_AH is not set -# CONFIG_INET6_ESP is not set -# CONFIG_INET6_IPCOMP is not set -# CONFIG_INET6_TUNNEL is not set -# CONFIG_IPV6_TUNNEL is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -# CONFIG_MII is not set -# CONFIG_OAKNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -# CONFIG_PPP is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set -CONFIG_SOUND_GAMEPORT=y - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_CPM=y -CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_SERIAL_CPM_SCC1 is not set -# CONFIG_SERIAL_CPM_SCC2 is not set -# CONFIG_SERIAL_CPM_SCC3 is not set -# CONFIG_SERIAL_CPM_SCC4 is not set -CONFIG_SERIAL_CPM_SMC1=y -# CONFIG_SERIAL_CPM_SMC2 is not set -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_GEN_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Misc devices -# - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# InfiniBand support -# -# CONFIG_INFINIBAND is not set - -# -# File systems -# -# CONFIG_EXT2_FS is not set -CONFIG_EXT3_FS=y -# CONFIG_EXT3_FS_XATTR is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set - -# -# XFS support -# -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set -# CONFIG_DNOTIFY is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -CONFIG_PROC_KCORE=y -CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set -# CONFIG_TMPFS is not set -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -CONFIG_NFS_V3=y -CONFIG_NFS_V4=y -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_LOCKD_V4=y -CONFIG_SUNRPC=y -CONFIG_SUNRPC_GSS=y -CONFIG_RPCSEC_GSS_KRB5=y -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -# CONFIG_PARTITION_ADVANCED is not set -CONFIG_MSDOS_PARTITION=y - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# MPC8xx CPM Options -# -CONFIG_SCC_ENET=y -CONFIG_SCC1_ENET=y -# CONFIG_SCC2_ENET is not set -# CONFIG_SCC3_ENET is not set -# CONFIG_FEC_ENET is not set -# CONFIG_ENET_BIG_BUFFERS is not set - -# -# Generic MPC8xx Options -# -# CONFIG_8xx_COPYBACK is not set -# CONFIG_8xx_CPU6 is not set -CONFIG_NO_UCODE_PATCH=y -# CONFIG_USB_SOF_UCODE_PATCH is not set -# CONFIG_I2C_SPI_UCODE_PATCH is not set -# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set - -# -# Library routines -# -# CONFIG_CRC_CCITT is not set -# CONFIG_CRC32 is not set -# CONFIG_LIBCRC32C is not set - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -CONFIG_CRYPTO=y -# CONFIG_CRYPTO_HMAC is not set -# CONFIG_CRYPTO_NULL is not set -# CONFIG_CRYPTO_MD4 is not set -CONFIG_CRYPTO_MD5=y -# CONFIG_CRYPTO_SHA1 is not set -# CONFIG_CRYPTO_SHA256 is not set -# CONFIG_CRYPTO_SHA512 is not set -# CONFIG_CRYPTO_WP512 is not set -# CONFIG_CRYPTO_TGR192 is not set -CONFIG_CRYPTO_DES=y -# CONFIG_CRYPTO_BLOWFISH is not set -# CONFIG_CRYPTO_TWOFISH is not set -# CONFIG_CRYPTO_SERPENT is not set -# CONFIG_CRYPTO_AES is not set -# CONFIG_CRYPTO_CAST5 is not set -# CONFIG_CRYPTO_CAST6 is not set -# CONFIG_CRYPTO_TEA is not set -# CONFIG_CRYPTO_ARC4 is not set -# CONFIG_CRYPTO_KHAZAD is not set -# CONFIG_CRYPTO_ANUBIS is not set -# CONFIG_CRYPTO_DEFLATE is not set -# CONFIG_CRYPTO_MICHAEL_MIC is not set -# CONFIG_CRYPTO_CRC32C is not set -# CONFIG_CRYPTO_TEST is not set - -# -# Hardware crypto devices -# diff --git a/trunk/arch/ppc/configs/mpc885ads_defconfig b/trunk/arch/ppc/configs/mpc885ads_defconfig deleted file mode 100644 index 016f94d9325f..000000000000 --- a/trunk/arch/ppc/configs/mpc885ads_defconfig +++ /dev/null @@ -1,622 +0,0 @@ -# -# Automatically generated make config: don't edit -# Linux kernel version: 2.6.12-rc6 -# Thu Jun 9 21:17:29 2005 -# -CONFIG_MMU=y -CONFIG_GENERIC_HARDIRQS=y -CONFIG_RWSEM_XCHGADD_ALGORITHM=y -CONFIG_GENERIC_CALIBRATE_DELAY=y -CONFIG_HAVE_DEC_LOCK=y -CONFIG_PPC=y -CONFIG_PPC32=y -CONFIG_GENERIC_NVRAM=y -CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y - -# -# Code maturity level options -# -CONFIG_EXPERIMENTAL=y -# CONFIG_CLEAN_COMPILE is not set -CONFIG_BROKEN=y -CONFIG_BROKEN_ON_SMP=y -CONFIG_INIT_ENV_ARG_LIMIT=32 - -# -# General setup -# -CONFIG_LOCALVERSION="" -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -# CONFIG_POSIX_MQUEUE is not set -# CONFIG_BSD_PROCESS_ACCT is not set -CONFIG_SYSCTL=y -# CONFIG_AUDIT is not set -CONFIG_HOTPLUG=y -CONFIG_KOBJECT_UEVENT=y -# CONFIG_IKCONFIG is not set -CONFIG_EMBEDDED=y -# CONFIG_KALLSYMS is not set -CONFIG_PRINTK=y -CONFIG_BUG=y -CONFIG_BASE_FULL=y -CONFIG_FUTEX=y -# CONFIG_EPOLL is not set -# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set -CONFIG_SHMEM=y -CONFIG_CC_ALIGN_FUNCTIONS=0 -CONFIG_CC_ALIGN_LABELS=0 -CONFIG_CC_ALIGN_LOOPS=0 -CONFIG_CC_ALIGN_JUMPS=0 -# CONFIG_TINY_SHMEM is not set -CONFIG_BASE_SMALL=0 - -# -# Loadable module support -# -# CONFIG_MODULES is not set - -# -# Processor -# -# CONFIG_6xx is not set -# CONFIG_40x is not set -# CONFIG_44x is not set -# CONFIG_POWER3 is not set -# CONFIG_POWER4 is not set -CONFIG_8xx=y -# CONFIG_E500 is not set -# CONFIG_MATH_EMULATION is not set -# CONFIG_CPU_FREQ is not set -CONFIG_EMBEDDEDBOOT=y -# CONFIG_PM is not set -CONFIG_NOT_COHERENT_CACHE=y - -# -# Platform options -# -# CONFIG_RPXLITE is not set -# CONFIG_RPXCLASSIC is not set -# CONFIG_BSEIP is not set -# CONFIG_FADS is not set -CONFIG_MPC885ADS=y -# CONFIG_TQM823L is not set -# CONFIG_TQM850L is not set -# CONFIG_TQM855L is not set -# CONFIG_TQM860L is not set -# CONFIG_FPS850L is not set -# CONFIG_SPD823TS is not set -# CONFIG_IVMS8 is not set -# CONFIG_IVML24 is not set -# CONFIG_SM850 is not set -# CONFIG_HERMES_PRO is not set -# CONFIG_IP860 is not set -# CONFIG_LWMON is not set -# CONFIG_PCU_E is not set -# CONFIG_CCM is not set -# CONFIG_LANTEC is not set -# CONFIG_MBX is not set -# CONFIG_WINCEPT is not set -# CONFIG_SMP is not set -# CONFIG_PREEMPT is not set -# CONFIG_HIGHMEM is not set -CONFIG_BINFMT_ELF=y -# CONFIG_BINFMT_MISC is not set -# CONFIG_CMDLINE_BOOL is not set -CONFIG_ISA_DMA_API=y - -# -# Bus options -# -# CONFIG_PCI is not set -# CONFIG_PCI_DOMAINS is not set -# CONFIG_PCI_QSPAN is not set - -# -# PCCARD (PCMCIA/CardBus) support -# -# CONFIG_PCCARD is not set - -# -# Advanced setup -# -# CONFIG_ADVANCED_OPTIONS is not set - -# -# Default settings for advanced configuration options are used -# -CONFIG_HIGHMEM_START=0xfe000000 -CONFIG_LOWMEM_SIZE=0x30000000 -CONFIG_KERNEL_START=0xc0000000 -CONFIG_TASK_SIZE=0x80000000 -CONFIG_CONSISTENT_START=0xff100000 -CONFIG_CONSISTENT_SIZE=0x00200000 -CONFIG_BOOT_LOAD=0x00400000 - -# -# Device Drivers -# - -# -# Generic Driver Options -# -CONFIG_STANDALONE=y -CONFIG_PREVENT_FIRMWARE_BUILD=y -# CONFIG_FW_LOADER is not set - -# -# Memory Technology Devices (MTD) -# -# CONFIG_MTD is not set - -# -# Parallel port support -# -# CONFIG_PARPORT is not set - -# -# Plug and Play support -# - -# -# Block devices -# -# CONFIG_BLK_DEV_FD is not set -# CONFIG_BLK_DEV_COW_COMMON is not set -# CONFIG_BLK_DEV_LOOP is not set -# CONFIG_BLK_DEV_NBD is not set -# CONFIG_BLK_DEV_RAM is not set -CONFIG_BLK_DEV_RAM_COUNT=16 -CONFIG_INITRAMFS_SOURCE="" -# CONFIG_LBD is not set -# CONFIG_CDROM_PKTCDVD is not set - -# -# IO Schedulers -# -CONFIG_IOSCHED_NOOP=y -# CONFIG_IOSCHED_AS is not set -# CONFIG_IOSCHED_DEADLINE is not set -# CONFIG_IOSCHED_CFQ is not set -# CONFIG_ATA_OVER_ETH is not set - -# -# ATA/ATAPI/MFM/RLL support -# -# CONFIG_IDE is not set - -# -# SCSI device support -# -# CONFIG_SCSI is not set - -# -# Multi-device support (RAID and LVM) -# -# CONFIG_MD is not set - -# -# Fusion MPT device support -# - -# -# IEEE 1394 (FireWire) support -# -# CONFIG_IEEE1394 is not set - -# -# I2O device support -# - -# -# Macintosh device drivers -# - -# -# Networking support -# -CONFIG_NET=y - -# -# Networking options -# -CONFIG_PACKET=y -# CONFIG_PACKET_MMAP is not set -CONFIG_UNIX=y -# CONFIG_NET_KEY is not set -CONFIG_INET=y -# CONFIG_IP_MULTICAST is not set -# CONFIG_IP_ADVANCED_ROUTER is not set -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -# CONFIG_IP_PNP_RARP is not set -# CONFIG_NET_IPIP is not set -# CONFIG_NET_IPGRE is not set -# CONFIG_ARPD is not set -# CONFIG_SYN_COOKIES is not set -# CONFIG_INET_AH is not set -# CONFIG_INET_ESP is not set -# CONFIG_INET_IPCOMP is not set -# CONFIG_INET_TUNNEL is not set -CONFIG_IP_TCPDIAG=y -# CONFIG_IP_TCPDIAG_IPV6 is not set -# CONFIG_IPV6 is not set -# CONFIG_NETFILTER is not set - -# -# SCTP Configuration (EXPERIMENTAL) -# -# CONFIG_IP_SCTP is not set -# CONFIG_ATM is not set -# CONFIG_BRIDGE is not set -# CONFIG_VLAN_8021Q is not set -# CONFIG_DECNET is not set -# CONFIG_LLC2 is not set -# CONFIG_IPX is not set -# CONFIG_ATALK is not set -# CONFIG_X25 is not set -# CONFIG_LAPB is not set -# CONFIG_NET_DIVERT is not set -# CONFIG_ECONET is not set -# CONFIG_WAN_ROUTER is not set - -# -# QoS and/or fair queueing -# -# CONFIG_NET_SCHED is not set -# CONFIG_NET_CLS_ROUTE is not set - -# -# Network testing -# -# CONFIG_NET_PKTGEN is not set -# CONFIG_NETPOLL is not set -# CONFIG_NET_POLL_CONTROLLER is not set -# CONFIG_HAMRADIO is not set -# CONFIG_IRDA is not set -# CONFIG_BT is not set -CONFIG_NETDEVICES=y -# CONFIG_DUMMY is not set -# CONFIG_BONDING is not set -# CONFIG_EQUALIZER is not set -# CONFIG_TUN is not set - -# -# Ethernet (10 or 100Mbit) -# -CONFIG_NET_ETHERNET=y -CONFIG_MII=y -# CONFIG_OAKNET is not set - -# -# Ethernet (1000 Mbit) -# - -# -# Ethernet (10000 Mbit) -# - -# -# Token Ring devices -# - -# -# Wireless LAN (non-hamradio) -# -# CONFIG_NET_RADIO is not set - -# -# Wan interfaces -# -# CONFIG_WAN is not set -CONFIG_PPP=y -# CONFIG_PPP_MULTILINK is not set -# CONFIG_PPP_FILTER is not set -CONFIG_PPP_ASYNC=y -CONFIG_PPP_SYNC_TTY=y -CONFIG_PPP_DEFLATE=y -# CONFIG_PPP_BSDCOMP is not set -# CONFIG_PPPOE is not set -# CONFIG_SLIP is not set -# CONFIG_SHAPER is not set -# CONFIG_NETCONSOLE is not set - -# -# ISDN subsystem -# -# CONFIG_ISDN is not set - -# -# Telephony Support -# -# CONFIG_PHONE is not set - -# -# Input device support -# -# CONFIG_INPUT is not set - -# -# Hardware I/O ports -# -# CONFIG_SERIO is not set -# CONFIG_GAMEPORT is not set - -# -# Character devices -# -# CONFIG_VT is not set -# CONFIG_SERIAL_NONSTANDARD is not set - -# -# Serial drivers -# -# CONFIG_SERIAL_8250 is not set - -# -# Non-8250 serial port support -# -CONFIG_SERIAL_CORE=y -CONFIG_SERIAL_CORE_CONSOLE=y -CONFIG_SERIAL_CPM=y -CONFIG_SERIAL_CPM_CONSOLE=y -# CONFIG_SERIAL_CPM_SCC1 is not set -# CONFIG_SERIAL_CPM_SCC2 is not set -# CONFIG_SERIAL_CPM_SCC3 is not set -# CONFIG_SERIAL_CPM_SCC4 is not set -CONFIG_SERIAL_CPM_SMC1=y -CONFIG_SERIAL_CPM_SMC2=y -CONFIG_UNIX98_PTYS=y -# CONFIG_LEGACY_PTYS is not set - -# -# IPMI -# -# CONFIG_IPMI_HANDLER is not set - -# -# Watchdog Cards -# -# CONFIG_WATCHDOG is not set -# CONFIG_NVRAM is not set -# CONFIG_GEN_RTC is not set -# CONFIG_DTLK is not set -# CONFIG_R3964 is not set - -# -# Ftape, the floppy tape device driver -# -# CONFIG_AGP is not set -# CONFIG_DRM is not set -# CONFIG_RAW_DRIVER is not set - -# -# TPM devices -# - -# -# I2C support -# -# CONFIG_I2C is not set - -# -# Dallas's 1-wire bus -# -# CONFIG_W1 is not set - -# -# Misc devices -# - -# -# Multimedia devices -# -# CONFIG_VIDEO_DEV is not set - -# -# Digital Video Broadcasting Devices -# -# CONFIG_DVB is not set - -# -# Graphics support -# -# CONFIG_FB is not set - -# -# Sound -# -# CONFIG_SOUND is not set - -# -# USB support -# -# CONFIG_USB_ARCH_HAS_HCD is not set -# CONFIG_USB_ARCH_HAS_OHCI is not set - -# -# USB Gadget Support -# -# CONFIG_USB_GADGET is not set - -# -# MMC/SD Card support -# -# CONFIG_MMC is not set - -# -# InfiniBand support -# -# CONFIG_INFINIBAND is not set - -# -# File systems -# -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -# CONFIG_EXT2_FS_POSIX_ACL is not set -# CONFIG_EXT2_FS_SECURITY is not set -CONFIG_EXT3_FS=y -CONFIG_EXT3_FS_XATTR=y -# CONFIG_EXT3_FS_POSIX_ACL is not set -# CONFIG_EXT3_FS_SECURITY is not set -CONFIG_JBD=y -# CONFIG_JBD_DEBUG is not set -CONFIG_FS_MBCACHE=y -# CONFIG_REISERFS_FS is not set -# CONFIG_JFS_FS is not set - -# -# XFS support -# -# CONFIG_XFS_FS is not set -# CONFIG_MINIX_FS is not set -# CONFIG_ROMFS_FS is not set -# CONFIG_QUOTA is not set -# CONFIG_DNOTIFY is not set -# CONFIG_AUTOFS_FS is not set -# CONFIG_AUTOFS4_FS is not set - -# -# CD-ROM/DVD Filesystems -# -# CONFIG_ISO9660_FS is not set -# CONFIG_UDF_FS is not set - -# -# DOS/FAT/NT Filesystems -# -# CONFIG_MSDOS_FS is not set -# CONFIG_VFAT_FS is not set -# CONFIG_NTFS_FS is not set - -# -# Pseudo filesystems -# -CONFIG_PROC_FS=y -# CONFIG_PROC_KCORE is not set -CONFIG_SYSFS=y -# CONFIG_DEVFS_FS is not set -# CONFIG_DEVPTS_FS_XATTR is not set -# CONFIG_TMPFS is not set -# CONFIG_HUGETLBFS is not set -# CONFIG_HUGETLB_PAGE is not set -CONFIG_RAMFS=y - -# -# Miscellaneous filesystems -# -# CONFIG_ADFS_FS is not set -# CONFIG_AFFS_FS is not set -# CONFIG_HFS_FS is not set -# CONFIG_HFSPLUS_FS is not set -# CONFIG_BEFS_FS is not set -# CONFIG_BFS_FS is not set -# CONFIG_EFS_FS is not set -# CONFIG_CRAMFS is not set -# CONFIG_VXFS_FS is not set -# CONFIG_HPFS_FS is not set -# CONFIG_QNX4FS_FS is not set -# CONFIG_SYSV_FS is not set -# CONFIG_UFS_FS is not set - -# -# Network File Systems -# -CONFIG_NFS_FS=y -# CONFIG_NFS_V3 is not set -# CONFIG_NFS_V4 is not set -# CONFIG_NFS_DIRECTIO is not set -# CONFIG_NFSD is not set -CONFIG_ROOT_NFS=y -CONFIG_LOCKD=y -CONFIG_SUNRPC=y -# CONFIG_RPCSEC_GSS_KRB5 is not set -# CONFIG_RPCSEC_GSS_SPKM3 is not set -# CONFIG_SMB_FS is not set -# CONFIG_CIFS is not set -# CONFIG_NCP_FS is not set -# CONFIG_CODA_FS is not set -# CONFIG_AFS_FS is not set - -# -# Partition Types -# -CONFIG_PARTITION_ADVANCED=y -# CONFIG_ACORN_PARTITION is not set -# CONFIG_OSF_PARTITION is not set -# CONFIG_AMIGA_PARTITION is not set -# CONFIG_ATARI_PARTITION is not set -# CONFIG_MAC_PARTITION is not set -CONFIG_MSDOS_PARTITION=y -# CONFIG_BSD_DISKLABEL is not set -# CONFIG_MINIX_SUBPARTITION is not set -# CONFIG_SOLARIS_X86_PARTITION is not set -# CONFIG_UNIXWARE_DISKLABEL is not set -# CONFIG_LDM_PARTITION is not set -# CONFIG_SGI_PARTITION is not set -# CONFIG_ULTRIX_PARTITION is not set -# CONFIG_SUN_PARTITION is not set -# CONFIG_EFI_PARTITION is not set - -# -# Native Language Support -# -# CONFIG_NLS is not set - -# -# MPC8xx CPM Options -# -CONFIG_SCC_ENET=y -# CONFIG_SCC1_ENET is not set -# CONFIG_SCC2_ENET is not set -CONFIG_SCC3_ENET=y -# CONFIG_FEC_ENET is not set -# CONFIG_ENET_BIG_BUFFERS is not set - -# -# Generic MPC8xx Options -# -CONFIG_8xx_COPYBACK=y -CONFIG_8xx_CPU6=y -CONFIG_NO_UCODE_PATCH=y -# CONFIG_USB_SOF_UCODE_PATCH is not set -# CONFIG_I2C_SPI_UCODE_PATCH is not set -# CONFIG_I2C_SPI_SMC1_UCODE_PATCH is not set - -# -# Library routines -# -CONFIG_CRC_CCITT=y -# CONFIG_CRC32 is not set -# CONFIG_LIBCRC32C is not set -CONFIG_ZLIB_INFLATE=y -CONFIG_ZLIB_DEFLATE=y - -# -# Profiling support -# -# CONFIG_PROFILING is not set - -# -# Kernel hacking -# -# CONFIG_PRINTK_TIME is not set -# CONFIG_DEBUG_KERNEL is not set -CONFIG_LOG_BUF_SHIFT=14 - -# -# Security options -# -# CONFIG_KEYS is not set -# CONFIG_SECURITY is not set - -# -# Cryptographic options -# -# CONFIG_CRYPTO is not set - -# -# Hardware crypto devices -# diff --git a/trunk/arch/ppc/kernel/ppc_ksyms.c b/trunk/arch/ppc/kernel/ppc_ksyms.c index 2ba659f401be..d9036ef0b658 100644 --- a/trunk/arch/ppc/kernel/ppc_ksyms.c +++ b/trunk/arch/ppc/kernel/ppc_ksyms.c @@ -88,6 +88,7 @@ EXPORT_SYMBOL(strncpy); EXPORT_SYMBOL(strcat); EXPORT_SYMBOL(strlen); EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy_generic); diff --git a/trunk/arch/ppc/lib/string.S b/trunk/arch/ppc/lib/string.S index 84ed33ab4c2d..927253bfc826 100644 --- a/trunk/arch/ppc/lib/string.S +++ b/trunk/arch/ppc/lib/string.S @@ -121,6 +121,20 @@ _GLOBAL(strcmp) beq 1b blr +_GLOBAL(strncmp) + PPC_LCMPI r5,0 + beqlr + mtctr r5 + addi r5,r3,-1 + addi r4,r4,-1 +1: lbzu r3,1(r5) + cmpwi 1,r3,0 + lbzu r0,1(r4) + subf. r3,r0,r3 + beqlr 1 + bdnzt eq,1b + blr + _GLOBAL(strlen) addi r4,r3,-1 1: lbzu r0,1(r4) diff --git a/trunk/arch/ppc/platforms/Makefile b/trunk/arch/ppc/platforms/Makefile index 40f53fbe6d35..6260231987cb 100644 --- a/trunk/arch/ppc/platforms/Makefile +++ b/trunk/arch/ppc/platforms/Makefile @@ -4,7 +4,6 @@ obj-$(CONFIG_PPC_PREP) += prep_pci.o prep_setup.o obj-$(CONFIG_PREP_RESIDUAL) += residual.o -obj-$(CONFIG_PQ2ADS) += pq2ads.o obj-$(CONFIG_TQM8260) += tqm8260_setup.o obj-$(CONFIG_CPCI690) += cpci690.o obj-$(CONFIG_EV64260) += ev64260.o @@ -24,6 +23,3 @@ obj-$(CONFIG_SBC82xx) += sbc82xx.o obj-$(CONFIG_SPRUCE) += spruce.o obj-$(CONFIG_LITE5200) += lite5200.o obj-$(CONFIG_EV64360) += ev64360.o -obj-$(CONFIG_MPC86XADS) += mpc866ads_setup.o -obj-$(CONFIG_MPC885ADS) += mpc885ads_setup.o -obj-$(CONFIG_ADS8272) += mpc8272ads_setup.o diff --git a/trunk/arch/ppc/platforms/fads.h b/trunk/arch/ppc/platforms/fads.h index 2f9f0f60e3f7..5219366667b3 100644 --- a/trunk/arch/ppc/platforms/fads.h +++ b/trunk/arch/ppc/platforms/fads.h @@ -22,29 +22,6 @@ #include -#if defined(CONFIG_MPC86XADS) - -#define BOARD_CHIP_NAME "MPC86X" - -/* U-Boot maps BCSR to 0xff080000 */ -#define BCSR_ADDR ((uint)0xff080000) - -/* MPC86XADS has one more CPLD and an additional BCSR. - */ -#define CFG_PHYDEV_ADDR ((uint)0xff0a0000) -#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300)) - -#define BCSR5_T1_RST 0x10 -#define BCSR5_ATM155_RST 0x08 -#define BCSR5_ATM25_RST 0x04 -#define BCSR5_MII1_EN 0x02 -#define BCSR5_MII1_RST 0x01 - -/* There is no PHY link change interrupt */ -#define PHY_INTERRUPT (-1) - -#else /* FADS */ - /* Memory map is configured by the PROM startup. * I tried to follow the FADS manual, although the startup PROM * dictates this and we simply have to move some of the physical @@ -55,8 +32,6 @@ /* PHY link change interrupt */ #define PHY_INTERRUPT SIU_IRQ2 -#endif /* CONFIG_MPC86XADS */ - #define BCSR_SIZE ((uint)(64 * 1024)) #define BCSR0 ((uint)(BCSR_ADDR + 0x00)) #define BCSR1 ((uint)(BCSR_ADDR + 0x04)) diff --git a/trunk/arch/ppc/platforms/mpc8272ads_setup.c b/trunk/arch/ppc/platforms/mpc8272ads_setup.c deleted file mode 100644 index 47f4b38edb5f..000000000000 --- a/trunk/arch/ppc/platforms/mpc8272ads_setup.c +++ /dev/null @@ -1,367 +0,0 @@ -/* - * arch/ppc/platforms/mpc8272ads_setup.c - * - * MPC82xx Board-specific PlatformDevice descriptions - * - * 2005 (c) MontaVista Software, Inc. - * Vitaly Bordug - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "pq2ads_pd.h" - -static void init_fcc1_ioports(struct fs_platform_info*); -static void init_fcc2_ioports(struct fs_platform_info*); -static void init_scc1_uart_ioports(struct fs_uart_platform_info*); -static void init_scc4_uart_ioports(struct fs_uart_platform_info*); - -static struct fs_uart_platform_info mpc8272_uart_pdata[] = { - [fsid_scc1_uart] = { - .init_ioports = init_scc1_uart_ioports, - .fs_no = fsid_scc1_uart, - .brg = 1, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, - [fsid_scc4_uart] = { - .init_ioports = init_scc4_uart_ioports, - .fs_no = fsid_scc4_uart, - .brg = 4, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, -}; - -static struct fs_mii_bb_platform_info m82xx_mii_bb_pdata = { - .mdio_dat.bit = 18, - .mdio_dir.bit = 18, - .mdc_dat.bit = 19, - .delay = 1, -}; - -static struct fs_platform_info mpc82xx_enet_pdata[] = { - [fsid_fcc1] = { - .fs_no = fsid_fcc1, - .cp_page = CPM_CR_FCC1_PAGE, - .cp_block = CPM_CR_FCC1_SBLOCK, - - .clk_trx = (PC_F1RXCLK | PC_F1TXCLK), - .clk_route = CMX1_CLK_ROUTE, - .clk_mask = CMX1_CLK_MASK, - .init_ioports = init_fcc1_ioports, - - .mem_offset = FCC1_MEM_OFFSET, - - .rx_ring = 32, - .tx_ring = 32, - .rx_copybreak = 240, - .use_napi = 0, - .napi_weight = 17, - .bus_id = "0:00", - }, - [fsid_fcc2] = { - .fs_no = fsid_fcc2, - .cp_page = CPM_CR_FCC2_PAGE, - .cp_block = CPM_CR_FCC2_SBLOCK, - .clk_trx = (PC_F2RXCLK | PC_F2TXCLK), - .clk_route = CMX2_CLK_ROUTE, - .clk_mask = CMX2_CLK_MASK, - .init_ioports = init_fcc2_ioports, - - .mem_offset = FCC2_MEM_OFFSET, - - .rx_ring = 32, - .tx_ring = 32, - .rx_copybreak = 240, - .use_napi = 0, - .napi_weight = 17, - .bus_id = "0:03", - }, -}; - -static void init_fcc1_ioports(struct fs_platform_info* pdata) -{ - struct io_port *io; - u32 tempval; - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32)); - - io = &immap->im_ioport; - - /* Enable the PHY */ - clrbits32(bcsr, BCSR1_FETHIEN); - setbits32(bcsr, BCSR1_FETH_RST); - - /* FCC1 pins are on port A/C. */ - /* Configure port A and C pins for FCC1 Ethernet. */ - - tempval = in_be32(&io->iop_pdira); - tempval &= ~PA1_DIRA0; - tempval |= PA1_DIRA1; - out_be32(&io->iop_pdira, tempval); - - tempval = in_be32(&io->iop_psora); - tempval &= ~PA1_PSORA0; - tempval |= PA1_PSORA1; - out_be32(&io->iop_psora, tempval); - - setbits32(&io->iop_ppara,PA1_DIRA0 | PA1_DIRA1); - - /* Alter clocks */ - tempval = PC_F1TXCLK|PC_F1RXCLK; - - clrbits32(&io->iop_psorc, tempval); - clrbits32(&io->iop_pdirc, tempval); - setbits32(&io->iop_pparc, tempval); - - clrbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_MASK); - setbits32(&immap->im_cpmux.cmx_fcr, CMX1_CLK_ROUTE); - iounmap(bcsr); - iounmap(immap); -} - -static void init_fcc2_ioports(struct fs_platform_info* pdata) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - u32 *bcsr = ioremap(BCSR_ADDR+12, sizeof(u32)); - - struct io_port *io; - u32 tempval; - - immap = cpm2_immr; - - io = &immap->im_ioport; - - /* Enable the PHY */ - clrbits32(bcsr, BCSR3_FETHIEN2); - setbits32(bcsr, BCSR3_FETH2_RST); - - /* FCC2 are port B/C. */ - /* Configure port A and C pins for FCC2 Ethernet. */ - - tempval = in_be32(&io->iop_pdirb); - tempval &= ~PB2_DIRB0; - tempval |= PB2_DIRB1; - out_be32(&io->iop_pdirb, tempval); - - tempval = in_be32(&io->iop_psorb); - tempval &= ~PB2_PSORB0; - tempval |= PB2_PSORB1; - out_be32(&io->iop_psorb, tempval); - - setbits32(&io->iop_pparb,PB2_DIRB0 | PB2_DIRB1); - - tempval = PC_F2RXCLK|PC_F2TXCLK; - - /* Alter clocks */ - clrbits32(&io->iop_psorc,tempval); - clrbits32(&io->iop_pdirc,tempval); - setbits32(&io->iop_pparc,tempval); - - clrbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_MASK); - setbits32(&immap->im_cpmux.cmx_fcr, CMX2_CLK_ROUTE); - - iounmap(bcsr); - iounmap(immap); -} - - -static void __init mpc8272ads_fixup_enet_pdata(struct platform_device *pdev, - int idx) -{ - bd_t* bi = (void*)__res; - int fs_no = fsid_fcc1+pdev->id-1; - - if(fs_no >= ARRAY_SIZE(mpc82xx_enet_pdata)) { - return; - } - - mpc82xx_enet_pdata[fs_no].dpram_offset= - (u32)cpm2_immr->im_dprambase; - mpc82xx_enet_pdata[fs_no].fcc_regs_c = - (u32)cpm2_immr->im_fcc_c; - memcpy(&mpc82xx_enet_pdata[fs_no].macaddr,bi->bi_enetaddr,6); - - /* prevent dup mac */ - if(fs_no == fsid_fcc2) - mpc82xx_enet_pdata[fs_no].macaddr[5] ^= 1; - - pdev->dev.platform_data = &mpc82xx_enet_pdata[fs_no]; -} - -static void mpc8272ads_fixup_uart_pdata(struct platform_device *pdev, - int idx) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - int num = ARRAY_SIZE(mpc8272_uart_pdata); - int id = fs_uart_id_scc2fsid(idx); - - /* no need to alter anything if console */ - if ((id < num) && (!pdev->dev.platform_data)) { - pinfo = &mpc8272_uart_pdata[id]; - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - } -} - -static void init_scc1_uart_ioports(struct fs_uart_platform_info* pdata) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - - /* SCC1 is only on port D */ - setbits32(&immap->im_ioport.iop_ppard,0x00000003); - clrbits32(&immap->im_ioport.iop_psord,0x00000001); - setbits32(&immap->im_ioport.iop_psord,0x00000002); - clrbits32(&immap->im_ioport.iop_pdird,0x00000001); - setbits32(&immap->im_ioport.iop_pdird,0x00000002); - - /* Wire BRG1 to SCC1 */ - clrbits32(&immap->im_cpmux.cmx_scr,0x00ffffff); - - iounmap(immap); -} - -static void init_scc4_uart_ioports(struct fs_uart_platform_info* pdata) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - - setbits32(&immap->im_ioport.iop_ppard,0x00000600); - clrbits32(&immap->im_ioport.iop_psord,0x00000600); - clrbits32(&immap->im_ioport.iop_pdird,0x00000200); - setbits32(&immap->im_ioport.iop_pdird,0x00000400); - - /* Wire BRG4 to SCC4 */ - clrbits32(&immap->im_cpmux.cmx_scr,0x000000ff); - setbits32(&immap->im_cpmux.cmx_scr,0x0000001b); - - iounmap(immap); -} - -static void __init mpc8272ads_fixup_mdio_pdata(struct platform_device *pdev, - int idx) -{ - m82xx_mii_bb_pdata.irq[0] = PHY_INTERRUPT; - m82xx_mii_bb_pdata.irq[1] = PHY_POLL; - m82xx_mii_bb_pdata.irq[2] = PHY_POLL; - m82xx_mii_bb_pdata.irq[3] = PHY_INTERRUPT; - m82xx_mii_bb_pdata.irq[31] = PHY_POLL; - - - m82xx_mii_bb_pdata.mdio_dat.offset = - (u32)&cpm2_immr->im_ioport.iop_pdatc; - - m82xx_mii_bb_pdata.mdio_dir.offset = - (u32)&cpm2_immr->im_ioport.iop_pdirc; - - m82xx_mii_bb_pdata.mdc_dat.offset = - (u32)&cpm2_immr->im_ioport.iop_pdatc; - - - pdev->dev.platform_data = &m82xx_mii_bb_pdata; -} - -static int mpc8272ads_platform_notify(struct device *dev) -{ - static const struct platform_notify_dev_map dev_map[] = { - { - .bus_id = "fsl-cpm-fcc", - .rtn = mpc8272ads_fixup_enet_pdata, - }, - { - .bus_id = "fsl-cpm-scc:uart", - .rtn = mpc8272ads_fixup_uart_pdata, - }, - { - .bus_id = "fsl-bb-mdio", - .rtn = mpc8272ads_fixup_mdio_pdata, - }, - { - .bus_id = NULL - } - }; - platform_notify_map(dev_map,dev); - - return 0; - -} - -int __init mpc8272ads_init(void) -{ - printk(KERN_NOTICE "mpc8272ads: Init\n"); - - platform_notify = mpc8272ads_platform_notify; - - ppc_sys_device_initfunc(); - - ppc_sys_device_disable_all(); - ppc_sys_device_enable(MPC82xx_CPM_FCC1); - ppc_sys_device_enable(MPC82xx_CPM_FCC2); - - /* to be ready for console, let's attach pdata here */ -#ifdef CONFIG_SERIAL_CPM_SCC1 - ppc_sys_device_setfunc(MPC82xx_CPM_SCC1, PPC_SYS_FUNC_UART); - ppc_sys_device_enable(MPC82xx_CPM_SCC1); - -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC4 - ppc_sys_device_setfunc(MPC82xx_CPM_SCC4, PPC_SYS_FUNC_UART); - ppc_sys_device_enable(MPC82xx_CPM_SCC4); -#endif - - ppc_sys_device_enable(MPC82xx_MDIO_BB); - - return 0; -} - -/* - To prevent confusion, console selection is gross: - by 0 assumed SCC1 and by 1 assumed SCC4 - */ -struct platform_device* early_uart_get_pdev(int index) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - - struct platform_device* pdev = NULL; - if(index) { /*assume SCC4 here*/ - pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC4]; - pinfo = &mpc8272_uart_pdata[fsid_scc4_uart]; - } else { /*over SCC1*/ - pdev = &ppc_sys_platform_devices[MPC82xx_CPM_SCC1]; - pinfo = &mpc8272_uart_pdata[fsid_scc1_uart]; - } - - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - ppc_sys_fixup_mem_resource(pdev, CPM_MAP_ADDR); - return NULL; -} - -arch_initcall(mpc8272ads_init); diff --git a/trunk/arch/ppc/platforms/mpc885ads.h b/trunk/arch/ppc/platforms/mpc885ads.h deleted file mode 100644 index d3bbbb3c9a1f..000000000000 --- a/trunk/arch/ppc/platforms/mpc885ads.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * A collection of structures, addresses, and values associated with - * the Freescale MPC885ADS board. - * Copied from the FADS stuff. - * - * Author: MontaVista Software, Inc. - * source@mvista.com - * - * 2005 (c) MontaVista Software, Inc. This file is licensed under the - * terms of the GNU General Public License version 2. This program is licensed - * "as is" without any warranty of any kind, whether express or implied. - */ - -#ifdef __KERNEL__ -#ifndef __ASM_MPC885ADS_H__ -#define __ASM_MPC885ADS_H__ - - -#include - -/* U-Boot maps BCSR to 0xff080000 */ -#define BCSR_ADDR ((uint)0xff080000) -#define BCSR_SIZE ((uint)32) -#define BCSR0 ((uint)(BCSR_ADDR + 0x00)) -#define BCSR1 ((uint)(BCSR_ADDR + 0x04)) -#define BCSR2 ((uint)(BCSR_ADDR + 0x08)) -#define BCSR3 ((uint)(BCSR_ADDR + 0x0c)) -#define BCSR4 ((uint)(BCSR_ADDR + 0x10)) - -#define CFG_PHYDEV_ADDR ((uint)0xff0a0000) -#define BCSR5 ((uint)(CFG_PHYDEV_ADDR + 0x300)) - -#define IMAP_ADDR ((uint)0xff000000) -#define IMAP_SIZE ((uint)(64 * 1024)) - -#define PCMCIA_MEM_ADDR ((uint)0xff020000) -#define PCMCIA_MEM_SIZE ((uint)(64 * 1024)) - -/* Bits of interest in the BCSRs. - */ -#define BCSR1_ETHEN ((uint)0x20000000) -#define BCSR1_IRDAEN ((uint)0x10000000) -#define BCSR1_RS232EN_1 ((uint)0x01000000) -#define BCSR1_PCCEN ((uint)0x00800000) -#define BCSR1_PCCVCC0 ((uint)0x00400000) -#define BCSR1_PCCVPP0 ((uint)0x00200000) -#define BCSR1_PCCVPP1 ((uint)0x00100000) -#define BCSR1_PCCVPP_MASK (BCSR1_PCCVPP0 | BCSR1_PCCVPP1) -#define BCSR1_RS232EN_2 ((uint)0x00040000) -#define BCSR1_PCCVCC1 ((uint)0x00010000) -#define BCSR1_PCCVCC_MASK (BCSR1_PCCVCC0 | BCSR1_PCCVCC1) - -#define BCSR4_ETH10_RST ((uint)0x80000000) /* 10Base-T PHY reset*/ -#define BCSR4_USB_LO_SPD ((uint)0x04000000) -#define BCSR4_USB_VCC ((uint)0x02000000) -#define BCSR4_USB_FULL_SPD ((uint)0x00040000) -#define BCSR4_USB_EN ((uint)0x00020000) - -#define BCSR5_MII2_EN 0x40 -#define BCSR5_MII2_RST 0x20 -#define BCSR5_T1_RST 0x10 -#define BCSR5_ATM155_RST 0x08 -#define BCSR5_ATM25_RST 0x04 -#define BCSR5_MII1_EN 0x02 -#define BCSR5_MII1_RST 0x01 - -/* Interrupt level assignments */ -#define PHY_INTERRUPT SIU_IRQ7 /* PHY link change interrupt */ -#define SIU_INT_FEC1 SIU_LEVEL1 /* FEC1 interrupt */ -#define SIU_INT_FEC2 SIU_LEVEL3 /* FEC2 interrupt */ -#define FEC_INTERRUPT SIU_INT_FEC1 /* FEC interrupt */ - -/* We don't use the 8259 */ -#define NR_8259_INTS 0 - -/* CPM Ethernet through SCC3 */ -#define PA_ENET_RXD ((ushort)0x0040) -#define PA_ENET_TXD ((ushort)0x0080) -#define PE_ENET_TCLK ((uint)0x00004000) -#define PE_ENET_RCLK ((uint)0x00008000) -#define PE_ENET_TENA ((uint)0x00000010) -#define PC_ENET_CLSN ((ushort)0x0400) -#define PC_ENET_RENA ((ushort)0x0800) - -/* Control bits in the SICR to route TCLK (CLK5) and RCLK (CLK6) to - * SCC3. Also, make sure GR3 (bit 8) and SC3 (bit 9) are zero */ -#define SICR_ENET_MASK ((uint)0x00ff0000) -#define SICR_ENET_CLKRT ((uint)0x002c0000) - -#define BOARD_CHIP_NAME "MPC885" - -#endif /* __ASM_MPC885ADS_H__ */ -#endif /* __KERNEL__ */ diff --git a/trunk/arch/ppc/platforms/mpc885ads_setup.c b/trunk/arch/ppc/platforms/mpc885ads_setup.c deleted file mode 100644 index ba06cc08cdab..000000000000 --- a/trunk/arch/ppc/platforms/mpc885ads_setup.c +++ /dev/null @@ -1,476 +0,0 @@ -/*arch/ppc/platforms/mpc885ads_setup.c - * - * Platform setup for the Freescale mpc885ads board - * - * Vitaly Bordug - * - * Copyright 2005 MontaVista Software Inc. - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -extern unsigned char __res[]; -static void setup_smc1_ioports(struct fs_uart_platform_info*); -static void setup_smc2_ioports(struct fs_uart_platform_info*); - -static struct fs_mii_fec_platform_info mpc8xx_mdio_fec_pdata; -static void setup_fec1_ioports(struct fs_platform_info*); -static void setup_fec2_ioports(struct fs_platform_info*); -static void setup_scc3_ioports(struct fs_platform_info*); - -static struct fs_uart_platform_info mpc885_uart_pdata[] = { - [fsid_smc1_uart] = { - .brg = 1, - .fs_no = fsid_smc1_uart, - .init_ioports = setup_smc1_ioports, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, - [fsid_smc2_uart] = { - .brg = 2, - .fs_no = fsid_smc2_uart, - .init_ioports = setup_smc2_ioports, - .tx_num_fifo = 4, - .tx_buf_size = 32, - .rx_num_fifo = 4, - .rx_buf_size = 32, - }, -}; - -static struct fs_platform_info mpc8xx_enet_pdata[] = { - [fsid_fec1] = { - .rx_ring = 128, - .tx_ring = 16, - .rx_copybreak = 240, - - .use_napi = 1, - .napi_weight = 17, - - .init_ioports = setup_fec1_ioports, - - .bus_id = "0:00", - .has_phy = 1, - }, - [fsid_fec2] = { - .rx_ring = 128, - .tx_ring = 16, - .rx_copybreak = 240, - - .use_napi = 1, - .napi_weight = 17, - - .init_ioports = setup_fec2_ioports, - - .bus_id = "0:01", - .has_phy = 1, - }, - [fsid_scc3] = { - .rx_ring = 64, - .tx_ring = 8, - .rx_copybreak = 240, - - .use_napi = 1, - .napi_weight = 17, - - .init_ioports = setup_scc3_ioports, -#ifdef CONFIG_FIXED_MII_10_FDX - .bus_id = "fixed@100:1", -#else - .bus_id = "0:02", - #endif - }, -}; - -void __init board_init(void) -{ - cpm8xx_t *cp = cpmp; - unsigned int *bcsr_io; - -#ifdef CONFIG_FS_ENET - immap_t *immap = (immap_t *) IMAP_ADDR; -#endif - bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR\n"); - return; - } -#ifdef CONFIG_SERIAL_CPM_SMC1 - cp->cp_simode &= ~(0xe0000000 >> 17); /* brg1 */ - clrbits32(bcsr_io, BCSR1_RS232EN_1); - cp->cp_smc[0].smc_smcm |= (SMCM_RX | SMCM_TX); - cp->cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); -#else - setbits32(bcsr_io,BCSR1_RS232EN_1); - cp->cp_smc[0].smc_smcmr = 0; - cp->cp_smc[0].smc_smce = 0; -#endif - -#ifdef CONFIG_SERIAL_CPM_SMC2 - cp->cp_simode &= ~(0xe0000000 >> 1); - cp->cp_simode |= (0x20000000 >> 1); /* brg2 */ - clrbits32(bcsr_io,BCSR1_RS232EN_2); - cp->cp_smc[1].smc_smcm |= (SMCM_RX | SMCM_TX); - cp->cp_smc[1].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); -#else - setbits32(bcsr_io,BCSR1_RS232EN_2); - cp->cp_smc[1].smc_smcmr = 0; - cp->cp_smc[1].smc_smce = 0; -#endif - iounmap(bcsr_io); - -#ifdef CONFIG_FS_ENET - /* use MDC for MII (common) */ - setbits16(&immap->im_ioport.iop_pdpar, 0x0080); - clrbits16(&immap->im_ioport.iop_pddir, 0x0080); - bcsr_io = ioremap(BCSR5, sizeof(unsigned long)); - clrbits32(bcsr_io,BCSR5_MII1_EN); - clrbits32(bcsr_io,BCSR5_MII1_RST); -#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 - clrbits32(bcsr_io,BCSR5_MII2_EN); - clrbits32(bcsr_io,BCSR5_MII2_RST); -#endif - iounmap(bcsr_io); -#endif -} - -static void setup_fec1_ioports(struct fs_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - - /* configure FEC1 pins */ - setbits16(&immap->im_ioport.iop_papar, 0xf830); - setbits16(&immap->im_ioport.iop_padir, 0x0830); - clrbits16(&immap->im_ioport.iop_padir, 0xf000); - setbits32(&immap->im_cpm.cp_pbpar, 0x00001001); - - clrbits32(&immap->im_cpm.cp_pbdir, 0x00001001); - setbits16(&immap->im_ioport.iop_pcpar, 0x000c); - clrbits16(&immap->im_ioport.iop_pcdir, 0x000c); - setbits32(&immap->im_cpm.cp_pepar, 0x00000003); - - setbits32(&immap->im_cpm.cp_pedir, 0x00000003); - clrbits32(&immap->im_cpm.cp_peso, 0x00000003); - clrbits32(&immap->im_cpm.cp_cptr, 0x00000100); -} - -static void setup_fec2_ioports(struct fs_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - - /* configure FEC2 pins */ - setbits32(&immap->im_cpm.cp_pepar, 0x0003fffc); - setbits32(&immap->im_cpm.cp_pedir, 0x0003fffc); - clrbits32(&immap->im_cpm.cp_peso, 0x000087fc); - setbits32(&immap->im_cpm.cp_peso, 0x00037800); - clrbits32(&immap->im_cpm.cp_cptr, 0x00000080); -} - -static void setup_scc3_ioports(struct fs_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - unsigned *bcsr_io; - - bcsr_io = ioremap(BCSR_ADDR, BCSR_SIZE); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR\n"); - return; - } - - /* Enable the PHY. - */ - clrbits32(bcsr_io+4, BCSR4_ETH10_RST); - udelay(1000); - setbits32(bcsr_io+4, BCSR4_ETH10_RST); - /* Configure port A pins for Txd and Rxd. - */ - setbits16(&immap->im_ioport.iop_papar, PA_ENET_RXD | PA_ENET_TXD); - clrbits16(&immap->im_ioport.iop_padir, PA_ENET_RXD | PA_ENET_TXD); - - /* Configure port C pins to enable CLSN and RENA. - */ - clrbits16(&immap->im_ioport.iop_pcpar, PC_ENET_CLSN | PC_ENET_RENA); - clrbits16(&immap->im_ioport.iop_pcdir, PC_ENET_CLSN | PC_ENET_RENA); - setbits16(&immap->im_ioport.iop_pcso, PC_ENET_CLSN | PC_ENET_RENA); - - /* Configure port E for TCLK and RCLK. - */ - setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TCLK | PE_ENET_RCLK); - clrbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA); - clrbits32(&immap->im_cpm.cp_pedir, - PE_ENET_TCLK | PE_ENET_RCLK | PE_ENET_TENA); - clrbits32(&immap->im_cpm.cp_peso, PE_ENET_TCLK | PE_ENET_RCLK); - setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA); - - /* Configure Serial Interface clock routing. - * First, clear all SCC bits to zero, then set the ones we want. - */ - clrbits32(&immap->im_cpm.cp_sicr, SICR_ENET_MASK); - setbits32(&immap->im_cpm.cp_sicr, SICR_ENET_CLKRT); - - /* Disable Rx and Tx. SMC1 sshould be stopped if SCC3 eternet are used. - */ - immap->im_cpm.cp_smc[0].smc_smcmr &= ~(SMCMR_REN | SMCMR_TEN); - /* On the MPC885ADS SCC ethernet PHY is initialized in the full duplex mode - * by H/W setting after reset. SCC ethernet controller support only half duplex. - * This discrepancy of modes causes a lot of carrier lost errors. - */ - - /* In the original SCC enet driver the following code is placed at - the end of the initialization */ - setbits32(&immap->im_cpm.cp_pepar, PE_ENET_TENA); - clrbits32(&immap->im_cpm.cp_pedir, PE_ENET_TENA); - setbits32(&immap->im_cpm.cp_peso, PE_ENET_TENA); - - setbits32(bcsr_io+4, BCSR1_ETHEN); - iounmap(bcsr_io); -} - -static int mac_count = 0; - -static void mpc885ads_fixup_enet_pdata(struct platform_device *pdev, int fs_no) -{ - struct fs_platform_info *fpi; - bd_t *bd = (bd_t *) __res; - char *e; - int i; - - if(fs_no >= ARRAY_SIZE(mpc8xx_enet_pdata)) { - printk(KERN_ERR"No network-suitable #%d device on bus", fs_no); - return; - } - - fpi = &mpc8xx_enet_pdata[fs_no]; - - switch (fs_no) { - case fsid_fec1: - fpi->init_ioports = &setup_fec1_ioports; - break; - case fsid_fec2: - fpi->init_ioports = &setup_fec2_ioports; - break; - case fsid_scc3: - fpi->init_ioports = &setup_scc3_ioports; - break; - default: - printk(KERN_WARNING "Device %s is not supported!\n", pdev->name); - return; - } - - pdev->dev.platform_data = fpi; - fpi->fs_no = fs_no; - - e = (unsigned char *)&bd->bi_enetaddr; - for (i = 0; i < 6; i++) - fpi->macaddr[i] = *e++; - - fpi->macaddr[5] += mac_count++; - -} - -static void mpc885ads_fixup_fec_enet_pdata(struct platform_device *pdev, - int idx) -{ - /* This is for FEC devices only */ - if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-fec"))) - return; - mpc885ads_fixup_enet_pdata(pdev, fsid_fec1 + pdev->id - 1); -} - -static void __init mpc885ads_fixup_scc_enet_pdata(struct platform_device *pdev, - int idx) -{ - /* This is for SCC devices only */ - if (!pdev || !pdev->name || (!strstr(pdev->name, "fsl-cpm-scc"))) - return; - - mpc885ads_fixup_enet_pdata(pdev, fsid_scc1 + pdev->id - 1); -} - -static void setup_smc1_ioports(struct fs_uart_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - unsigned *bcsr_io; - unsigned int iobits = 0x000000c0; - - bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR1\n"); - return; - } - clrbits32(bcsr_io,BCSR1_RS232EN_1); - iounmap(bcsr_io); - - setbits32(&immap->im_cpm.cp_pbpar, iobits); - clrbits32(&immap->im_cpm.cp_pbdir, iobits); - clrbits16(&immap->im_cpm.cp_pbodr, iobits); -} - -static void setup_smc2_ioports(struct fs_uart_platform_info* pdata) -{ - immap_t *immap = (immap_t *) IMAP_ADDR; - unsigned *bcsr_io; - unsigned int iobits = 0x00000c00; - - bcsr_io = ioremap(BCSR1, sizeof(unsigned long)); - - if (bcsr_io == NULL) { - printk(KERN_CRIT "Could not remap BCSR1\n"); - return; - } - clrbits32(bcsr_io,BCSR1_RS232EN_2); - iounmap(bcsr_io); - -#ifndef CONFIG_SERIAL_CPM_ALT_SMC2 - setbits32(&immap->im_cpm.cp_pbpar, iobits); - clrbits32(&immap->im_cpm.cp_pbdir, iobits); - clrbits16(&immap->im_cpm.cp_pbodr, iobits); -#else - setbits16(&immap->im_ioport.iop_papar, iobits); - clrbits16(&immap->im_ioport.iop_padir, iobits); - clrbits16(&immap->im_ioport.iop_paodr, iobits); -#endif -} - -static void __init mpc885ads_fixup_uart_pdata(struct platform_device *pdev, - int idx) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - int num = ARRAY_SIZE(mpc885_uart_pdata); - - int id = fs_uart_id_smc2fsid(idx); - - /* no need to alter anything if console */ - if ((id < num) && (!pdev->dev.platform_data)) { - pinfo = &mpc885_uart_pdata[id]; - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - } -} - - -static int mpc885ads_platform_notify(struct device *dev) -{ - - static const struct platform_notify_dev_map dev_map[] = { - { - .bus_id = "fsl-cpm-fec", - .rtn = mpc885ads_fixup_fec_enet_pdata, - }, - { - .bus_id = "fsl-cpm-scc", - .rtn = mpc885ads_fixup_scc_enet_pdata, - }, - { - .bus_id = "fsl-cpm-smc:uart", - .rtn = mpc885ads_fixup_uart_pdata - }, - { - .bus_id = NULL - } - }; - - platform_notify_map(dev_map,dev); - - return 0; -} - -int __init mpc885ads_init(void) -{ - struct fs_mii_fec_platform_info* fmpi; - bd_t *bd = (bd_t *) __res; - - printk(KERN_NOTICE "mpc885ads: Init\n"); - - platform_notify = mpc885ads_platform_notify; - - ppc_sys_device_initfunc(); - ppc_sys_device_disable_all(); - - ppc_sys_device_enable(MPC8xx_CPM_FEC1); - - ppc_sys_device_enable(MPC8xx_MDIO_FEC); - fmpi = ppc_sys_platform_devices[MPC8xx_MDIO_FEC].dev.platform_data = - &mpc8xx_mdio_fec_pdata; - - fmpi->mii_speed = ((((bd->bi_intfreq + 4999999) / 2500000) / 2) & 0x3F) << 1; - - /* No PHY interrupt line here */ - fmpi->irq[0xf] = SIU_IRQ7; - -#ifdef CONFIG_MPC8xx_SECOND_ETH_SCC3 - ppc_sys_device_enable(MPC8xx_CPM_SCC3); - -#endif -#ifdef CONFIG_MPC8xx_SECOND_ETH_FEC2 - ppc_sys_device_enable(MPC8xx_CPM_FEC2); -#endif - -#ifdef CONFIG_SERIAL_CPM_SMC1 - ppc_sys_device_enable(MPC8xx_CPM_SMC1); - ppc_sys_device_setfunc(MPC8xx_CPM_SMC1, PPC_SYS_FUNC_UART); -#endif - -#ifdef CONFIG_SERIAL_CPM_SMC2 - ppc_sys_device_enable(MPC8xx_CPM_SMC2); - ppc_sys_device_setfunc(MPC8xx_CPM_SMC2, PPC_SYS_FUNC_UART); -#endif - return 0; -} - -arch_initcall(mpc885ads_init); - -/* - To prevent confusion, console selection is gross: - by 0 assumed SMC1 and by 1 assumed SMC2 - */ -struct platform_device* early_uart_get_pdev(int index) -{ - bd_t *bd = (bd_t *) __res; - struct fs_uart_platform_info *pinfo; - - struct platform_device* pdev = NULL; - if(index) { /*assume SMC2 here*/ - pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC2]; - pinfo = &mpc885_uart_pdata[1]; - } else { /*over SMC1*/ - pdev = &ppc_sys_platform_devices[MPC8xx_CPM_SMC1]; - pinfo = &mpc885_uart_pdata[0]; - } - - pinfo->uart_clk = bd->bi_intfreq; - pdev->dev.platform_data = pinfo; - ppc_sys_fixup_mem_resource(pdev, IMAP_ADDR); - return NULL; -} - diff --git a/trunk/arch/ppc/platforms/pq2ads.c b/trunk/arch/ppc/platforms/pq2ads.c deleted file mode 100644 index 7fc2e02f5246..000000000000 --- a/trunk/arch/ppc/platforms/pq2ads.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * PQ2ADS platform support - * - * Author: Kumar Gala - * Derived from: est8260_setup.c by Allen Curtis - * - * Copyright 2004 Freescale Semiconductor, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#include - -#include -#include -#include -#include - -void __init -m82xx_board_setup(void) -{ - cpm2_map_t* immap = ioremap(CPM_MAP_ADDR, sizeof(cpm2_map_t)); - u32 *bcsr = ioremap(BCSR_ADDR+4, sizeof(u32)); - - /* Enable the 2nd UART port */ - clrbits32(bcsr, BCSR1_RS232_EN2); - -#ifdef CONFIG_SERIAL_CPM_SCC1 - clrbits32((u32*)&immap->im_scc[0].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[0].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC2 - clrbits32((u32*)&immap->im_scc[1].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[1].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC3 - clrbits32((u32*)&immap->im_scc[2].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[2].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - -#ifdef CONFIG_SERIAL_CPM_SCC4 - clrbits32((u32*)&immap->im_scc[3].scc_sccm, UART_SCCM_TX | UART_SCCM_RX); - clrbits32((u32*)&immap->im_scc[3].scc_gsmrl, SCC_GSMRL_ENR | SCC_GSMRL_ENT); -#endif - - iounmap(bcsr); - iounmap(immap); -} diff --git a/trunk/arch/ppc/platforms/pq2ads.h b/trunk/arch/ppc/platforms/pq2ads.h deleted file mode 100644 index 2b287f4e0ca3..000000000000 --- a/trunk/arch/ppc/platforms/pq2ads.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * A collection of structures, addresses, and values associated with - * the Motorola MPC8260ADS/MPC8266ADS-PCI boards. - * Copied from the RPX-Classic and SBS8260 stuff. - * - * Copyright (c) 2001 Dan Malek (dan@mvista.com) - */ -#ifdef __KERNEL__ -#ifndef __MACH_ADS8260_DEFS -#define __MACH_ADS8260_DEFS - - -#include - -#if defined(CONFIG_ADS8272) -#define BOARD_CHIP_NAME "8272" -#endif - -/* Memory map is configured by the PROM startup. - * We just map a few things we need. The CSR is actually 4 byte-wide - * registers that can be accessed as 8-, 16-, or 32-bit values. - */ -#define CPM_MAP_ADDR ((uint)0xf0000000) -#define BCSR_ADDR ((uint)0xf4500000) -#define BCSR_SIZE ((uint)(32 * 1024)) - -#define BOOTROM_RESTART_ADDR ((uint)0xff000104) - -/* For our show_cpuinfo hooks. */ -#define CPUINFO_VENDOR "Motorola" -#define CPUINFO_MACHINE "PQ2 ADS PowerPC" - -/* The ADS8260 has 16, 32-bit wide control/status registers, accessed - * only on word boundaries. - * Not all are used (yet), or are interesting to us (yet). - */ - -/* Things of interest in the CSR. -*/ -#define BCSR0_LED0 ((uint)0x02000000) /* 0 == on */ -#define BCSR0_LED1 ((uint)0x01000000) /* 0 == on */ -#define BCSR1_FETHIEN ((uint)0x08000000) /* 0 == enable */ -#define BCSR1_FETH_RST ((uint)0x04000000) /* 0 == reset */ -#define BCSR1_RS232_EN1 ((uint)0x02000000) /* 0 == enable */ -#define BCSR1_RS232_EN2 ((uint)0x01000000) /* 0 == enable */ -#define BCSR3_FETHIEN2 ((uint)0x10000000) /* 0 == enable */ -#define BCSR3_FETH2_RST ((uint)0x80000000) /* 0 == reset */ - -#define PHY_INTERRUPT SIU_INT_IRQ7 - -#ifdef CONFIG_PCI -/* PCI interrupt controller */ -#define PCI_INT_STAT_REG 0xF8200000 -#define PCI_INT_MASK_REG 0xF8200004 -#define PIRQA (NR_CPM_INTS + 0) -#define PIRQB (NR_CPM_INTS + 1) -#define PIRQC (NR_CPM_INTS + 2) -#define PIRQD (NR_CPM_INTS + 3) - -/* - * PCI memory map definitions for MPC8266ADS-PCI. - * - * processor view - * local address PCI address target - * 0x80000000-0x9FFFFFFF 0x80000000-0x9FFFFFFF PCI mem with prefetch - * 0xA0000000-0xBFFFFFFF 0xA0000000-0xBFFFFFFF PCI mem w/o prefetch - * 0xF4000000-0xF7FFFFFF 0x00000000-0x03FFFFFF PCI IO - * - * PCI master view - * local address PCI address target - * 0x00000000-0x1FFFFFFF 0x00000000-0x1FFFFFFF MPC8266 local memory - */ - -/* All the other PCI memory map definitions reside at syslib/m82xx_pci.h - Here we should redefine what is unique for this board */ -#define M82xx_PCI_SLAVE_MEM_LOCAL 0x00000000 /* Local base */ -#define M82xx_PCI_SLAVE_MEM_BUS 0x00000000 /* PCI base */ -#define M82xx_PCI_SLAVE_MEM_SIZE 0x10000000 /* 256 Mb */ - -#define M82xx_PCI_SLAVE_SEC_WND_SIZE ~(0x40000000 - 1U) /* 2 x 512Mb */ -#define M82xx_PCI_SLAVE_SEC_WND_BASE 0x80000000 /* PCI Memory base */ - -#if defined(CONFIG_ADS8272) -#define PCI_INT_TO_SIU SIU_INT_IRQ2 -#elif defined(CONFIG_PQ2FADS) -#define PCI_INT_TO_SIU SIU_INT_IRQ6 -#else -#warning PCI Bridge will be without interrupts support -#endif - -#endif /* CONFIG_PCI */ - -#endif /* __MACH_ADS8260_DEFS */ -#endif /* __KERNEL__ */ diff --git a/trunk/arch/ppc/platforms/pq2ads_pd.h b/trunk/arch/ppc/platforms/pq2ads_pd.h deleted file mode 100644 index 672483df8079..000000000000 --- a/trunk/arch/ppc/platforms/pq2ads_pd.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef __PQ2ADS_PD_H -#define __PQ2ADS_PD_H -/* - * arch/ppc/platforms/82xx/pq2ads_pd.h - * - * Some defines for MPC82xx board-specific PlatformDevice descriptions - * - * 2005 (c) MontaVista Software, Inc. - * Vitaly Bordug - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -/* FCC1 Clock Source Configuration. These can be redefined in the board specific file. - Can only choose from CLK9-12 */ - -#define F1_RXCLK 11 -#define F1_TXCLK 10 - -/* FCC2 Clock Source Configuration. These can be redefined in the board specific file. - Can only choose from CLK13-16 */ -#define F2_RXCLK 15 -#define F2_TXCLK 16 - -/* FCC3 Clock Source Configuration. These can be redefined in the board specific file. - Can only choose from CLK13-16 */ -#define F3_RXCLK 13 -#define F3_TXCLK 14 - -#endif diff --git a/trunk/arch/ppc/syslib/m8260_setup.c b/trunk/arch/ppc/syslib/m8260_setup.c index 46588fa94381..b40583724de3 100644 --- a/trunk/arch/ppc/syslib/m8260_setup.c +++ b/trunk/arch/ppc/syslib/m8260_setup.c @@ -175,12 +175,6 @@ m8260_init_IRQ(void) * in case the boot rom changed something on us. */ cpm2_immr->im_intctl.ic_siprr = 0x05309770; - -#if defined(CONFIG_PCI) && (defined(CONFIG_ADS8272) || defined(CONFIG_PQ2FADS)) - /* Initialize stuff for the 82xx CPLD IC and install demux */ - pq2pci_init_irq(); -#endif - } /* diff --git a/trunk/arch/ppc/syslib/m82xx_pci.c b/trunk/arch/ppc/syslib/m82xx_pci.c index fe860d52e2e4..657a1c25a2ab 100644 --- a/trunk/arch/ppc/syslib/m82xx_pci.c +++ b/trunk/arch/ppc/syslib/m82xx_pci.c @@ -150,14 +150,6 @@ pq2pci_init_irq(void) { int irq; volatile cpm2_map_t *immap = cpm2_immr; -#if defined CONFIG_ADS8272 - /* configure chip select for PCI interrupt controller */ - immap->im_memctl.memc_br3 = PCI_INT_STAT_REG | 0x00001801; - immap->im_memctl.memc_or3 = 0xffff8010; -#elif defined CONFIG_PQ2FADS - immap->im_memctl.memc_br8 = PCI_INT_STAT_REG | 0x00001801; - immap->im_memctl.memc_or8 = 0xffff8010; -#endif for (irq = NR_CPM_INTS; irq < NR_CPM_INTS + 4; irq++) irq_desc[irq].chip = &pq2pci_ic; @@ -222,26 +214,6 @@ pq2ads_setup_pci(struct pci_controller *hose) immap->im_memctl.memc_pcibr1 = M82xx_PCI_SEC_WND_BASE | PCIBR_ENABLE; #endif -#if defined CONFIG_ADS8272 - immap->im_siu_conf.siu_82xx.sc_siumcr = - (immap->im_siu_conf.siu_82xx.sc_siumcr & - ~(SIUMCR_BBD | SIUMCR_ESE | SIUMCR_PBSE | - SIUMCR_CDIS | SIUMCR_DPPC11 | SIUMCR_L2CPC11 | - SIUMCR_LBPC11 | SIUMCR_APPC11 | - SIUMCR_CS10PC11 | SIUMCR_BCTLC11 | SIUMCR_MMR11)) | - SIUMCR_DPPC11 | SIUMCR_L2CPC01 | SIUMCR_LBPC00 | - SIUMCR_APPC10 | SIUMCR_CS10PC00 | - SIUMCR_BCTLC00 | SIUMCR_MMR11 ; - -#elif defined CONFIG_PQ2FADS - /* - * Setting required to enable IRQ1-IRQ7 (SIUMCR [DPPC]), - * and local bus for PCI (SIUMCR [LBPC]). - */ - immap->im_siu_conf.siu_82xx.sc_siumcr = (immap->im_siu_conf.siu_82xx.sc_siumcr & - ~(SIUMCR_L2CPC11 | SIUMCR_LBPC11 | SIUMCR_CS10PC11 | SIUMCR_APPC11) | - SIUMCR_BBD | SIUMCR_LBPC01 | SIUMCR_DPPC11 | SIUMCR_APPC10); -#endif /* Enable PCI */ immap->im_pci.pci_gcr = cpu_to_le32(PCIGCR_PCI_BUS_EN); @@ -284,12 +256,6 @@ pq2ads_setup_pci(struct pci_controller *hose) immap->im_pci.pci_pibar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_BUS >> PITA_ADDR_SHIFT); immap->im_pci.pci_pitar0 = cpu_to_le32(M82xx_PCI_SLAVE_MEM_LOCAL>> PITA_ADDR_SHIFT); -#if defined CONFIG_ADS8272 - /* PCI int highest prio */ - immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x01236745; -#elif defined CONFIG_PQ2FADS - immap->im_siu_conf.siu_82xx.sc_ppc_alrh = 0x03124567; -#endif /* park bus on PCI */ immap->im_siu_conf.siu_82xx.sc_ppc_acr = PPC_ACR_BUS_PARK_PCI; @@ -320,10 +286,6 @@ void __init pq2_find_bridges(void) hose->bus_offset = 0; hose->last_busno = 0xff; -#ifdef CONFIG_ADS8272 - hose->set_cfg_type = 1; -#endif - setup_m8260_indirect_pci(hose, (unsigned long)&cpm2_immr->im_pci.pci_cfg_addr, (unsigned long)&cpm2_immr->im_pci.pci_cfg_data); diff --git a/trunk/arch/ppc/syslib/m8xx_setup.c b/trunk/arch/ppc/syslib/m8xx_setup.c index 19749e9bcf91..18da720fc1b0 100644 --- a/trunk/arch/ppc/syslib/m8xx_setup.c +++ b/trunk/arch/ppc/syslib/m8xx_setup.c @@ -139,16 +139,6 @@ m8xx_setup_arch(void) } } #endif -#endif - -#if defined (CONFIG_MPC86XADS) || defined (CONFIG_MPC885ADS) -#if defined(CONFIG_MTD_PHYSMAP) - physmap_configure(binfo->bi_flashstart, binfo->bi_flashsize, - MPC8xxADS_BANK_WIDTH, NULL); -#ifdef CONFIG_MTD_PARTITIONS - physmap_set_partitions(mpc8xxads_partitions, mpc8xxads_part_num); -#endif /* CONFIG_MTD_PARTITIONS */ -#endif /* CONFIG_MTD_PHYSMAP */ #endif board_init(); diff --git a/trunk/arch/s390/Kconfig b/trunk/arch/s390/Kconfig index f6a68e178fc5..8f5f02160ffc 100644 --- a/trunk/arch/s390/Kconfig +++ b/trunk/arch/s390/Kconfig @@ -62,6 +62,10 @@ config GENERIC_LOCKBREAK default y depends on SMP && PREEMPT +config PGSTE + bool + default y if KVM + mainmenu "Linux Kernel Configuration" config S390 @@ -69,6 +73,7 @@ config S390 select HAVE_OPROFILE select HAVE_KPROBES select HAVE_KRETPROBES + select HAVE_KVM if 64BIT source "init/Kconfig" @@ -515,6 +520,13 @@ config ZFCPDUMP Select this option if you want to build an zfcpdump enabled kernel. Refer to for more details on this. +config S390_GUEST +bool "s390 guest support (EXPERIMENTAL)" + depends on 64BIT && EXPERIMENTAL + select VIRTIO + select VIRTIO_RING + help + Select this option if you want to run the kernel under s390 linux endmenu source "net/Kconfig" @@ -536,3 +548,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "arch/s390/kvm/Kconfig" diff --git a/trunk/arch/s390/Makefile b/trunk/arch/s390/Makefile index f708be367b03..792a4e7743ce 100644 --- a/trunk/arch/s390/Makefile +++ b/trunk/arch/s390/Makefile @@ -87,7 +87,7 @@ LDFLAGS_vmlinux := -e start head-y := arch/s390/kernel/head.o arch/s390/kernel/init_task.o core-y += arch/s390/mm/ arch/s390/kernel/ arch/s390/crypto/ \ - arch/s390/appldata/ arch/s390/hypfs/ + arch/s390/appldata/ arch/s390/hypfs/ arch/s390/kvm/ libs-y += arch/s390/lib/ drivers-y += drivers/s390/ drivers-$(CONFIG_MATHEMU) += arch/s390/math-emu/ diff --git a/trunk/arch/s390/kernel/early.c b/trunk/arch/s390/kernel/early.c index 540a67f979b6..68ec4083bf73 100644 --- a/trunk/arch/s390/kernel/early.c +++ b/trunk/arch/s390/kernel/early.c @@ -144,6 +144,10 @@ static noinline __init void detect_machine_type(void) /* Running on a P/390 ? */ if (cpuinfo->cpu_id.machine == 0x7490) machine_flags |= 4; + + /* Running under KVM ? */ + if (cpuinfo->cpu_id.version == 0xfe) + machine_flags |= 64; } #ifdef CONFIG_64BIT diff --git a/trunk/arch/s390/kernel/setup.c b/trunk/arch/s390/kernel/setup.c index 7141147e6b63..a9d18aafa5f4 100644 --- a/trunk/arch/s390/kernel/setup.c +++ b/trunk/arch/s390/kernel/setup.c @@ -316,7 +316,11 @@ static int __init early_parse_ipldelay(char *p) early_param("ipldelay", early_parse_ipldelay); #ifdef CONFIG_S390_SWITCH_AMODE +#ifdef CONFIG_PGSTE +unsigned int switch_amode = 1; +#else unsigned int switch_amode = 0; +#endif EXPORT_SYMBOL_GPL(switch_amode); static void set_amode_and_uaccess(unsigned long user_amode, @@ -797,9 +801,13 @@ setup_arch(char **cmdline_p) "This machine has an IEEE fpu\n" : "This machine has no IEEE fpu\n"); #else /* CONFIG_64BIT */ - printk((MACHINE_IS_VM) ? - "We are running under VM (64 bit mode)\n" : - "We are running native (64 bit mode)\n"); + if (MACHINE_IS_VM) + printk("We are running under VM (64 bit mode)\n"); + else if (MACHINE_IS_KVM) { + printk("We are running under KVM (64 bit mode)\n"); + add_preferred_console("ttyS", 1, NULL); + } else + printk("We are running native (64 bit mode)\n"); #endif /* CONFIG_64BIT */ /* Save unparsed command line copy for /proc/cmdline */ diff --git a/trunk/arch/s390/kernel/vtime.c b/trunk/arch/s390/kernel/vtime.c index c5f05b3fb2c3..ca90ee3f930e 100644 --- a/trunk/arch/s390/kernel/vtime.c +++ b/trunk/arch/s390/kernel/vtime.c @@ -110,6 +110,7 @@ void account_system_vtime(struct task_struct *tsk) S390_lowcore.steal_clock -= cputime << 12; account_system_time(tsk, 0, cputime); } +EXPORT_SYMBOL_GPL(account_system_vtime); static inline void set_vtimer(__u64 expires) { diff --git a/trunk/arch/s390/kvm/Kconfig b/trunk/arch/s390/kvm/Kconfig new file mode 100644 index 000000000000..1761b74d639b --- /dev/null +++ b/trunk/arch/s390/kvm/Kconfig @@ -0,0 +1,46 @@ +# +# KVM configuration +# +config HAVE_KVM + bool + +menuconfig VIRTUALIZATION + bool "Virtualization" + default y + ---help--- + Say Y here to get to see options for using your Linux host to run other + operating systems inside virtual machines (guests). + This option alone does not add any kernel code. + + If you say N, all options in this submenu will be skipped and disabled. + +if VIRTUALIZATION + +config KVM + tristate "Kernel-based Virtual Machine (KVM) support" + depends on HAVE_KVM && EXPERIMENTAL + select PREEMPT_NOTIFIERS + select ANON_INODES + select S390_SWITCH_AMODE + select PREEMPT + ---help--- + Support hosting paravirtualized guest machines using the SIE + virtualization capability on the mainframe. This should work + on any 64bit machine. + + This module provides access to the hardware capabilities through + a character device node named /dev/kvm. + + To compile this as a module, choose M here: the module + will be called kvm. + + If unsure, say N. + +config KVM_TRACE + bool + +# OK, it's a little counter-intuitive to do this, but it puts it neatly under +# the virtualization menu. +source drivers/virtio/Kconfig + +endif # VIRTUALIZATION diff --git a/trunk/arch/s390/kvm/Makefile b/trunk/arch/s390/kvm/Makefile new file mode 100644 index 000000000000..e5221ec0b8e3 --- /dev/null +++ b/trunk/arch/s390/kvm/Makefile @@ -0,0 +1,14 @@ +# Makefile for kernel virtual machines on s390 +# +# Copyright IBM Corp. 2008 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License (version 2 only) +# as published by the Free Software Foundation. + +common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o) + +EXTRA_CFLAGS += -Ivirt/kvm -Iarch/s390/kvm + +kvm-objs := $(common-objs) kvm-s390.o sie64a.o intercept.o interrupt.o priv.o sigp.o diag.o +obj-$(CONFIG_KVM) += kvm.o diff --git a/trunk/arch/s390/kvm/diag.c b/trunk/arch/s390/kvm/diag.c new file mode 100644 index 000000000000..f639a152869f --- /dev/null +++ b/trunk/arch/s390/kvm/diag.c @@ -0,0 +1,67 @@ +/* + * diag.c - handling diagnose instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + * Christian Borntraeger + */ + +#include +#include +#include "kvm-s390.h" + +static int __diag_time_slice_end(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 5, "%s", "diag time slice end"); + vcpu->stat.diagnose_44++; + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + return 0; +} + +static int __diag_ipl_functions(struct kvm_vcpu *vcpu) +{ + unsigned int reg = vcpu->arch.sie_block->ipa & 0xf; + unsigned long subcode = vcpu->arch.guest_gprs[reg] & 0xffff; + + VCPU_EVENT(vcpu, 5, "diag ipl functions, subcode %lx", subcode); + switch (subcode) { + case 3: + vcpu->run->s390_reset_flags = KVM_S390_RESET_CLEAR; + break; + case 4: + vcpu->run->s390_reset_flags = 0; + break; + default: + return -ENOTSUPP; + } + + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL; + vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT; + vcpu->run->exit_reason = KVM_EXIT_S390_RESET; + VCPU_EVENT(vcpu, 3, "requesting userspace resets %lx", + vcpu->run->s390_reset_flags); + return -EREMOTE; +} + +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) +{ + int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + + switch (code) { + case 0x44: + return __diag_time_slice_end(vcpu); + case 0x308: + return __diag_ipl_functions(vcpu); + default: + return -ENOTSUPP; + } +} diff --git a/trunk/arch/s390/kvm/gaccess.h b/trunk/arch/s390/kvm/gaccess.h new file mode 100644 index 000000000000..4e0633c413f3 --- /dev/null +++ b/trunk/arch/s390/kvm/gaccess.h @@ -0,0 +1,274 @@ +/* + * gaccess.h - access guest memory + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + */ + +#ifndef __KVM_S390_GACCESS_H +#define __KVM_S390_GACCESS_H + +#include +#include +#include + +static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, + u64 guestaddr) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestaddr < 2 * PAGE_SIZE) + guestaddr += prefix; + else if ((guestaddr >= prefix) && (guestaddr < prefix + 2 * PAGE_SIZE)) + guestaddr -= prefix; + + if (guestaddr > memsize) + return (void __user __force *) ERR_PTR(-EFAULT); + + guestaddr += origin; + + return (void __user *) guestaddr; +} + +static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, + u64 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u64 __user *) uptr); +} + +static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, + u32 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u32 __user *) uptr); +} + +static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, + u16 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR(uptr)) + return PTR_ERR(uptr); + + return get_user(*result, (u16 __user *) uptr); +} + +static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, + u8 *result) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return get_user(*result, (u8 __user *) uptr); +} + +static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, + u64 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 7); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u64 __user *) uptr); +} + +static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, + u32 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 3); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u32 __user *) uptr); +} + +static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, + u16 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + BUG_ON(guestaddr & 1); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u16 __user *) uptr); +} + +static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, + u8 value) +{ + void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); + + if (IS_ERR((void __force *) uptr)) + return PTR_ERR((void __force *) uptr); + + return put_user(value, (u8 __user *) uptr); +} + + +static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + int rc; + unsigned long i; + const u8 *data = from; + + for (i = 0; i < n; i++) { + rc = put_guest_u8(vcpu, guestdest++, *(data++)); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestdest < prefix) && (guestdest + n > prefix)) + goto slowpath; + + if ((guestdest < prefix + 2 * PAGE_SIZE) + && (guestdest + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestdest < 2 * PAGE_SIZE) + guestdest += prefix; + else if ((guestdest >= prefix) && (guestdest < prefix + 2 * PAGE_SIZE)) + guestdest -= prefix; + + if (guestdest + n > memsize) + return -EFAULT; + + if (guestdest + n < guestdest) + return -EFAULT; + + guestdest += origin; + + return copy_to_user((void __user *) guestdest, from, n); +slowpath: + return __copy_to_guest_slow(vcpu, guestdest, from, n); +} + +static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + int rc; + unsigned long i; + u8 *data = to; + + for (i = 0; i < n; i++) { + rc = get_guest_u8(vcpu, guestsrc++, data++); + if (rc < 0) + return rc; + } + return 0; +} + +static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + u64 prefix = vcpu->arch.sie_block->prefix; + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) + goto slowpath; + + if ((guestsrc < prefix) && (guestsrc + n > prefix)) + goto slowpath; + + if ((guestsrc < prefix + 2 * PAGE_SIZE) + && (guestsrc + n > prefix + 2 * PAGE_SIZE)) + goto slowpath; + + if (guestsrc < 2 * PAGE_SIZE) + guestsrc += prefix; + else if ((guestsrc >= prefix) && (guestsrc < prefix + 2 * PAGE_SIZE)) + guestsrc -= prefix; + + if (guestsrc + n > memsize) + return -EFAULT; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + guestsrc += origin; + + return copy_from_user(to, (void __user *) guestsrc, n); +slowpath: + return __copy_from_guest_slow(vcpu, to, guestsrc, n); +} + +static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest, + const void *from, unsigned long n) +{ + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestdest + n > memsize) + return -EFAULT; + + if (guestdest + n < guestdest) + return -EFAULT; + + guestdest += origin; + + return copy_to_user((void __user *) guestdest, from, n); +} + +static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, + u64 guestsrc, unsigned long n) +{ + u64 origin = vcpu->kvm->arch.guest_origin; + u64 memsize = vcpu->kvm->arch.guest_memsize; + + if (guestsrc + n > memsize) + return -EFAULT; + + if (guestsrc + n < guestsrc) + return -EFAULT; + + guestsrc += origin; + + return copy_from_user(to, (void __user *) guestsrc, n); +} +#endif diff --git a/trunk/arch/s390/kvm/intercept.c b/trunk/arch/s390/kvm/intercept.c new file mode 100644 index 000000000000..349581a26103 --- /dev/null +++ b/trunk/arch/s390/kvm/intercept.c @@ -0,0 +1,216 @@ +/* + * intercept.c - in-kernel handling for sie intercepts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + * Christian Borntraeger + */ + +#include +#include +#include + +#include + +#include "kvm-s390.h" +#include "gaccess.h" + +static int handle_lctg(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + + ((vcpu->arch.sie_block->ipb & 0xff00) << 4); + u64 useraddr; + int reg, rc; + + vcpu->stat.instruction_lctg++; + if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) + return -ENOTSUPP; + + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + reg = reg1; + + VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + do { + rc = get_guest_u64(vcpu, useraddr, + &vcpu->arch.sie_block->gcr[reg]); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + useraddr += 8; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static int handle_lctl(struct kvm_vcpu *vcpu) +{ + int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int reg3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + u32 val = 0; + int reg, rc; + + vcpu->stat.instruction_lctl++; + + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + disp2); + + reg = reg1; + do { + rc = get_guest_u32(vcpu, useraddr, &val); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + break; + } + vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul; + vcpu->arch.sie_block->gcr[reg] |= val; + useraddr += 4; + if (reg == reg3) + break; + reg = (reg + 1) % 16; + } while (1); + return 0; +} + +static intercept_handler_t instruction_handlers[256] = { + [0x83] = kvm_s390_handle_diag, + [0xae] = kvm_s390_handle_sigp, + [0xb2] = kvm_s390_handle_priv, + [0xb7] = handle_lctl, + [0xeb] = handle_lctg, +}; + +static int handle_noop(struct kvm_vcpu *vcpu) +{ + switch (vcpu->arch.sie_block->icptcode) { + case 0x10: + vcpu->stat.exit_external_request++; + break; + case 0x14: + vcpu->stat.exit_external_interrupt++; + break; + default: + break; /* nothing */ + } + return 0; +} + +static int handle_stop(struct kvm_vcpu *vcpu) +{ + int rc; + + vcpu->stat.exit_stop_request++; + atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + spin_lock_bh(&vcpu->arch.local_int.lock); + if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP; + rc = __kvm_s390_vcpu_store_status(vcpu, + KVM_S390_STORE_STATUS_NOADDR); + if (rc >= 0) + rc = -ENOTSUPP; + } + + if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) { + vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP; + VCPU_EVENT(vcpu, 3, "%s", "cpu stopped"); + rc = -ENOTSUPP; + } else + rc = 0; + spin_unlock_bh(&vcpu->arch.local_int.lock); + return rc; +} + +static int handle_validity(struct kvm_vcpu *vcpu) +{ + int viwhy = vcpu->arch.sie_block->ipb >> 16; + vcpu->stat.exit_validity++; + if (viwhy == 0x37) { + fault_in_pages_writeable((char __user *) + vcpu->kvm->arch.guest_origin + + vcpu->arch.sie_block->prefix, + PAGE_SIZE); + return 0; + } + VCPU_EVENT(vcpu, 2, "unhandled validity intercept code %d", + viwhy); + return -ENOTSUPP; +} + +static int handle_instruction(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + vcpu->stat.exit_instruction++; + handler = instruction_handlers[vcpu->arch.sie_block->ipa >> 8]; + if (handler) + return handler(vcpu); + return -ENOTSUPP; +} + +static int handle_prog(struct kvm_vcpu *vcpu) +{ + vcpu->stat.exit_program_interruption++; + return kvm_s390_inject_program_int(vcpu, vcpu->arch.sie_block->iprcc); +} + +static int handle_instruction_and_prog(struct kvm_vcpu *vcpu) +{ + int rc, rc2; + + vcpu->stat.exit_instr_and_program++; + rc = handle_instruction(vcpu); + rc2 = handle_prog(vcpu); + + if (rc == -ENOTSUPP) + vcpu->arch.sie_block->icptcode = 0x04; + if (rc) + return rc; + return rc2; +} + +static const intercept_handler_t intercept_funcs[0x48 >> 2] = { + [0x00 >> 2] = handle_noop, + [0x04 >> 2] = handle_instruction, + [0x08 >> 2] = handle_prog, + [0x0C >> 2] = handle_instruction_and_prog, + [0x10 >> 2] = handle_noop, + [0x14 >> 2] = handle_noop, + [0x1C >> 2] = kvm_s390_handle_wait, + [0x20 >> 2] = handle_validity, + [0x28 >> 2] = handle_stop, +}; + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu) +{ + intercept_handler_t func; + u8 code = vcpu->arch.sie_block->icptcode; + + if (code & 3 || code > 0x48) + return -ENOTSUPP; + func = intercept_funcs[code >> 2]; + if (func) + return func(vcpu); + return -ENOTSUPP; +} diff --git a/trunk/arch/s390/kvm/interrupt.c b/trunk/arch/s390/kvm/interrupt.c new file mode 100644 index 000000000000..fcd1ed8015c1 --- /dev/null +++ b/trunk/arch/s390/kvm/interrupt.c @@ -0,0 +1,592 @@ +/* + * interrupt.c - handling kvm guest interrupts + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + */ + +#include +#include +#include +#include "kvm-s390.h" +#include "gaccess.h" + +static int psw_extint_disabled(struct kvm_vcpu *vcpu) +{ + return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT); +} + +static int psw_interrupts_disabled(struct kvm_vcpu *vcpu) +{ + if ((vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_IO) || + (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT)) + return 0; + return 1; +} + +static int __interrupt_is_deliverable(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x4000ul) + return 1; + return 0; + case KVM_S390_INT_SERVICE: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + return 0; + if (vcpu->arch.sie_block->gcr[0] & 0x200ul) + return 1; + return 0; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_RESTART: + return 1; + default: + BUG(); + } + return 0; +} + +static void __set_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + set_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __unset_cpu_idle(struct kvm_vcpu *vcpu) +{ + BUG_ON(vcpu->vcpu_id > KVM_MAX_VCPUS - 1); + atomic_clear_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags); + clear_bit(vcpu->vcpu_id, vcpu->arch.local_int.float_int->idle_mask); +} + +static void __reset_intercept_indicators(struct kvm_vcpu *vcpu) +{ + atomic_clear_mask(CPUSTAT_ECALL_PEND | + CPUSTAT_IO_INT | CPUSTAT_EXT_INT | CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); + vcpu->arch.sie_block->lctl = 0x0000; +} + +static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag) +{ + atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags); +} + +static void __set_intercept_indicator(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + case KVM_S390_INT_SERVICE: + case KVM_S390_INT_VIRTIO: + if (psw_extint_disabled(vcpu)) + __set_cpuflag(vcpu, CPUSTAT_EXT_INT); + else + vcpu->arch.sie_block->lctl |= LCTL_CR0; + break; + case KVM_S390_SIGP_STOP: + __set_cpuflag(vcpu, CPUSTAT_STOP_INT); + break; + default: + BUG(); + } +} + +static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, + struct interrupt_info *inti) +{ + const unsigned short table[] = { 2, 4, 4, 6 }; + int rc, exception = 0; + + switch (inti->type) { + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg"); + vcpu->stat.deliver_emergency_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1201); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_SERVICE: + VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x", + inti->ext.ext_params); + vcpu->stat.deliver_service_signal++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2401); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_INT_VIRTIO: + VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%lx", + inti->ext.ext_params, inti->ext.ext_params2); + vcpu->stat.deliver_virtio_interrupt++; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x2603); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_CPU_ADDRESS, 0x0d00); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u32(vcpu, __LC_EXT_PARAMS, inti->ext.ext_params); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u64(vcpu, __LC_PFAULT_INTPARM, + inti->ext.ext_params2); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_SIGP_STOP: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop"); + vcpu->stat.deliver_stop_signal++; + __set_intercept_indicator(vcpu, inti); + break; + + case KVM_S390_SIGP_SET_PREFIX: + VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", + inti->prefix.address); + vcpu->stat.deliver_prefix_signal++; + vcpu->arch.sie_block->prefix = inti->prefix.address; + vcpu->arch.sie_block->ihcpu = 0xffff; + break; + + case KVM_S390_RESTART: + VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart"); + vcpu->stat.deliver_restart_signal++; + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, + restart_old_psw), &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + offsetof(struct _lowcore, restart_psw), sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + case KVM_S390_PROGRAM_INT: + VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x", + inti->pgm.code, + table[vcpu->arch.sie_block->ipa >> 14]); + vcpu->stat.deliver_program_int++; + rc = put_guest_u16(vcpu, __LC_PGM_INT_CODE, inti->pgm.code); + if (rc == -EFAULT) + exception = 1; + + rc = put_guest_u16(vcpu, __LC_PGM_ILC, + table[vcpu->arch.sie_block->ipa >> 14]); + if (rc == -EFAULT) + exception = 1; + + rc = copy_to_guest(vcpu, __LC_PGM_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_PGM_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + break; + + default: + BUG(); + } + + if (exception) { + VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" + " interrupt"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + if (inti->type == KVM_S390_PROGRAM_INT) { + printk(KERN_WARNING "kvm: recursive program check\n"); + BUG(); + } + } +} + +static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) +{ + int rc, exception = 0; + + if (psw_extint_disabled(vcpu)) + return 0; + if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul)) + return 0; + rc = put_guest_u16(vcpu, __LC_EXT_INT_CODE, 0x1004); + if (rc == -EFAULT) + exception = 1; + rc = copy_to_guest(vcpu, __LC_EXT_OLD_PSW, + &vcpu->arch.sie_block->gpsw, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + rc = copy_from_guest(vcpu, &vcpu->arch.sie_block->gpsw, + __LC_EXT_NEW_PSW, sizeof(psw_t)); + if (rc == -EFAULT) + exception = 1; + + if (exception) { + VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \ + " ckc interrupt"); + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + return 0; + } + + return 1; +} + +int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct float_interrupt *fi = vcpu->arch.local_int.float_int; + struct interrupt_info *inti; + int rc = 0; + + if (atomic_read(&li->active)) { + spin_lock_bh(&li->lock); + list_for_each_entry(inti, &li->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&li->lock); + } + + if ((!rc) && atomic_read(&fi->active)) { + spin_lock_bh(&fi->lock); + list_for_each_entry(inti, &fi->list, list) + if (__interrupt_is_deliverable(vcpu, inti)) { + rc = 1; + break; + } + spin_unlock_bh(&fi->lock); + } + + if ((!rc) && (vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) { + if ((!psw_extint_disabled(vcpu)) && + (vcpu->arch.sie_block->gcr[0] & 0x800ul)) + rc = 1; + } + + return rc; +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return 0; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu) +{ + u64 now, sltime; + DECLARE_WAITQUEUE(wait, current); + + vcpu->stat.exit_wait_state++; + if (kvm_cpu_has_interrupt(vcpu)) + return 0; + + if (psw_interrupts_disabled(vcpu)) { + VCPU_EVENT(vcpu, 3, "%s", "disabled wait"); + __unset_cpu_idle(vcpu); + return -ENOTSUPP; /* disabled wait */ + } + + if (psw_extint_disabled(vcpu) || + (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))) { + VCPU_EVENT(vcpu, 3, "%s", "enabled wait w/o timer"); + goto no_timer; + } + + now = get_clock() + vcpu->arch.sie_block->epoch; + if (vcpu->arch.sie_block->ckc < now) { + __unset_cpu_idle(vcpu); + return 0; + } + + sltime = (vcpu->arch.sie_block->ckc - now) / (0xf4240000ul / HZ) + 1; + + vcpu->arch.ckc_timer.expires = jiffies + sltime; + + add_timer(&vcpu->arch.ckc_timer); + VCPU_EVENT(vcpu, 5, "enabled wait timer:%lx jiffies", sltime); +no_timer: + spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + __set_cpu_idle(vcpu); + vcpu->arch.local_int.timer_due = 0; + add_wait_queue(&vcpu->arch.local_int.wq, &wait); + while (list_empty(&vcpu->arch.local_int.list) && + list_empty(&vcpu->arch.local_int.float_int->list) && + (!vcpu->arch.local_int.timer_due) && + !signal_pending(current)) { + set_current_state(TASK_INTERRUPTIBLE); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + vcpu_put(vcpu); + schedule(); + vcpu_load(vcpu); + spin_lock_bh(&vcpu->arch.local_int.float_int->lock); + spin_lock_bh(&vcpu->arch.local_int.lock); + } + __unset_cpu_idle(vcpu); + __set_current_state(TASK_RUNNING); + remove_wait_queue(&vcpu->wq, &wait); + spin_unlock_bh(&vcpu->arch.local_int.lock); + spin_unlock_bh(&vcpu->arch.local_int.float_int->lock); + del_timer(&vcpu->arch.ckc_timer); + return 0; +} + +void kvm_s390_idle_wakeup(unsigned long data) +{ + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + + spin_lock_bh(&vcpu->arch.local_int.lock); + vcpu->arch.local_int.timer_due = 1; + if (waitqueue_active(&vcpu->arch.local_int.wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&vcpu->arch.local_int.lock); +} + + +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct float_interrupt *fi = vcpu->arch.local_int.float_int; + struct interrupt_info *n, *inti = NULL; + int deliver; + + __reset_intercept_indicators(vcpu); + if (atomic_read(&li->active)) { + do { + deliver = 0; + spin_lock_bh(&li->lock); + list_for_each_entry_safe(inti, n, &li->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&li->list)) + atomic_set(&li->active, 0); + spin_unlock_bh(&li->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } + + if ((vcpu->arch.sie_block->ckc < + get_clock() + vcpu->arch.sie_block->epoch)) + __try_deliver_ckc_interrupt(vcpu); + + if (atomic_read(&fi->active)) { + do { + deliver = 0; + spin_lock_bh(&fi->lock); + list_for_each_entry_safe(inti, n, &fi->list, list) { + if (__interrupt_is_deliverable(vcpu, inti)) { + list_del(&inti->list); + deliver = 1; + break; + } + __set_intercept_indicator(vcpu, inti); + } + if (list_empty(&fi->list)) + atomic_set(&fi->active, 0); + spin_unlock_bh(&fi->lock); + if (deliver) { + __do_deliver_interrupt(vcpu, inti); + kfree(inti); + } + } while (deliver); + } +} + +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code) +{ + struct local_interrupt *li = &vcpu->arch.local_int; + struct interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_PROGRAM_INT;; + inti->pgm.code = code; + + VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code); + spin_lock_bh(&li->lock); + list_add(&inti->list, &li->list); + atomic_set(&li->active, 1); + BUG_ON(waitqueue_active(&li->wq)); + spin_unlock_bh(&li->lock); + return 0; +} + +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int) +{ + struct local_interrupt *li; + struct float_interrupt *fi; + struct interrupt_info *inti; + int sigcpu; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_INT_VIRTIO: + VM_EVENT(kvm, 5, "inject: virtio parm:%x,parm64:%lx", + s390int->parm, s390int->parm64); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + inti->ext.ext_params2 = s390int->parm64; + break; + case KVM_S390_INT_SERVICE: + VM_EVENT(kvm, 5, "inject: sclp parm:%x", s390int->parm); + inti->type = s390int->type; + inti->ext.ext_params = s390int->parm; + break; + case KVM_S390_PROGRAM_INT: + case KVM_S390_SIGP_STOP: + case KVM_S390_INT_EMERGENCY: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&kvm->lock); + fi = &kvm->arch.float_int; + spin_lock_bh(&fi->lock); + list_add_tail(&inti->list, &fi->list); + atomic_set(&fi->active, 1); + sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS); + if (sigcpu == KVM_MAX_VCPUS) { + do { + sigcpu = fi->next_rr_cpu++; + if (sigcpu == KVM_MAX_VCPUS) + sigcpu = fi->next_rr_cpu = 0; + } while (fi->local_int[sigcpu] == NULL); + } + li = fi->local_int[sigcpu]; + spin_lock_bh(&li->lock); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + spin_unlock_bh(&fi->lock); + mutex_unlock(&kvm->lock); + return 0; +} + +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int) +{ + struct local_interrupt *li; + struct interrupt_info *inti; + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + switch (s390int->type) { + case KVM_S390_PROGRAM_INT: + if (s390int->parm & 0xffff0000) { + kfree(inti); + return -EINVAL; + } + inti->type = s390int->type; + inti->pgm.code = s390int->parm; + VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)", + s390int->parm); + break; + case KVM_S390_SIGP_STOP: + case KVM_S390_RESTART: + case KVM_S390_SIGP_SET_PREFIX: + case KVM_S390_INT_EMERGENCY: + VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type); + inti->type = s390int->type; + break; + case KVM_S390_INT_VIRTIO: + case KVM_S390_INT_SERVICE: + default: + kfree(inti); + return -EINVAL; + } + + mutex_lock(&vcpu->kvm->lock); + li = &vcpu->arch.local_int; + spin_lock_bh(&li->lock); + if (inti->type == KVM_S390_PROGRAM_INT) + list_add(&inti->list, &li->list); + else + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (inti->type == KVM_S390_SIGP_STOP) + li->action_bits |= ACTION_STOP_ON_STOP; + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&vcpu->arch.local_int.wq); + spin_unlock_bh(&li->lock); + mutex_unlock(&vcpu->kvm->lock); + return 0; +} diff --git a/trunk/arch/s390/kvm/kvm-s390.c b/trunk/arch/s390/kvm/kvm-s390.c new file mode 100644 index 000000000000..98d1e73e01f1 --- /dev/null +++ b/trunk/arch/s390/kvm/kvm-s390.c @@ -0,0 +1,685 @@ +/* + * s390host.c -- hosting zSeries kernel virtual machines + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + * Christian Borntraeger + * Heiko Carstens + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kvm-s390.h" +#include "gaccess.h" + +#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU + +struct kvm_stats_debugfs_item debugfs_entries[] = { + { "userspace_handled", VCPU_STAT(exit_userspace) }, + { "exit_validity", VCPU_STAT(exit_validity) }, + { "exit_stop_request", VCPU_STAT(exit_stop_request) }, + { "exit_external_request", VCPU_STAT(exit_external_request) }, + { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, + { "exit_instruction", VCPU_STAT(exit_instruction) }, + { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, + { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, + { "instruction_lctg", VCPU_STAT(instruction_lctg) }, + { "instruction_lctl", VCPU_STAT(instruction_lctl) }, + { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, + { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, + { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) }, + { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, + { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, + { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, + { "deliver_program_interruption", VCPU_STAT(deliver_program_int) }, + { "exit_wait_state", VCPU_STAT(exit_wait_state) }, + { "instruction_stidp", VCPU_STAT(instruction_stidp) }, + { "instruction_spx", VCPU_STAT(instruction_spx) }, + { "instruction_stpx", VCPU_STAT(instruction_stpx) }, + { "instruction_stap", VCPU_STAT(instruction_stap) }, + { "instruction_storage_key", VCPU_STAT(instruction_storage_key) }, + { "instruction_stsch", VCPU_STAT(instruction_stsch) }, + { "instruction_chsc", VCPU_STAT(instruction_chsc) }, + { "instruction_stsi", VCPU_STAT(instruction_stsi) }, + { "instruction_stfl", VCPU_STAT(instruction_stfl) }, + { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, + { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, + { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, + { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, + { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, + { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, + { "diagnose_44", VCPU_STAT(diagnose_44) }, + { NULL } +}; + + +/* Section: not file related */ +void kvm_arch_hardware_enable(void *garbage) +{ + /* every s390 is virtualization enabled ;-) */ +} + +void kvm_arch_hardware_disable(void *garbage) +{ +} + +void decache_vcpus_on_cpu(int cpu) +{ +} + +int kvm_arch_hardware_setup(void) +{ + return 0; +} + +void kvm_arch_hardware_unsetup(void) +{ +} + +void kvm_arch_check_processor_compat(void *rtn) +{ +} + +int kvm_arch_init(void *opaque) +{ + return 0; +} + +void kvm_arch_exit(void) +{ +} + +/* Section: device related */ +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + if (ioctl == KVM_S390_ENABLE_SIE) + return s390_enable_sie(); + return -EINVAL; +} + +int kvm_dev_ioctl_check_extension(long ext) +{ + return 0; +} + +/* Section: vm related */ +/* + * Get (and clear) the dirty memory log for a memory slot. + */ +int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, + struct kvm_dirty_log *log) +{ + return 0; +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + int r; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + r = -EFAULT; + if (copy_from_user(&s390int, argp, sizeof(s390int))) + break; + r = kvm_s390_inject_vm(kvm, &s390int); + break; + } + default: + r = -EINVAL; + } + + return r; +} + +struct kvm *kvm_arch_create_vm(void) +{ + struct kvm *kvm; + int rc; + char debug_name[16]; + + rc = s390_enable_sie(); + if (rc) + goto out_nokvm; + + rc = -ENOMEM; + kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); + if (!kvm) + goto out_nokvm; + + kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL); + if (!kvm->arch.sca) + goto out_nosca; + + sprintf(debug_name, "kvm-%u", current->pid); + + kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long)); + if (!kvm->arch.dbf) + goto out_nodbf; + + spin_lock_init(&kvm->arch.float_int.lock); + INIT_LIST_HEAD(&kvm->arch.float_int.list); + + debug_register_view(kvm->arch.dbf, &debug_sprintf_view); + VM_EVENT(kvm, 3, "%s", "vm created"); + + try_module_get(THIS_MODULE); + + return kvm; +out_nodbf: + free_page((unsigned long)(kvm->arch.sca)); +out_nosca: + kfree(kvm); +out_nokvm: + return ERR_PTR(rc); +} + +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + debug_unregister(kvm->arch.dbf); + free_page((unsigned long)(kvm->arch.sca)); + kfree(kvm); + module_put(THIS_MODULE); +} + +/* Section: vcpu related */ +int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) +{ + return 0; +} + +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but does'nt call it */ + BUG(); +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + save_fp_regs(&vcpu->arch.host_fpregs); + save_access_regs(vcpu->arch.host_acrs); + vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK; + restore_fp_regs(&vcpu->arch.guest_fpregs); + restore_access_regs(vcpu->arch.guest_acrs); + + if (signal_pending(current)) + atomic_set_mask(CPUSTAT_STOP_INT, + &vcpu->arch.sie_block->cpuflags); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + save_fp_regs(&vcpu->arch.guest_fpregs); + save_access_regs(vcpu->arch.guest_acrs); + restore_fp_regs(&vcpu->arch.host_fpregs); + restore_access_regs(vcpu->arch.host_acrs); +} + +static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu) +{ + /* this equals initial cpu reset in pop, but we don't switch to ESA */ + vcpu->arch.sie_block->gpsw.mask = 0UL; + vcpu->arch.sie_block->gpsw.addr = 0UL; + vcpu->arch.sie_block->prefix = 0UL; + vcpu->arch.sie_block->ihcpu = 0xffff; + vcpu->arch.sie_block->cputm = 0UL; + vcpu->arch.sie_block->ckc = 0UL; + vcpu->arch.sie_block->todpr = 0; + memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64)); + vcpu->arch.sie_block->gcr[0] = 0xE0UL; + vcpu->arch.sie_block->gcr[14] = 0xC2000000UL; + vcpu->arch.guest_fpregs.fpc = 0; + asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc)); + vcpu->arch.sie_block->gbea = 1; +} + +int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) +{ + atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH); + vcpu->arch.sie_block->gmslm = 0xffffffffffUL; + vcpu->arch.sie_block->gmsor = 0x000000000000; + vcpu->arch.sie_block->ecb = 2; + vcpu->arch.sie_block->eca = 0xC1002001U; + setup_timer(&vcpu->arch.ckc_timer, kvm_s390_idle_wakeup, + (unsigned long) vcpu); + get_cpu_id(&vcpu->arch.cpu_id); + vcpu->arch.cpu_id.version = 0xfe; + return 0; +} + +struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, + unsigned int id) +{ + struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); + int rc = -ENOMEM; + + if (!vcpu) + goto out_nomem; + + vcpu->arch.sie_block = (struct sie_block *) get_zeroed_page(GFP_KERNEL); + + if (!vcpu->arch.sie_block) + goto out_free_cpu; + + vcpu->arch.sie_block->icpua = id; + BUG_ON(!kvm->arch.sca); + BUG_ON(kvm->arch.sca->cpu[id].sda); + kvm->arch.sca->cpu[id].sda = (__u64) vcpu->arch.sie_block; + vcpu->arch.sie_block->scaoh = (__u32)(((__u64)kvm->arch.sca) >> 32); + vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; + + spin_lock_init(&vcpu->arch.local_int.lock); + INIT_LIST_HEAD(&vcpu->arch.local_int.list); + vcpu->arch.local_int.float_int = &kvm->arch.float_int; + spin_lock_bh(&kvm->arch.float_int.lock); + kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int; + init_waitqueue_head(&vcpu->arch.local_int.wq); + vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags; + spin_unlock_bh(&kvm->arch.float_int.lock); + + rc = kvm_vcpu_init(vcpu, kvm, id); + if (rc) + goto out_free_cpu; + VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu, + vcpu->arch.sie_block); + + try_module_get(THIS_MODULE); + + return vcpu; +out_free_cpu: + kfree(vcpu); +out_nomem: + return ERR_PTR(rc); +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + VCPU_EVENT(vcpu, 3, "%s", "destroy cpu"); + free_page((unsigned long)(vcpu->arch.sie_block)); + kfree(vcpu); + module_put(THIS_MODULE); +} + +int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) +{ + /* kvm common code refers to this, but never calls it */ + BUG(); + return 0; +} + +static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu) +{ + vcpu_load(vcpu); + kvm_s390_vcpu_initial_reset(vcpu); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_gprs, ®s->gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) +{ + vcpu_load(vcpu); + memcpy(®s->gprs, &vcpu->arch.guest_gprs, sizeof(regs->gprs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_acrs, &sregs->acrs, sizeof(sregs->acrs)); + memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, + struct kvm_sregs *sregs) +{ + vcpu_load(vcpu); + memcpy(&sregs->acrs, &vcpu->arch.guest_acrs, sizeof(sregs->acrs)); + memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs)); + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_load(vcpu); + memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs)); + vcpu->arch.guest_fpregs.fpc = fpu->fpc; + vcpu_put(vcpu); + return 0; +} + +int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu) +{ + vcpu_load(vcpu); + memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs)); + fpu->fpc = vcpu->arch.guest_fpregs.fpc; + vcpu_put(vcpu); + return 0; +} + +static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw) +{ + int rc = 0; + + vcpu_load(vcpu); + if (atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_RUNNING) + rc = -EBUSY; + else + vcpu->arch.sie_block->gpsw = psw; + vcpu_put(vcpu); + return rc; +} + +int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, + struct kvm_translation *tr) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_debug_guest(struct kvm_vcpu *vcpu, + struct kvm_debug_guest *dbg) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + return -EINVAL; /* not implemented yet */ +} + +static void __vcpu_run(struct kvm_vcpu *vcpu) +{ + memcpy(&vcpu->arch.sie_block->gg14, &vcpu->arch.guest_gprs[14], 16); + + if (need_resched()) + schedule(); + + vcpu->arch.sie_block->icptcode = 0; + local_irq_disable(); + kvm_guest_enter(); + local_irq_enable(); + VCPU_EVENT(vcpu, 6, "entering sie flags %x", + atomic_read(&vcpu->arch.sie_block->cpuflags)); + sie64a(vcpu->arch.sie_block, vcpu->arch.guest_gprs); + VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", + vcpu->arch.sie_block->icptcode); + local_irq_disable(); + kvm_guest_exit(); + local_irq_enable(); + + memcpy(&vcpu->arch.guest_gprs[14], &vcpu->arch.sie_block->gg14, 16); +} + +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +{ + int rc; + sigset_t sigsaved; + + vcpu_load(vcpu); + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); + + atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + + BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL); + + switch (kvm_run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + vcpu->arch.sie_block->gpsw.mask = kvm_run->s390_sieic.mask; + vcpu->arch.sie_block->gpsw.addr = kvm_run->s390_sieic.addr; + break; + case KVM_EXIT_UNKNOWN: + case KVM_EXIT_S390_RESET: + break; + default: + BUG(); + } + + might_sleep(); + + do { + kvm_s390_deliver_pending_interrupts(vcpu); + __vcpu_run(vcpu); + rc = kvm_handle_sie_intercept(vcpu); + } while (!signal_pending(current) && !rc); + + if (signal_pending(current) && !rc) + rc = -EINTR; + + if (rc == -ENOTSUPP) { + /* intercept cannot be handled in-kernel, prepare kvm-run */ + kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; + kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode; + kvm_run->s390_sieic.mask = vcpu->arch.sie_block->gpsw.mask; + kvm_run->s390_sieic.addr = vcpu->arch.sie_block->gpsw.addr; + kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa; + kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb; + rc = 0; + } + + if (rc == -EREMOTE) { + /* intercept was handled, but userspace support is needed + * kvm_run has been prepared by the handler */ + rc = 0; + } + + if (vcpu->sigset_active) + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + + vcpu_put(vcpu); + + vcpu->stat.exit_userspace++; + return rc; +} + +static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, const void *from, + unsigned long n, int prefix) +{ + if (prefix) + return copy_to_guest(vcpu, guestdest, from, n); + else + return copy_to_guest_absolute(vcpu, guestdest, from, n); +} + +/* + * store status at address + * we use have two special cases: + * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit + * KVM_S390_STORE_STATUS_PREFIXED: -> prefix + */ +int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + const unsigned char archmode = 1; + int prefix; + + if (addr == KVM_S390_STORE_STATUS_NOADDR) { + if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 0; + } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) { + if (copy_to_guest(vcpu, 163ul, &archmode, 1)) + return -EFAULT; + addr = SAVE_AREA_BASE; + prefix = 1; + } else + prefix = 0; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, fp_regs), + vcpu->arch.guest_fpregs.fprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, gp_regs), + vcpu->arch.guest_gprs, 128, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, psw), + &vcpu->arch.sie_block->gpsw, 16, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, pref_reg), + &vcpu->arch.sie_block->prefix, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area_s390x, fp_ctrl_reg), + &vcpu->arch.guest_fpregs.fpc, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, tod_reg), + &vcpu->arch.sie_block->todpr, 4, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, timer), + &vcpu->arch.sie_block->cputm, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, clk_cmp), + &vcpu->arch.sie_block->ckc, 8, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, addr + offsetof(struct save_area_s390x, acc_regs), + &vcpu->arch.guest_acrs, 64, prefix)) + return -EFAULT; + + if (__guestcopy(vcpu, + addr + offsetof(struct save_area_s390x, ctrl_regs), + &vcpu->arch.sie_block->gcr, 128, prefix)) + return -EFAULT; + return 0; +} + +static int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + int rc; + + vcpu_load(vcpu); + rc = __kvm_s390_vcpu_store_status(vcpu, addr); + vcpu_put(vcpu); + return rc; +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_S390_INTERRUPT: { + struct kvm_s390_interrupt s390int; + + if (copy_from_user(&s390int, argp, sizeof(s390int))) + return -EFAULT; + return kvm_s390_inject_vcpu(vcpu, &s390int); + } + case KVM_S390_STORE_STATUS: + return kvm_s390_vcpu_store_status(vcpu, arg); + case KVM_S390_SET_INITIAL_PSW: { + psw_t psw; + + if (copy_from_user(&psw, argp, sizeof(psw))) + return -EFAULT; + return kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); + } + case KVM_S390_INITIAL_RESET: + return kvm_arch_vcpu_ioctl_initial_reset(vcpu); + default: + ; + } + return -EINVAL; +} + +/* Section: memory related */ +int kvm_arch_set_memory_region(struct kvm *kvm, + struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot old, + int user_alloc) +{ + /* A few sanity checks. We can have exactly one memory slot which has + to start at guest virtual zero and which has to be located at a + page boundary in userland and which has to end at a page boundary. + The memory in userland is ok to be fragmented into various different + vmas. It is okay to mmap() and munmap() stuff in this slot after + doing this call at any time */ + + if (mem->slot) + return -EINVAL; + + if (mem->guest_phys_addr) + return -EINVAL; + + if (mem->userspace_addr & (PAGE_SIZE - 1)) + return -EINVAL; + + if (mem->memory_size & (PAGE_SIZE - 1)) + return -EINVAL; + + kvm->arch.guest_origin = mem->userspace_addr; + kvm->arch.guest_memsize = mem->memory_size; + + /* FIXME: we do want to interrupt running CPUs and update their memory + configuration now to avoid race conditions. But hey, changing the + memory layout while virtual CPUs are running is usually bad + programming practice. */ + + return 0; +} + +gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn) +{ + return gfn; +} + +static int __init kvm_s390_init(void) +{ + return kvm_init(NULL, sizeof(struct kvm_vcpu), THIS_MODULE); +} + +static void __exit kvm_s390_exit(void) +{ + kvm_exit(); +} + +module_init(kvm_s390_init); +module_exit(kvm_s390_exit); diff --git a/trunk/arch/s390/kvm/kvm-s390.h b/trunk/arch/s390/kvm/kvm-s390.h new file mode 100644 index 000000000000..3893cf12eacf --- /dev/null +++ b/trunk/arch/s390/kvm/kvm-s390.h @@ -0,0 +1,64 @@ +/* + * kvm_s390.h - definition for kvm on s390 + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + * Christian Borntraeger + */ + +#ifndef ARCH_S390_KVM_S390_H +#define ARCH_S390_KVM_S390_H + +#include +#include + +typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); + +int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); + +#define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_kvm->arch.dbf, d_loglevel, d_string "\n", \ + d_args); \ +} while (0) + +#define VCPU_EVENT(d_vcpu, d_loglevel, d_string, d_args...)\ +do { \ + debug_sprintf_event(d_vcpu->kvm->arch.dbf, d_loglevel, \ + "%02d[%016lx-%016lx]: " d_string "\n", d_vcpu->vcpu_id, \ + d_vcpu->arch.sie_block->gpsw.mask, d_vcpu->arch.sie_block->gpsw.addr,\ + d_args); \ +} while (0) + +static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu) +{ + return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT; +} + +int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); +void kvm_s390_idle_wakeup(unsigned long data); +void kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu); +int kvm_s390_inject_vm(struct kvm *kvm, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, + struct kvm_s390_interrupt *s390int); +int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); + +/* implemented in priv.c */ +int kvm_s390_handle_priv(struct kvm_vcpu *vcpu); + +/* implemented in sigp.c */ +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); + +/* implemented in kvm-s390.c */ +int __kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, + unsigned long addr); +/* implemented in diag.c */ +int kvm_s390_handle_diag(struct kvm_vcpu *vcpu); + +#endif diff --git a/trunk/arch/s390/kvm/priv.c b/trunk/arch/s390/kvm/priv.c new file mode 100644 index 000000000000..1465946325c5 --- /dev/null +++ b/trunk/arch/s390/kvm/priv.c @@ -0,0 +1,323 @@ +/* + * priv.c - handling privileged instructions + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + * Christian Borntraeger + */ + +#include +#include +#include +#include +#include +#include +#include "gaccess.h" +#include "kvm-s390.h" + +static int handle_set_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address = 0; + u8 tmp; + + vcpu->stat.instruction_spx++; + + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + /* get the value */ + if (get_guest_u32(vcpu, operand2, &address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + address = address & 0x7fffe000u; + + /* make sure that the new value is valid memory */ + if (copy_from_guest_absolute(vcpu, &tmp, address, 1) || + (copy_from_guest_absolute(vcpu, &tmp, address + PAGE_SIZE, 1))) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + vcpu->arch.sie_block->prefix = address; + vcpu->arch.sie_block->ihcpu = 0xffff; + + VCPU_EVENT(vcpu, 5, "setting prefix to %x", address); +out: + return 0; +} + +static int handle_store_prefix(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + u32 address; + + vcpu->stat.instruction_stpx++; + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + /* must be word boundary */ + if (operand2 & 3) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + address = vcpu->arch.sie_block->prefix; + address = address & 0x7fffe000u; + + /* get the value */ + if (put_guest_u32(vcpu, operand2, address)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing prefix to %x", address); +out: + return 0; +} + +static int handle_store_cpu_address(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 useraddr; + int rc; + + vcpu->stat.instruction_stap++; + useraddr = disp2; + if (base2) + useraddr += vcpu->arch.guest_gprs[base2]; + + if (useraddr & 1) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u16(vcpu, useraddr, vcpu->vcpu_id); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "storing cpu address to %lx", useraddr); +out: + return 0; +} + +static int handle_skey(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_storage_key++; + vcpu->arch.sie_block->gpsw.addr -= 4; + VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation"); + return 0; +} + +static int handle_stsch(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_stsch++; + VCPU_EVENT(vcpu, 4, "%s", "store subchannel - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static int handle_chsc(struct kvm_vcpu *vcpu) +{ + vcpu->stat.instruction_chsc++; + VCPU_EVENT(vcpu, 4, "%s", "channel subsystem call - CC3"); + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (3 & 3ul) << 44; + return 0; +} + +static unsigned int kvm_stfl(void) +{ + asm volatile( + " .insn s,0xb2b10000,0(0)\n" /* stfl */ + "0:\n" + EX_TABLE(0b, 0b)); + return S390_lowcore.stfl_fac_list; +} + +static int handle_stfl(struct kvm_vcpu *vcpu) +{ + unsigned int facility_list = kvm_stfl(); + int rc; + + vcpu->stat.instruction_stfl++; + facility_list &= ~(1UL<<24); /* no stfle */ + + rc = copy_to_guest(vcpu, offsetof(struct _lowcore, stfl_fac_list), + &facility_list, sizeof(facility_list)); + if (rc == -EFAULT) + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + else + VCPU_EVENT(vcpu, 5, "store facility list value %x", + facility_list); + return 0; +} + +static int handle_stidp(struct kvm_vcpu *vcpu) +{ + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + int rc; + + vcpu->stat.instruction_stidp++; + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + if (operand2 & 7) { + kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + goto out; + } + + rc = put_guest_u64(vcpu, operand2, vcpu->arch.stidp_data); + if (rc == -EFAULT) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out; + } + + VCPU_EVENT(vcpu, 5, "%s", "store cpu id"); +out: + return 0; +} + +static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + int cpus = 0; + int n; + + spin_lock_bh(&fi->lock); + for (n = 0; n < KVM_MAX_VCPUS; n++) + if (fi->local_int[n]) + cpus++; + spin_unlock_bh(&fi->lock); + + /* deal with other level 3 hypervisors */ + if (stsi(mem, 3, 2, 2) == -ENOSYS) + mem->count = 0; + if (mem->count < 8) + mem->count++; + for (n = mem->count - 1; n > 0 ; n--) + memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0])); + + mem->vm[0].cpus_total = cpus; + mem->vm[0].cpus_configured = cpus; + mem->vm[0].cpus_standby = 0; + mem->vm[0].cpus_reserved = 0; + mem->vm[0].caf = 1000; + memcpy(mem->vm[0].name, "KVMguest", 8); + ASCEBC(mem->vm[0].name, 8); + memcpy(mem->vm[0].cpi, "KVM/Linux ", 16); + ASCEBC(mem->vm[0].cpi, 16); +} + +static int handle_stsi(struct kvm_vcpu *vcpu) +{ + int fc = (vcpu->arch.guest_gprs[0] & 0xf0000000) >> 28; + int sel1 = vcpu->arch.guest_gprs[0] & 0xff; + int sel2 = vcpu->arch.guest_gprs[1] & 0xffff; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u64 operand2; + unsigned long mem; + + vcpu->stat.instruction_stsi++; + VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2); + + operand2 = disp2; + if (base2) + operand2 += vcpu->arch.guest_gprs[base2]; + + if (operand2 & 0xfff && fc > 0) + return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); + + switch (fc) { + case 0: + vcpu->arch.guest_gprs[0] = 3 << 28; + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + return 0; + case 1: /* same handling for 1 and 2 */ + case 2: + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + if (stsi((void *) mem, fc, sel1, sel2) == -ENOSYS) + goto out_mem; + break; + case 3: + if (sel1 != 2 || sel2 != 2) + goto out_fail; + mem = get_zeroed_page(GFP_KERNEL); + if (!mem) + goto out_fail; + handle_stsi_3_2_2(vcpu, (void *) mem); + break; + default: + goto out_fail; + } + + if (copy_to_guest_absolute(vcpu, operand2, (void *) mem, PAGE_SIZE)) { + kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + goto out_mem; + } + free_page(mem); + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.guest_gprs[0] = 0; + return 0; +out_mem: + free_page(mem); +out_fail: + /* condition code 3 */ + vcpu->arch.sie_block->gpsw.mask |= 3ul << 44; + return 0; +} + +static intercept_handler_t priv_handlers[256] = { + [0x02] = handle_stidp, + [0x10] = handle_set_prefix, + [0x11] = handle_store_prefix, + [0x12] = handle_store_cpu_address, + [0x29] = handle_skey, + [0x2a] = handle_skey, + [0x2b] = handle_skey, + [0x34] = handle_stsch, + [0x5f] = handle_chsc, + [0x7d] = handle_stsi, + [0xb1] = handle_stfl, +}; + +int kvm_s390_handle_priv(struct kvm_vcpu *vcpu) +{ + intercept_handler_t handler; + + handler = priv_handlers[vcpu->arch.sie_block->ipa & 0x00ff]; + if (handler) + return handler(vcpu); + return -ENOTSUPP; +} diff --git a/trunk/arch/s390/kvm/sie64a.S b/trunk/arch/s390/kvm/sie64a.S new file mode 100644 index 000000000000..934fd6a885f6 --- /dev/null +++ b/trunk/arch/s390/kvm/sie64a.S @@ -0,0 +1,47 @@ +/* + * sie64a.S - low level sie call + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Heiko Carstens + */ + +#include +#include + +SP_R5 = 5 * 8 # offset into stackframe +SP_R6 = 6 * 8 + +/* + * sie64a calling convention: + * %r2 pointer to sie control block + * %r3 guest register save area + */ + .globl sie64a +sie64a: + lgr %r5,%r3 + stmg %r5,%r14,SP_R5(%r15) # save register on entry + lgr %r14,%r2 # pointer to sie control block + lmg %r0,%r13,0(%r3) # load guest gprs 0-13 +sie_inst: + sie 0(%r14) + lg %r14,SP_R5(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lghi %r2,0 + lmg %r6,%r14,SP_R6(%r15) + br %r14 + +sie_err: + lg %r14,SP_R5(%r15) + stmg %r0,%r13,0(%r14) # save guest gprs 0-13 + lghi %r2,-EFAULT + lmg %r6,%r14,SP_R6(%r15) + br %r14 + + .section __ex_table,"a" + .quad sie_inst,sie_err + .previous diff --git a/trunk/arch/s390/kvm/sigp.c b/trunk/arch/s390/kvm/sigp.c new file mode 100644 index 000000000000..0a236acfb5f6 --- /dev/null +++ b/trunk/arch/s390/kvm/sigp.c @@ -0,0 +1,288 @@ +/* + * sigp.c - handlinge interprocessor communication + * + * Copyright IBM Corp. 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (version 2 only) + * as published by the Free Software Foundation. + * + * Author(s): Carsten Otte + * Christian Borntraeger + */ + +#include +#include +#include "gaccess.h" +#include "kvm-s390.h" + +/* sigp order codes */ +#define SIGP_SENSE 0x01 +#define SIGP_EXTERNAL_CALL 0x02 +#define SIGP_EMERGENCY 0x03 +#define SIGP_START 0x04 +#define SIGP_STOP 0x05 +#define SIGP_RESTART 0x06 +#define SIGP_STOP_STORE_STATUS 0x09 +#define SIGP_INITIAL_CPU_RESET 0x0b +#define SIGP_CPU_RESET 0x0c +#define SIGP_SET_PREFIX 0x0d +#define SIGP_STORE_STATUS_ADDR 0x0e +#define SIGP_SET_ARCH 0x12 + +/* cpu status bits */ +#define SIGP_STAT_EQUIPMENT_CHECK 0x80000000UL +#define SIGP_STAT_INCORRECT_STATE 0x00000200UL +#define SIGP_STAT_INVALID_PARAMETER 0x00000100UL +#define SIGP_STAT_EXT_CALL_PENDING 0x00000080UL +#define SIGP_STAT_STOPPED 0x00000040UL +#define SIGP_STAT_OPERATOR_INTERV 0x00000020UL +#define SIGP_STAT_CHECK_STOP 0x00000010UL +#define SIGP_STAT_INOPERATIVE 0x00000004UL +#define SIGP_STAT_INVALID_ORDER 0x00000002UL +#define SIGP_STAT_RECEIVER_CHECK 0x00000001UL + + +static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + spin_lock_bh(&fi->lock); + if (fi->local_int[cpu_addr] == NULL) + rc = 3; /* not operational */ + else if (atomic_read(fi->local_int[cpu_addr]->cpuflags) + & CPUSTAT_RUNNING) { + *reg &= 0xffffffff00000000UL; + rc = 1; /* status stored */ + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STAT_STOPPED; + rc = 1; /* status stored */ + } + spin_unlock_bh(&fi->lock); + + VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc); + return rc; +} + +static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_INT_EMERGENCY; + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ +unlock: + spin_unlock_bh(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr); + return rc; +} + +static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int store) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + + if (cpu_addr >= KVM_MAX_VCPUS) + return 3; /* not operational */ + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return -ENOMEM; + + inti->type = KVM_S390_SIGP_STOP; + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + if (li == NULL) { + rc = 3; /* not operational */ + kfree(inti); + goto unlock; + } + spin_lock_bh(&li->lock); + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags); + if (store) + li->action_bits |= ACTION_STORE_ON_STOP; + li->action_bits |= ACTION_STOP_ON_STOP; + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + spin_unlock_bh(&li->lock); + rc = 0; /* order accepted */ +unlock: + spin_unlock_bh(&fi->lock); + VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + return rc; +} + +static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) +{ + int rc; + + switch (parameter & 0xff) { + case 0: + printk(KERN_WARNING "kvm: request to switch to ESA/390 mode" + " not supported"); + rc = 3; /* not operational */ + break; + case 1: + case 2: + rc = 0; /* order accepted */ + break; + default: + rc = -ENOTSUPP; + } + return rc; +} + +static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, + u64 *reg) +{ + struct float_interrupt *fi = &vcpu->kvm->arch.float_int; + struct local_interrupt *li; + struct interrupt_info *inti; + int rc; + u8 tmp; + + /* make sure that the new value is valid memory */ + address = address & 0x7fffe000u; + if ((copy_from_guest(vcpu, &tmp, + (u64) (address + vcpu->kvm->arch.guest_origin) , 1)) || + (copy_from_guest(vcpu, &tmp, (u64) (address + + vcpu->kvm->arch.guest_origin + PAGE_SIZE), 1))) { + *reg |= SIGP_STAT_INVALID_PARAMETER; + return 1; /* invalid parameter */ + } + + inti = kzalloc(sizeof(*inti), GFP_KERNEL); + if (!inti) + return 2; /* busy */ + + spin_lock_bh(&fi->lock); + li = fi->local_int[cpu_addr]; + + if ((cpu_addr >= KVM_MAX_VCPUS) || (li == NULL)) { + rc = 1; /* incorrect state */ + *reg &= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_fi; + } + + spin_lock_bh(&li->lock); + /* cpu must be in stopped state */ + if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) { + rc = 1; /* incorrect state */ + *reg &= SIGP_STAT_INCORRECT_STATE; + kfree(inti); + goto out_li; + } + + inti->type = KVM_S390_SIGP_SET_PREFIX; + inti->prefix.address = address; + + list_add_tail(&inti->list, &li->list); + atomic_set(&li->active, 1); + if (waitqueue_active(&li->wq)) + wake_up_interruptible(&li->wq); + rc = 0; /* order accepted */ + + VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address); +out_li: + spin_unlock_bh(&li->lock); +out_fi: + spin_unlock_bh(&fi->lock); + return rc; +} + +int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) +{ + int r1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; + int r3 = vcpu->arch.sie_block->ipa & 0x000f; + int base2 = vcpu->arch.sie_block->ipb >> 28; + int disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); + u32 parameter; + u16 cpu_addr = vcpu->arch.guest_gprs[r3]; + u8 order_code; + int rc; + + order_code = disp2; + if (base2) + order_code += vcpu->arch.guest_gprs[base2]; + + if (r1 % 2) + parameter = vcpu->arch.guest_gprs[r1]; + else + parameter = vcpu->arch.guest_gprs[r1 + 1]; + + switch (order_code) { + case SIGP_SENSE: + vcpu->stat.instruction_sigp_sense++; + rc = __sigp_sense(vcpu, cpu_addr, + &vcpu->arch.guest_gprs[r1]); + break; + case SIGP_EMERGENCY: + vcpu->stat.instruction_sigp_emergency++; + rc = __sigp_emergency(vcpu, cpu_addr); + break; + case SIGP_STOP: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, 0); + break; + case SIGP_STOP_STORE_STATUS: + vcpu->stat.instruction_sigp_stop++; + rc = __sigp_stop(vcpu, cpu_addr, 1); + break; + case SIGP_SET_ARCH: + vcpu->stat.instruction_sigp_arch++; + rc = __sigp_set_arch(vcpu, parameter); + break; + case SIGP_SET_PREFIX: + vcpu->stat.instruction_sigp_prefix++; + rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, + &vcpu->arch.guest_gprs[r1]); + break; + case SIGP_RESTART: + vcpu->stat.instruction_sigp_restart++; + /* user space must know about restart */ + default: + return -ENOTSUPP; + } + + if (rc < 0) + return rc; + + vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); + vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + return 0; +} diff --git a/trunk/arch/s390/mm/pgtable.c b/trunk/arch/s390/mm/pgtable.c index fd072013f88c..5c1aea97cd12 100644 --- a/trunk/arch/s390/mm/pgtable.c +++ b/trunk/arch/s390/mm/pgtable.c @@ -30,11 +30,27 @@ #define TABLES_PER_PAGE 4 #define FRAG_MASK 15UL #define SECOND_HALVES 10UL + +void clear_table_pgstes(unsigned long *table) +{ + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); + memset(table + 256, 0, PAGE_SIZE/4); + clear_table(table + 512, _PAGE_TYPE_EMPTY, PAGE_SIZE/4); + memset(table + 768, 0, PAGE_SIZE/4); +} + #else #define ALLOC_ORDER 2 #define TABLES_PER_PAGE 2 #define FRAG_MASK 3UL #define SECOND_HALVES 2UL + +void clear_table_pgstes(unsigned long *table) +{ + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2); + memset(table + 256, 0, PAGE_SIZE/2); +} + #endif unsigned long *crst_table_alloc(struct mm_struct *mm, int noexec) @@ -153,7 +169,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm) unsigned long *table; unsigned long bits; - bits = mm->context.noexec ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; spin_lock(&mm->page_table_lock); page = NULL; if (!list_empty(&mm->context.pgtable_list)) { @@ -170,7 +186,10 @@ unsigned long *page_table_alloc(struct mm_struct *mm) pgtable_page_ctor(page); page->flags &= ~FRAG_MASK; table = (unsigned long *) page_to_phys(page); - clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); + if (mm->context.pgstes) + clear_table_pgstes(table); + else + clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE); spin_lock(&mm->page_table_lock); list_add(&page->lru, &mm->context.pgtable_list); } @@ -191,7 +210,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table) struct page *page; unsigned long bits; - bits = mm->context.noexec ? 3UL : 1UL; + bits = (mm->context.noexec || mm->context.pgstes) ? 3UL : 1UL; bits <<= (__pa(table) & (PAGE_SIZE - 1)) / 256 / sizeof(unsigned long); page = pfn_to_page(__pa(table) >> PAGE_SHIFT); spin_lock(&mm->page_table_lock); @@ -228,3 +247,43 @@ void disable_noexec(struct mm_struct *mm, struct task_struct *tsk) mm->context.noexec = 0; update_mm(mm, tsk); } + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct task_struct *tsk = current; + struct mm_struct *mm; + int rc; + + task_lock(tsk); + + rc = 0; + if (tsk->mm->context.pgstes) + goto unlock; + + rc = -EINVAL; + if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 || + tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) + goto unlock; + + tsk->mm->context.pgstes = 1; /* dirty little tricks .. */ + mm = dup_mm(tsk); + tsk->mm->context.pgstes = 0; + + rc = -ENOMEM; + if (!mm) + goto unlock; + mmput(tsk->mm); + tsk->mm = tsk->active_mm = mm; + preempt_disable(); + update_mm(mm, tsk); + cpu_set(smp_processor_id(), mm->cpu_vm_mask); + preempt_enable(); + rc = 0; +unlock: + task_unlock(tsk); + return rc; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); diff --git a/trunk/arch/sparc64/kernel/smp.c b/trunk/arch/sparc64/kernel/smp.c index 524b88920947..409dd71f2738 100644 --- a/trunk/arch/sparc64/kernel/smp.c +++ b/trunk/arch/sparc64/kernel/smp.c @@ -866,14 +866,21 @@ void smp_call_function_client(int irq, struct pt_regs *regs) void *info = call_data->info; clear_softint(1 << irq); + + irq_enter(); + + if (!call_data->wait) { + /* let initiator proceed after getting data */ + atomic_inc(&call_data->finished); + } + + func(info); + + irq_exit(); + if (call_data->wait) { /* let initiator proceed only after completion */ - func(info); atomic_inc(&call_data->finished); - } else { - /* let initiator proceed after getting data */ - atomic_inc(&call_data->finished); - func(info); } } @@ -1032,7 +1039,9 @@ void smp_receive_signal(int cpu) void smp_receive_signal_client(int irq, struct pt_regs *regs) { + irq_enter(); clear_softint(1 << irq); + irq_exit(); } void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) @@ -1040,6 +1049,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) struct mm_struct *mm; unsigned long flags; + irq_enter(); + clear_softint(1 << irq); /* See if we need to allocate a new TLB context because @@ -1059,6 +1070,8 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs) load_secondary_context(mm); __flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT); + + irq_exit(); } void smp_new_mmu_context_version(void) @@ -1217,6 +1230,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) { clear_softint(1 << irq); + irq_enter(); + preempt_disable(); __asm__ __volatile__("flushw"); @@ -1229,6 +1244,8 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs) prom_world(0); preempt_enable(); + + irq_exit(); } /* /proc/profile writes can call this, don't __init it please. */ diff --git a/trunk/arch/sparc64/kernel/sys_sparc.c b/trunk/arch/sparc64/kernel/sys_sparc.c index 73ed01ba40dc..8d4761f15fa9 100644 --- a/trunk/arch/sparc64/kernel/sys_sparc.c +++ b/trunk/arch/sparc64/kernel/sys_sparc.c @@ -454,8 +454,8 @@ asmlinkage long sys_ipc(unsigned int call, int first, unsigned long second, err = sys_semget(first, (int)second, (int)third); goto out; case SEMCTL: { - err = sys_semctl(first, third, - (int)second | IPC_64, + err = sys_semctl(first, second, + (int)third | IPC_64, (union semun) ptr); goto out; } diff --git a/trunk/arch/um/Kconfig.x86_64 b/trunk/arch/um/Kconfig.x86_64 index 3fbe69e359ed..5696e7b374b3 100644 --- a/trunk/arch/um/Kconfig.x86_64 +++ b/trunk/arch/um/Kconfig.x86_64 @@ -1,3 +1,10 @@ + +menu "Host processor type and features" + +source "arch/x86/Kconfig.cpu" + +endmenu + config UML_X86 bool default y diff --git a/trunk/arch/um/os-Linux/helper.c b/trunk/arch/um/os-Linux/helper.c index f4bd349d4412..f25c29a12d00 100644 --- a/trunk/arch/um/os-Linux/helper.c +++ b/trunk/arch/um/os-Linux/helper.c @@ -14,6 +14,7 @@ #include "os.h" #include "um_malloc.h" #include "user.h" +#include struct helper_data { void (*pre_exec)(void*); diff --git a/trunk/arch/um/sys-i386/Makefile b/trunk/arch/um/sys-i386/Makefile index 964dc1a04c37..598b5c1903af 100644 --- a/trunk/arch/um/sys-i386/Makefile +++ b/trunk/arch/um/sys-i386/Makefile @@ -6,7 +6,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ ptrace_user.o setjmp.o signal.o stub.o stub_segv.o syscalls.o sysrq.o \ sys_call_table.o tls.o -subarch-obj-y = lib/bitops_32.o lib/semaphore_32.o lib/string_32.o +subarch-obj-y = lib/semaphore_32.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o subarch-obj-$(CONFIG_MODULES) += kernel/module_32.o diff --git a/trunk/arch/um/sys-x86_64/Makefile b/trunk/arch/um/sys-x86_64/Makefile index 3c22de532088..c8b4cce9cfe1 100644 --- a/trunk/arch/um/sys-x86_64/Makefile +++ b/trunk/arch/um/sys-x86_64/Makefile @@ -10,7 +10,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ obj-$(CONFIG_MODULES) += um_module.o -subarch-obj-y = lib/bitops_64.o lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o +subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o subarch-obj-$(CONFIG_MODULES) += kernel/module_64.o ldt-y = ../sys-i386/ldt.o diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 87a693cf2bb7..e5790fe9e330 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -23,7 +23,7 @@ config X86 select HAVE_KPROBES select HAVE_KRETPROBES select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) - select HAVE_ARCH_KGDB + select HAVE_ARCH_KGDB if !X86_VOYAGER config GENERIC_LOCKBREAK @@ -142,6 +142,9 @@ config AUDIT_ARCH config ARCH_SUPPORTS_AOUT def_bool y +config ARCH_SUPPORTS_OPTIMIZED_INLINING + def_bool y + # Use the generic interrupt handling code in kernel/irq/: config GENERIC_HARDIRQS bool @@ -370,6 +373,25 @@ config VMI at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. +config KVM_CLOCK + bool "KVM paravirtualized clock" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + Turning on this option will allow you to run a paravirtualized clock + when running over the KVM hypervisor. Instead of relying on a PIT + (or probably other) emulation by the underlying device model, the host + provides the guest with timing infrastructure such as time of day, and + system time + +config KVM_GUEST + bool "KVM Guest support" + select PARAVIRT + depends on !(X86_VISWS || X86_VOYAGER) + help + This option enables various optimizations for running under the KVM + hypervisor. + source "arch/x86/lguest/Kconfig" config PARAVIRT @@ -1049,9 +1071,9 @@ config MTRR See for more information. config X86_PAT - def_bool y + bool prompt "x86 PAT support" - depends on MTRR && NONPROMISC_DEVMEM + depends on MTRR help Use PAT attributes to setup page level cache control. diff --git a/trunk/arch/x86/Kconfig.cpu b/trunk/arch/x86/Kconfig.cpu index 57072f2716f9..7ef18b01f0bc 100644 --- a/trunk/arch/x86/Kconfig.cpu +++ b/trunk/arch/x86/Kconfig.cpu @@ -21,8 +21,8 @@ config M386 Here are the settings recommended for greatest speed: - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI - 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels - will run on a 386 class machine. + 486DLC/DLC2, and UMC 486SX-S. Only "386" kernels will run on a 386 + class machine. - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - "586" for generic Pentium CPUs lacking the TSC @@ -278,6 +278,11 @@ config GENERIC_CPU endchoice +config X86_CPU + def_bool y + select GENERIC_FIND_FIRST_BIT + select GENERIC_FIND_NEXT_BIT + config X86_GENERIC bool "Generic x86 support" depends on X86_32 @@ -398,7 +403,7 @@ config X86_TSC # generates cmov. config X86_CMOV def_bool y - depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) + depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || X86_64) config X86_MINIMUM_CPU_FAMILY int diff --git a/trunk/arch/x86/Kconfig.debug b/trunk/arch/x86/Kconfig.debug index 610aaecc19f8..5b1979a45a1e 100644 --- a/trunk/arch/x86/Kconfig.debug +++ b/trunk/arch/x86/Kconfig.debug @@ -5,6 +5,17 @@ config TRACE_IRQFLAGS_SUPPORT source "lib/Kconfig.debug" +config NONPROMISC_DEVMEM + bool "Disable promiscuous /dev/mem" + help + The /dev/mem file by default only allows userspace access to PCI + space and the BIOS code and data regions. This is sufficient for + dosemu and X and all common users of /dev/mem. With this config + option, you allow userspace access to all of memory, including + kernel and userspace memory. Accidental access to this is + obviously disasterous, but specific access can be used by people + debugging the kernel. + config EARLY_PRINTK bool "Early printk" if EMBEDDED default y @@ -246,3 +257,16 @@ config CPA_DEBUG Do change_page_attr() self-tests every 30 seconds. endmenu + +config OPTIMIZE_INLINING + bool "Allow gcc to uninline functions marked 'inline'" + default y + help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of + compilers. The gcc 4.x series have a rewritten inlining algorithm and + disabling this option will generate a smaller kernel there. Hopefully + this algorithm is so good that allowing gcc4 to make the decision can + become the default in the future, until then this option is there to + test gcc for this. diff --git a/trunk/arch/x86/boot/.gitignore b/trunk/arch/x86/boot/.gitignore index b1bdc4c6f9f2..172cf8a98bdd 100644 --- a/trunk/arch/x86/boot/.gitignore +++ b/trunk/arch/x86/boot/.gitignore @@ -1,7 +1,8 @@ bootsect bzImage +cpustr.h +mkcpustr +offsets.h setup setup.bin setup.elf -cpustr.h -mkcpustr diff --git a/trunk/arch/x86/boot/header.S b/trunk/arch/x86/boot/header.S index 6d2df8d61c54..af86e431acfa 100644 --- a/trunk/arch/x86/boot/header.S +++ b/trunk/arch/x86/boot/header.S @@ -120,7 +120,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x0208 # header version number (>= 0x0105) + .word 0x0209 # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -227,6 +227,10 @@ hardware_subarch_data: .quad 0 payload_offset: .long input_data payload_length: .long input_data_end-input_data +setup_data: .quad 0 # 64-bit physical pointer to + # single linked list of + # struct setup_data + # End of setup header ##################################################### .section ".inittext", "ax" diff --git a/trunk/arch/x86/configs/i386_defconfig b/trunk/arch/x86/configs/i386_defconfig index 3df340b54e57..ad7ddaaff588 100644 --- a/trunk/arch/x86/configs/i386_defconfig +++ b/trunk/arch/x86/configs/i386_defconfig @@ -1421,6 +1421,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set +CONFIG_OPTIMIZE_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set diff --git a/trunk/arch/x86/configs/x86_64_defconfig b/trunk/arch/x86/configs/x86_64_defconfig index eef98cb00c62..2d6f5b2809d2 100644 --- a/trunk/arch/x86/configs/x86_64_defconfig +++ b/trunk/arch/x86/configs/x86_64_defconfig @@ -1346,6 +1346,7 @@ CONFIG_DEBUG_BUGVERBOSE=y # CONFIG_DEBUG_VM is not set # CONFIG_DEBUG_LIST is not set # CONFIG_FRAME_POINTER is not set +CONFIG_OPTIMIZE_INLINING=y # CONFIG_RCU_TORTURE_TEST is not set # CONFIG_LKDTM is not set # CONFIG_FAULT_INJECTION is not set diff --git a/trunk/arch/x86/ia32/ia32_signal.c b/trunk/arch/x86/ia32/ia32_signal.c index 05e155d3fb6c..bbed3a26ce55 100644 --- a/trunk/arch/x86/ia32/ia32_signal.c +++ b/trunk/arch/x86/ia32/ia32_signal.c @@ -499,11 +499,6 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, regs->cs = __USER32_CS; regs->ss = __USER32_DS; - set_fs(USER_DS); - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - #if DEBUG_SIG printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", current->comm, current->pid, frame, regs->ip, frame->pretcode); @@ -599,11 +594,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, regs->cs = __USER32_CS; regs->ss = __USER32_DS; - set_fs(USER_DS); - regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF); - if (test_thread_flag(TIF_SINGLESTEP)) - ptrace_notify(SIGTRAP); - #if DEBUG_SIG printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", current->comm, current->pid, frame, regs->ip, frame->pretcode); diff --git a/trunk/arch/x86/ia32/ia32entry.S b/trunk/arch/x86/ia32/ia32entry.S index ae7158bce4d6..b5e329da166c 100644 --- a/trunk/arch/x86/ia32/ia32entry.S +++ b/trunk/arch/x86/ia32/ia32entry.S @@ -430,7 +430,7 @@ ia32_sys_call_table: .quad sys_setuid16 .quad sys_getuid16 .quad compat_sys_stime /* stime */ /* 25 */ - .quad sys32_ptrace /* ptrace */ + .quad compat_sys_ptrace /* ptrace */ .quad sys_alarm .quad sys_fstat /* (old)fstat */ .quad sys_pause diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 90e092d0af0c..fa19c3819540 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -80,6 +80,8 @@ obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_VMI) += vmi_32.o vmiclock_32.o +obj-$(CONFIG_KVM_GUEST) += kvm.o +obj-$(CONFIG_KVM_CLOCK) += kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o ifdef CONFIG_INPUT_PCSPKR diff --git a/trunk/arch/x86/kernel/acpi/boot.c b/trunk/arch/x86/kernel/acpi/boot.c index 057ccf1d5ad4..977ed5cdeaa3 100644 --- a/trunk/arch/x86/kernel/acpi/boot.c +++ b/trunk/arch/x86/kernel/acpi/boot.c @@ -697,10 +697,6 @@ static int __init acpi_parse_hpet(struct acpi_table_header *table) #define HPET_RESOURCE_NAME_SIZE 9 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); - if (!hpet_res) - return 0; - - memset(hpet_res, 0, sizeof(*hpet_res)); hpet_res->name = (void *)&hpet_res[1]; hpet_res->flags = IORESOURCE_MEM; snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, "HPET %u", diff --git a/trunk/arch/x86/kernel/acpi/realmode/.gitignore b/trunk/arch/x86/kernel/acpi/realmode/.gitignore new file mode 100644 index 000000000000..58f1f48a58f8 --- /dev/null +++ b/trunk/arch/x86/kernel/acpi/realmode/.gitignore @@ -0,0 +1,3 @@ +wakeup.bin +wakeup.elf +wakeup.lds diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c index df4099dc1c68..65c7857a90dd 100644 --- a/trunk/arch/x86/kernel/alternative.c +++ b/trunk/arch/x86/kernel/alternative.c @@ -511,31 +511,30 @@ void *__kprobes text_poke(void *addr, const void *opcode, size_t len) unsigned long flags; char *vaddr; int nr_pages = 2; + struct page *pages[2]; + int i; - BUG_ON(len > sizeof(long)); - BUG_ON((((long)addr + len - 1) & ~(sizeof(long) - 1)) - - ((long)addr & ~(sizeof(long) - 1))); - if (kernel_text_address((unsigned long)addr)) { - struct page *pages[2] = { virt_to_page(addr), - virt_to_page(addr + PAGE_SIZE) }; - if (!pages[1]) - nr_pages = 1; - vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); - BUG_ON(!vaddr); - local_irq_save(flags); - memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); - local_irq_restore(flags); - vunmap(vaddr); + if (!core_kernel_text((unsigned long)addr)) { + pages[0] = vmalloc_to_page(addr); + pages[1] = vmalloc_to_page(addr + PAGE_SIZE); } else { - /* - * modules are in vmalloc'ed memory, always writable. - */ - local_irq_save(flags); - memcpy(addr, opcode, len); - local_irq_restore(flags); + pages[0] = virt_to_page(addr); + WARN_ON(!PageReserved(pages[0])); + pages[1] = virt_to_page(addr + PAGE_SIZE); } + BUG_ON(!pages[0]); + if (!pages[1]) + nr_pages = 1; + vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); + BUG_ON(!vaddr); + local_irq_save(flags); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); + local_irq_restore(flags); + vunmap(vaddr); sync_core(); /* Could also do a CLFLUSH here to speed up CPU recovery; but that causes hangs on some VIA CPUs. */ + for (i = 0; i < len; i++) + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); return addr; } diff --git a/trunk/arch/x86/kernel/apic_32.c b/trunk/arch/x86/kernel/apic_32.c index 687208190b06..4b99b1bdeb6c 100644 --- a/trunk/arch/x86/kernel/apic_32.c +++ b/trunk/arch/x86/kernel/apic_32.c @@ -451,7 +451,8 @@ void __init setup_boot_APIC_clock(void) } /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 32); + lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, + lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = @@ -902,7 +903,7 @@ void __init init_bsp_APIC(void) apic_write_around(APIC_LVT1, value); } -void __cpuinit lapic_setup_esr(void) +static void __cpuinit lapic_setup_esr(void) { unsigned long oldvalue, value, maxlvt; if (lapic_is_integrated() && !esr_disable) { diff --git a/trunk/arch/x86/kernel/apic_64.c b/trunk/arch/x86/kernel/apic_64.c index 9e8e5c050c55..5910020c3f24 100644 --- a/trunk/arch/x86/kernel/apic_64.c +++ b/trunk/arch/x86/kernel/apic_64.c @@ -360,7 +360,8 @@ static void __init calibrate_APIC_clock(void) result / 1000 / 1000, result / 1000 % 1000); /* Calculate the scaled math multiplication factor */ - lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, 32); + lapic_clockevent.mult = div_sc(result, NSEC_PER_SEC, + lapic_clockevent.shift); lapic_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); lapic_clockevent.min_delta_ns = @@ -429,7 +430,7 @@ void __init setup_boot_APIC_clock(void) * set the DUMMY flag again and force the broadcast mode in the * clockevents layer. */ -void __cpuinit check_boot_apic_timer_broadcast(void) +static void __cpuinit check_boot_apic_timer_broadcast(void) { if (!disable_apic_timer || (lapic_clockevent.features & CLOCK_EVT_FEAT_DUMMY)) @@ -834,7 +835,7 @@ void __cpuinit setup_local_APIC(void) preempt_enable(); } -void __cpuinit lapic_setup_esr(void) +static void __cpuinit lapic_setup_esr(void) { unsigned maxlvt = lapic_get_maxlvt(); diff --git a/trunk/arch/x86/kernel/apm_32.c b/trunk/arch/x86/kernel/apm_32.c index f0030a0999c7..e4ea362e8480 100644 --- a/trunk/arch/x86/kernel/apm_32.c +++ b/trunk/arch/x86/kernel/apm_32.c @@ -904,6 +904,7 @@ static void apm_cpu_idle(void) original_pm_idle(); else default_idle(); + local_irq_disable(); jiffies_since_last_check = jiffies - last_jiffies; if (jiffies_since_last_check > idle_period) goto recalc; @@ -911,6 +912,8 @@ static void apm_cpu_idle(void) if (apm_idle_done) apm_do_busy(); + + local_irq_enable(); } /** diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile index ee7c45235e54..a0c6f8190887 100644 --- a/trunk/arch/x86/kernel/cpu/Makefile +++ b/trunk/arch/x86/kernel/cpu/Makefile @@ -11,7 +11,6 @@ obj-$(CONFIG_X86_32) += cyrix.o obj-$(CONFIG_X86_32) += centaur.o obj-$(CONFIG_X86_32) += transmeta.o obj-$(CONFIG_X86_32) += intel.o -obj-$(CONFIG_X86_32) += nexgen.o obj-$(CONFIG_X86_32) += umc.o obj-$(CONFIG_X86_MCE) += mcheck/ diff --git a/trunk/arch/x86/kernel/cpu/amd.c b/trunk/arch/x86/kernel/cpu/amd.c index 0173065dc3b7..245866828294 100644 --- a/trunk/arch/x86/kernel/cpu/amd.c +++ b/trunk/arch/x86/kernel/cpu/amd.c @@ -343,10 +343,4 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = { .c_size_cache = amd_size_cache, }; -int __init amd_init_cpu(void) -{ - cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; - return 0; -} - cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); diff --git a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c index 9a699ed03598..e07e8c068ae0 100644 --- a/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/trunk/arch/x86/kernel/cpu/mcheck/mce_64.c @@ -49,7 +49,7 @@ static int banks; static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; static unsigned long notify_user; static int rip_msr; -static int mce_bootlog = 1; +static int mce_bootlog = -1; static atomic_t mce_events; static char trigger[128]; @@ -471,13 +471,15 @@ static void mce_init(void *dummy) static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) { /* This should be disabled by the BIOS, but isn't always */ - if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) { - /* disable GART TBL walk error reporting, which trips off - incorrectly with the IOMMU & 3ware & Cerberus. */ - clear_bit(10, &bank[4]); - /* Lots of broken BIOS around that don't clear them - by default and leave crap in there. Don't log. */ - mce_bootlog = 0; + if (c->x86_vendor == X86_VENDOR_AMD) { + if(c->x86 == 15) + /* disable GART TBL walk error reporting, which trips off + incorrectly with the IOMMU & 3ware & Cerberus. */ + clear_bit(10, &bank[4]); + if(c->x86 <= 17 && mce_bootlog < 0) + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ + mce_bootlog = 0; } } diff --git a/trunk/arch/x86/kernel/cpu/nexgen.c b/trunk/arch/x86/kernel/cpu/nexgen.c deleted file mode 100644 index 5d5e1c134123..000000000000 --- a/trunk/arch/x86/kernel/cpu/nexgen.c +++ /dev/null @@ -1,59 +0,0 @@ -#include -#include -#include -#include - -#include "cpu.h" - -/* - * Detect a NexGen CPU running without BIOS hypercode new enough - * to have CPUID. (Thanks to Herbert Oppmann) - */ - -static int __cpuinit deep_magic_nexgen_probe(void) -{ - int ret; - - __asm__ __volatile__ ( - " movw $0x5555, %%ax\n" - " xorw %%dx,%%dx\n" - " movw $2, %%cx\n" - " divw %%cx\n" - " movl $0, %%eax\n" - " jnz 1f\n" - " movl $1, %%eax\n" - "1:\n" - : "=a" (ret) : : "cx", "dx"); - return ret; -} - -static void __cpuinit init_nexgen(struct cpuinfo_x86 *c) -{ - c->x86_cache_size = 256; /* A few had 1 MB... */ -} - -static void __cpuinit nexgen_identify(struct cpuinfo_x86 *c) -{ - /* Detect NexGen with old hypercode */ - if (deep_magic_nexgen_probe()) - strcpy(c->x86_vendor_id, "NexGenDriven"); -} - -static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { - .c_vendor = "Nexgen", - .c_ident = { "NexGenDriven" }, - .c_models = { - { .vendor = X86_VENDOR_NEXGEN, - .family = 5, - .model_names = { [1] = "Nx586" } - }, - }, - .c_init = init_nexgen, - .c_identify = nexgen_identify, -}; - -int __init nexgen_init_cpu(void) -{ - cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; - return 0; -} diff --git a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c index b943e10ad814..f9ae93adffe5 100644 --- a/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/trunk/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -614,16 +614,6 @@ static struct wd_ops intel_arch_wd_ops __read_mostly = { .evntsel = MSR_ARCH_PERFMON_EVENTSEL1, }; -static struct wd_ops coreduo_wd_ops = { - .reserve = single_msr_reserve, - .unreserve = single_msr_unreserve, - .setup = setup_intel_arch_watchdog, - .rearm = p6_rearm, - .stop = single_msr_stop_watchdog, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, -}; - static void probe_nmi_watchdog(void) { switch (boot_cpu_data.x86_vendor) { @@ -637,8 +627,8 @@ static void probe_nmi_watchdog(void) /* Work around Core Duo (Yonah) errata AE49 where perfctr1 doesn't have a working enable bit. */ if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { - wd_ops = &coreduo_wd_ops; - break; + intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; + intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; } if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { wd_ops = &intel_arch_wd_ops; diff --git a/trunk/arch/x86/kernel/crash.c b/trunk/arch/x86/kernel/crash.c index 2251d0ae9570..268553817909 100644 --- a/trunk/arch/x86/kernel/crash.c +++ b/trunk/arch/x86/kernel/crash.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -117,7 +118,7 @@ static void nmi_shootdown_cpus(void) } #endif -void machine_crash_shutdown(struct pt_regs *regs) +void native_machine_crash_shutdown(struct pt_regs *regs) { /* This function is only called after the system * has panicked or is otherwise in a critical state. diff --git a/trunk/arch/x86/kernel/e820_64.c b/trunk/arch/x86/kernel/e820_64.c index cbd42e51cb08..645ee5e32a27 100644 --- a/trunk/arch/x86/kernel/e820_64.c +++ b/trunk/arch/x86/kernel/e820_64.c @@ -84,14 +84,41 @@ void __init reserve_early(unsigned long start, unsigned long end, char *name) strncpy(r->name, name, sizeof(r->name) - 1); } -void __init early_res_to_bootmem(void) +void __init free_early(unsigned long start, unsigned long end) +{ + struct early_res *r; + int i, j; + + for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { + r = &early_res[i]; + if (start == r->start && end == r->end) + break; + } + if (i >= MAX_EARLY_RES || !early_res[i].end) + panic("free_early on not reserved area: %lx-%lx!", start, end); + + for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) + ; + + memcpy(&early_res[i], &early_res[i + 1], + (j - 1 - i) * sizeof(struct early_res)); + + early_res[j - 1].end = 0; +} + +void __init early_res_to_bootmem(unsigned long start, unsigned long end) { int i; + unsigned long final_start, final_end; for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { struct early_res *r = &early_res[i]; - printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i, - r->start, r->end - 1, r->name); - reserve_bootmem_generic(r->start, r->end - r->start); + final_start = max(start, r->start); + final_end = min(end, r->end); + if (final_start >= final_end) + continue; + printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i, + final_start, final_end - 1, r->name); + reserve_bootmem_generic(final_start, final_end - final_start); } } diff --git a/trunk/arch/x86/kernel/entry_32.S b/trunk/arch/x86/kernel/entry_32.S index f0f8934fc303..2a609dc3271c 100644 --- a/trunk/arch/x86/kernel/entry_32.S +++ b/trunk/arch/x86/kernel/entry_32.S @@ -409,7 +409,7 @@ restore_nocheck_notrace: irq_return: INTERRUPT_RETURN .section .fixup,"ax" -iret_exc: +ENTRY(iret_exc) pushl $0 # no error code pushl $do_iret_error jmp error_code @@ -1017,6 +1017,13 @@ ENTRY(kernel_thread_helper) ENDPROC(kernel_thread_helper) #ifdef CONFIG_XEN +/* Xen doesn't set %esp to be precisely what the normal sysenter + entrypoint expects, so fix it up before using the normal path. */ +ENTRY(xen_sysenter_target) + RING0_INT_FRAME + addl $5*4, %esp /* remove xen-provided frame */ + jmp sysenter_past_esp + ENTRY(xen_hypervisor_callback) CFI_STARTPROC pushl $0 @@ -1035,8 +1042,9 @@ ENTRY(xen_hypervisor_callback) cmpl $xen_iret_end_crit,%eax jae 1f - call xen_iret_crit_fixup + jmp xen_iret_crit_fixup +ENTRY(xen_do_upcall) 1: mov %esp, %eax call xen_evtchn_do_upcall jmp ret_from_intr diff --git a/trunk/arch/x86/kernel/genapic_64.c b/trunk/arch/x86/kernel/genapic_64.c index 9546ef408b92..021624c83583 100644 --- a/trunk/arch/x86/kernel/genapic_64.c +++ b/trunk/arch/x86/kernel/genapic_64.c @@ -51,7 +51,7 @@ void __init setup_apic_routing(void) else #endif - if (cpus_weight(cpu_possible_map) <= 8) + if (num_possible_cpus() <= 8) genapic = &apic_flat; else genapic = &apic_physflat; diff --git a/trunk/arch/x86/kernel/head64.c b/trunk/arch/x86/kernel/head64.c index 993c76773256..e25c57b8aa84 100644 --- a/trunk/arch/x86/kernel/head64.c +++ b/trunk/arch/x86/kernel/head64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include static void __init zap_identity_mappings(void) { @@ -49,7 +51,6 @@ static void __init copy_bootdata(char *real_mode_data) } } -#define BIOS_EBDA_SEGMENT 0x40E #define BIOS_LOWMEM_KILOBYTES 0x413 /* @@ -80,8 +81,7 @@ static void __init reserve_ebda_region(void) lowmem <<= 10; /* start of EBDA area */ - ebda_addr = *(unsigned short *)__va(BIOS_EBDA_SEGMENT); - ebda_addr <<= 4; + ebda_addr = get_bios_ebda(); /* Fixup: bios puts an EBDA in the top 64K segment */ /* of conventional memory, but does not adjust lowmem. */ @@ -101,6 +101,24 @@ static void __init reserve_ebda_region(void) reserve_early(lowmem, 0x100000, "BIOS reserved"); } +static void __init reserve_setup_data(void) +{ + struct setup_data *data; + unsigned long pa_data; + char buf[32]; + + if (boot_params.hdr.version < 0x0209) + return; + pa_data = boot_params.hdr.setup_data; + while (pa_data) { + data = early_ioremap(pa_data, sizeof(*data)); + sprintf(buf, "setup data %x", data->type); + reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); + pa_data = data->next; + early_iounmap(data, sizeof(*data)); + } +} + void __init x86_64_start_kernel(char * real_mode_data) { int i; @@ -157,6 +175,7 @@ void __init x86_64_start_kernel(char * real_mode_data) #endif reserve_ebda_region(); + reserve_setup_data(); /* * At this point everything still needed from the boot loader diff --git a/trunk/arch/x86/kernel/hpet.c b/trunk/arch/x86/kernel/hpet.c index 36652ea1a265..9007f9ea64ee 100644 --- a/trunk/arch/x86/kernel/hpet.c +++ b/trunk/arch/x86/kernel/hpet.c @@ -218,7 +218,7 @@ static void hpet_legacy_clockevent_register(void) hpet_freq = 1000000000000000ULL; do_div(hpet_freq, hpet_period); hpet_clockevent.mult = div_sc((unsigned long) hpet_freq, - NSEC_PER_SEC, 32); + NSEC_PER_SEC, hpet_clockevent.shift); /* Calculate the min / max delta */ hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, &hpet_clockevent); diff --git a/trunk/arch/x86/kernel/i8253.c b/trunk/arch/x86/kernel/i8253.c index 8540abe86ade..c1b5e3ece1f2 100644 --- a/trunk/arch/x86/kernel/i8253.c +++ b/trunk/arch/x86/kernel/i8253.c @@ -115,7 +115,8 @@ void __init setup_pit_timer(void) * IO_APIC has been initialized. */ pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); - pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, 32); + pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, + pit_clockevent.shift); pit_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_clockevent); pit_clockevent.min_delta_ns = @@ -224,7 +225,8 @@ static int __init init_pit_clocksource(void) pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) return 0; - clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, 20); + clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, + clocksource_pit.shift); return clocksource_register(&clocksource_pit); } arch_initcall(init_pit_clocksource); diff --git a/trunk/arch/x86/kernel/io_apic_32.c b/trunk/arch/x86/kernel/io_apic_32.c index 2e2f42074e18..696b8e4e66bb 100644 --- a/trunk/arch/x86/kernel/io_apic_32.c +++ b/trunk/arch/x86/kernel/io_apic_32.c @@ -2068,7 +2068,7 @@ static void __init setup_nmi(void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static inline void __init unlock_ExtINT_logic(void) { int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; diff --git a/trunk/arch/x86/kernel/io_apic_64.c b/trunk/arch/x86/kernel/io_apic_64.c index 9ba11d07920f..ef1a8dfcc529 100644 --- a/trunk/arch/x86/kernel/io_apic_64.c +++ b/trunk/arch/x86/kernel/io_apic_64.c @@ -1599,7 +1599,7 @@ static void __init setup_nmi(void) * cycles as some i82489DX-based boards have glue logic that keeps the * 8259A interrupt line asserted until INTA. --macro */ -static inline void unlock_ExtINT_logic(void) +static inline void __init unlock_ExtINT_logic(void) { int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; diff --git a/trunk/arch/x86/kernel/irq_32.c b/trunk/arch/x86/kernel/irq_32.c index 6ea67b76a214..00bda7bcda63 100644 --- a/trunk/arch/x86/kernel/irq_32.c +++ b/trunk/arch/x86/kernel/irq_32.c @@ -134,7 +134,7 @@ unsigned int do_IRQ(struct pt_regs *regs) : "=a" (arg1), "=d" (arg2), "=b" (bx) : "0" (irq), "1" (desc), "2" (isp), "D" (desc->handle_irq) - : "memory", "cc" + : "memory", "cc", "ecx" ); } else #endif diff --git a/trunk/arch/x86/kernel/kdebugfs.c b/trunk/arch/x86/kernel/kdebugfs.c index 73354302fda7..c03205991718 100644 --- a/trunk/arch/x86/kernel/kdebugfs.c +++ b/trunk/arch/x86/kernel/kdebugfs.c @@ -6,23 +6,171 @@ * * This file is released under the GPLv2. */ - #include +#include #include #include +#include +#include #include #ifdef CONFIG_DEBUG_BOOT_PARAMS +struct setup_data_node { + u64 paddr; + u32 type; + u32 len; +}; + +static ssize_t +setup_data_read(struct file *file, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct setup_data_node *node = file->private_data; + unsigned long remain; + loff_t pos = *ppos; + struct page *pg; + void *p; + u64 pa; + + if (pos < 0) + return -EINVAL; + if (pos >= node->len) + return 0; + + if (count > node->len - pos) + count = node->len - pos; + pa = node->paddr + sizeof(struct setup_data) + pos; + pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + p = ioremap_cache(pa, count); + if (!p) + return -ENXIO; + } else { + p = __va(pa); + } + + remain = copy_to_user(user_buf, p, count); + + if (PageHighMem(pg)) + iounmap(p); + + if (remain) + return -EFAULT; + + *ppos = pos + count; + + return count; +} + +static int setup_data_open(struct inode *inode, struct file *file) +{ + file->private_data = inode->i_private; + return 0; +} + +static const struct file_operations fops_setup_data = { + .read = setup_data_read, + .open = setup_data_open, +}; + +static int __init +create_setup_data_node(struct dentry *parent, int no, + struct setup_data_node *node) +{ + struct dentry *d, *type, *data; + char buf[16]; + int error; + + sprintf(buf, "%d", no); + d = debugfs_create_dir(buf, parent); + if (!d) { + error = -ENOMEM; + goto err_return; + } + type = debugfs_create_x32("type", S_IRUGO, d, &node->type); + if (!type) { + error = -ENOMEM; + goto err_dir; + } + data = debugfs_create_file("data", S_IRUGO, d, node, &fops_setup_data); + if (!data) { + error = -ENOMEM; + goto err_type; + } + return 0; + +err_type: + debugfs_remove(type); +err_dir: + debugfs_remove(d); +err_return: + return error; +} + +static int __init create_setup_data_nodes(struct dentry *parent) +{ + struct setup_data_node *node; + struct setup_data *data; + int error, no = 0; + struct dentry *d; + struct page *pg; + u64 pa_data; + + d = debugfs_create_dir("setup_data", parent); + if (!d) { + error = -ENOMEM; + goto err_return; + } + + pa_data = boot_params.hdr.setup_data; + + while (pa_data) { + node = kmalloc(sizeof(*node), GFP_KERNEL); + if (!node) { + error = -ENOMEM; + goto err_dir; + } + pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + data = ioremap_cache(pa_data, sizeof(*data)); + if (!data) { + error = -ENXIO; + goto err_dir; + } + } else { + data = __va(pa_data); + } + + node->paddr = pa_data; + node->type = data->type; + node->len = data->len; + error = create_setup_data_node(d, no, node); + pa_data = data->next; + + if (PageHighMem(pg)) + iounmap(data); + if (error) + goto err_dir; + no++; + } + return 0; + +err_dir: + debugfs_remove(d); +err_return: + return error; +} + static struct debugfs_blob_wrapper boot_params_blob = { - .data = &boot_params, - .size = sizeof(boot_params), + .data = &boot_params, + .size = sizeof(boot_params), }; static int __init boot_params_kdebugfs_init(void) { - int error; struct dentry *dbp, *version, *data; + int error; dbp = debugfs_create_dir("boot_params", NULL); if (!dbp) { @@ -41,7 +189,13 @@ static int __init boot_params_kdebugfs_init(void) error = -ENOMEM; goto err_version; } + error = create_setup_data_nodes(dbp); + if (error) + goto err_data; return 0; + +err_data: + debugfs_remove(data); err_version: debugfs_remove(version); err_dir: @@ -61,5 +215,4 @@ static int __init arch_kdebugfs_init(void) return error; } - arch_initcall(arch_kdebugfs_init); diff --git a/trunk/arch/x86/kernel/kvm.c b/trunk/arch/x86/kernel/kvm.c new file mode 100644 index 000000000000..8b7a3cf37d2b --- /dev/null +++ b/trunk/arch/x86/kernel/kvm.c @@ -0,0 +1,248 @@ +/* + * KVM paravirt_ops implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar + * Copyright IBM Corporation, 2007 + * Authors: Anthony Liguori + */ + +#include +#include +#include +#include +#include +#include +#include + +#define MMU_QUEUE_SIZE 1024 + +struct kvm_para_state { + u8 mmu_queue[MMU_QUEUE_SIZE]; + int mmu_queue_len; + enum paravirt_lazy_mode mode; +}; + +static DEFINE_PER_CPU(struct kvm_para_state, para_state); + +static struct kvm_para_state *kvm_para_state(void) +{ + return &per_cpu(para_state, raw_smp_processor_id()); +} + +/* + * No need for any "IO delay" on KVM + */ +static void kvm_io_delay(void) +{ +} + +static void kvm_mmu_op(void *buffer, unsigned len) +{ + int r; + unsigned long a1, a2; + + do { + a1 = __pa(buffer); + a2 = 0; /* on i386 __pa() always returns <4G */ + r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2); + buffer += r; + len -= r; + } while (len); +} + +static void mmu_queue_flush(struct kvm_para_state *state) +{ + if (state->mmu_queue_len) { + kvm_mmu_op(state->mmu_queue, state->mmu_queue_len); + state->mmu_queue_len = 0; + } +} + +static void kvm_deferred_mmu_op(void *buffer, int len) +{ + struct kvm_para_state *state = kvm_para_state(); + + if (state->mode != PARAVIRT_LAZY_MMU) { + kvm_mmu_op(buffer, len); + return; + } + if (state->mmu_queue_len + len > sizeof state->mmu_queue) + mmu_queue_flush(state); + memcpy(state->mmu_queue + state->mmu_queue_len, buffer, len); + state->mmu_queue_len += len; +} + +static void kvm_mmu_write(void *dest, u64 val) +{ + __u64 pte_phys; + struct kvm_mmu_op_write_pte wpte; + +#ifdef CONFIG_HIGHPTE + struct page *page; + unsigned long dst = (unsigned long) dest; + + page = kmap_atomic_to_page(dest); + pte_phys = page_to_pfn(page); + pte_phys <<= PAGE_SHIFT; + pte_phys += (dst & ~(PAGE_MASK)); +#else + pte_phys = (unsigned long)__pa(dest); +#endif + wpte.header.op = KVM_MMU_OP_WRITE_PTE; + wpte.pte_val = val; + wpte.pte_phys = pte_phys; + + kvm_deferred_mmu_op(&wpte, sizeof wpte); +} + +/* + * We only need to hook operations that are MMU writes. We hook these so that + * we can use lazy MMU mode to batch these operations. We could probably + * improve the performance of the host code if we used some of the information + * here to simplify processing of batched writes. + */ +static void kvm_set_pte(pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + kvm_mmu_write(pmdp, pmd_val(pmd)); +} + +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE +static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, pte_val(pte)); +} + +static void kvm_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + kvm_mmu_write(ptep, 0); +} + +static void kvm_pmd_clear(pmd_t *pmdp) +{ + kvm_mmu_write(pmdp, 0); +} +#endif + +static void kvm_set_pud(pud_t *pudp, pud_t pud) +{ + kvm_mmu_write(pudp, pud_val(pud)); +} + +#if PAGETABLE_LEVELS == 4 +static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + kvm_mmu_write(pgdp, pgd_val(pgd)); +} +#endif +#endif /* PAGETABLE_LEVELS >= 3 */ + +static void kvm_flush_tlb(void) +{ + struct kvm_mmu_op_flush_tlb ftlb = { + .header.op = KVM_MMU_OP_FLUSH_TLB, + }; + + kvm_deferred_mmu_op(&ftlb, sizeof ftlb); +} + +static void kvm_release_pt(u32 pfn) +{ + struct kvm_mmu_op_release_pt rpt = { + .header.op = KVM_MMU_OP_RELEASE_PT, + .pt_phys = (u64)pfn << PAGE_SHIFT, + }; + + kvm_mmu_op(&rpt, sizeof rpt); +} + +static void kvm_enter_lazy_mmu(void) +{ + struct kvm_para_state *state = kvm_para_state(); + + paravirt_enter_lazy_mmu(); + state->mode = paravirt_get_lazy_mode(); +} + +static void kvm_leave_lazy_mmu(void) +{ + struct kvm_para_state *state = kvm_para_state(); + + mmu_queue_flush(state); + paravirt_leave_lazy(paravirt_get_lazy_mode()); + state->mode = paravirt_get_lazy_mode(); +} + +static void paravirt_ops_setup(void) +{ + pv_info.name = "KVM"; + pv_info.paravirt_enabled = 1; + + if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) + pv_cpu_ops.io_delay = kvm_io_delay; + + if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) { + pv_mmu_ops.set_pte = kvm_set_pte; + pv_mmu_ops.set_pte_at = kvm_set_pte_at; + pv_mmu_ops.set_pmd = kvm_set_pmd; +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; + pv_mmu_ops.set_pte_present = kvm_set_pte_present; + pv_mmu_ops.pte_clear = kvm_pte_clear; + pv_mmu_ops.pmd_clear = kvm_pmd_clear; +#endif + pv_mmu_ops.set_pud = kvm_set_pud; +#if PAGETABLE_LEVELS == 4 + pv_mmu_ops.set_pgd = kvm_set_pgd; +#endif +#endif + pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; + pv_mmu_ops.release_pte = kvm_release_pt; + pv_mmu_ops.release_pmd = kvm_release_pt; + pv_mmu_ops.release_pud = kvm_release_pt; + + pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; + pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; + } +} + +void __init kvm_guest_init(void) +{ + if (!kvm_para_available()) + return; + + paravirt_ops_setup(); +} diff --git a/trunk/arch/x86/kernel/kvmclock.c b/trunk/arch/x86/kernel/kvmclock.c new file mode 100644 index 000000000000..ddee04043aeb --- /dev/null +++ b/trunk/arch/x86/kernel/kvmclock.c @@ -0,0 +1,187 @@ +/* KVM paravirtual clock driver. A clocksource implementation + Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#include +#include +#include +#include +#include +#include +#include + +#define KVM_SCALE 22 + +static int kvmclock = 1; + +static int parse_no_kvmclock(char *arg) +{ + kvmclock = 0; + return 0; +} +early_param("no-kvmclock", parse_no_kvmclock); + +/* The hypervisor will put information about time periodically here */ +static DEFINE_PER_CPU_SHARED_ALIGNED(struct kvm_vcpu_time_info, hv_clock); +#define get_clock(cpu, field) per_cpu(hv_clock, cpu).field + +static inline u64 kvm_get_delta(u64 last_tsc) +{ + int cpu = smp_processor_id(); + u64 delta = native_read_tsc() - last_tsc; + return (delta * get_clock(cpu, tsc_to_system_mul)) >> KVM_SCALE; +} + +static struct kvm_wall_clock wall_clock; +static cycle_t kvm_clock_read(void); +/* + * The wallclock is the time of day when we booted. Since then, some time may + * have elapsed since the hypervisor wrote the data. So we try to account for + * that with system time + */ +unsigned long kvm_get_wallclock(void) +{ + u32 wc_sec, wc_nsec; + u64 delta; + struct timespec ts; + int version, nsec; + int low, high; + + low = (int)__pa(&wall_clock); + high = ((u64)__pa(&wall_clock) >> 32); + + delta = kvm_clock_read(); + + native_write_msr(MSR_KVM_WALL_CLOCK, low, high); + do { + version = wall_clock.wc_version; + rmb(); + wc_sec = wall_clock.wc_sec; + wc_nsec = wall_clock.wc_nsec; + rmb(); + } while ((wall_clock.wc_version != version) || (version & 1)); + + delta = kvm_clock_read() - delta; + delta += wc_nsec; + nsec = do_div(delta, NSEC_PER_SEC); + set_normalized_timespec(&ts, wc_sec + delta, nsec); + /* + * Of all mechanisms of time adjustment I've tested, this one + * was the champion! + */ + return ts.tv_sec + 1; +} + +int kvm_set_wallclock(unsigned long now) +{ + return 0; +} + +/* + * This is our read_clock function. The host puts an tsc timestamp each time + * it updates a new time. Without the tsc adjustment, we can have a situation + * in which a vcpu starts to run earlier (smaller system_time), but probes + * time later (compared to another vcpu), leading to backwards time + */ +static cycle_t kvm_clock_read(void) +{ + u64 last_tsc, now; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + + last_tsc = get_clock(cpu, tsc_timestamp); + now = get_clock(cpu, system_time); + + now += kvm_get_delta(last_tsc); + preempt_enable(); + + return now; +} +static struct clocksource kvm_clock = { + .name = "kvm-clock", + .read = kvm_clock_read, + .rating = 400, + .mask = CLOCKSOURCE_MASK(64), + .mult = 1 << KVM_SCALE, + .shift = KVM_SCALE, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +static int kvm_register_clock(void) +{ + int cpu = smp_processor_id(); + int low, high; + low = (int)__pa(&per_cpu(hv_clock, cpu)) | 1; + high = ((u64)__pa(&per_cpu(hv_clock, cpu)) >> 32); + + return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high); +} + +static void kvm_setup_secondary_clock(void) +{ + /* + * Now that the first cpu already had this clocksource initialized, + * we shouldn't fail. + */ + WARN_ON(kvm_register_clock()); + /* ok, done with our trickery, call native */ + setup_secondary_APIC_clock(); +} + +/* + * After the clock is registered, the host will keep writing to the + * registered memory location. If the guest happens to shutdown, this memory + * won't be valid. In cases like kexec, in which you install a new kernel, this + * means a random memory location will be kept being written. So before any + * kind of shutdown from our side, we unregister the clock by writting anything + * that does not have the 'enable' bit set in the msr + */ +#ifdef CONFIG_KEXEC +static void kvm_crash_shutdown(struct pt_regs *regs) +{ + native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); + native_machine_crash_shutdown(regs); +} +#endif + +static void kvm_shutdown(void) +{ + native_write_msr_safe(MSR_KVM_SYSTEM_TIME, 0, 0); + native_machine_shutdown(); +} + +void __init kvmclock_init(void) +{ + if (!kvm_para_available()) + return; + + if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { + if (kvm_register_clock()) + return; + pv_time_ops.get_wallclock = kvm_get_wallclock; + pv_time_ops.set_wallclock = kvm_set_wallclock; + pv_time_ops.sched_clock = kvm_clock_read; + pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; + machine_ops.shutdown = kvm_shutdown; +#ifdef CONFIG_KEXEC + machine_ops.crash_shutdown = kvm_crash_shutdown; +#endif + clocksource_register(&kvm_clock); + } +} diff --git a/trunk/arch/x86/kernel/mfgpt_32.c b/trunk/arch/x86/kernel/mfgpt_32.c index b402c0f3f192..cfc2648d25ff 100644 --- a/trunk/arch/x86/kernel/mfgpt_32.c +++ b/trunk/arch/x86/kernel/mfgpt_32.c @@ -364,7 +364,8 @@ int __init mfgpt_timer_setup(void) geode_mfgpt_write(mfgpt_event_clock, MFGPT_REG_SETUP, val); /* Set up the clock event */ - mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, 32); + mfgpt_clockevent.mult = div_sc(MFGPT_HZ, NSEC_PER_SEC, + mfgpt_clockevent.shift); mfgpt_clockevent.min_delta_ns = clockevent_delta2ns(0xF, &mfgpt_clockevent); mfgpt_clockevent.max_delta_ns = clockevent_delta2ns(0xFFFE, diff --git a/trunk/arch/x86/kernel/mpparse.c b/trunk/arch/x86/kernel/mpparse.c index 70744e344fa1..3e2c54dc8b29 100644 --- a/trunk/arch/x86/kernel/mpparse.c +++ b/trunk/arch/x86/kernel/mpparse.c @@ -686,13 +686,11 @@ void __init get_smp_config(void) static int __init smp_scan_config(unsigned long base, unsigned long length, unsigned reserve) { - extern void __bad_mpf_size(void); unsigned int *bp = phys_to_virt(base); struct intel_mp_floating *mpf; Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length); - if (sizeof(*mpf) != 16) - __bad_mpf_size(); + BUILD_BUG_ON(sizeof(*mpf) != 16); while (length > 0) { mpf = (struct intel_mp_floating *)bp; @@ -801,7 +799,6 @@ void __init find_smp_config(void) #ifdef CONFIG_X86_IO_APIC #define MP_ISA_BUS 0 -#define MP_MAX_IOAPIC_PIN 127 extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS]; @@ -820,7 +817,7 @@ static int mp_find_ioapic(int gsi) return -1; } -static u8 uniq_ioapic_id(u8 id) +static u8 __init uniq_ioapic_id(u8 id) { #ifdef CONFIG_X86_32 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && @@ -909,14 +906,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */ intsrc.mpc_dstirq = pin; /* INTIN# */ - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, %d-%d\n", - intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); + MP_intsrc_info(&intsrc); } int es7000_plat; @@ -985,23 +975,14 @@ void __init mp_config_acpi_legacy_irqs(void) intsrc.mpc_srcbusirq = i; /* Identity mapped */ intsrc.mpc_dstirq = i; - Dprintk("Int: type %d, pol %d, trig %d, bus %d, irq %d, " - "%d-%d\n", intsrc.mpc_irqtype, intsrc.mpc_irqflag & 3, - (intsrc.mpc_irqflag >> 2) & 3, intsrc.mpc_srcbus, - intsrc.mpc_srcbusirq, intsrc.mpc_dstapic, - intsrc.mpc_dstirq); - - mp_irqs[mp_irq_entries] = intsrc; - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!\n"); + MP_intsrc_info(&intsrc); } } int mp_register_gsi(u32 gsi, int triggering, int polarity) { - int ioapic = -1; - int ioapic_pin = 0; - int idx, bit = 0; + int ioapic; + int ioapic_pin; #ifdef CONFIG_X86_32 #define MAX_GSI_NUM 4096 #define IRQ_COMPRESSION_START 64 @@ -1041,15 +1022,13 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) * with redundant pin->gsi mappings (but unique PCI devices); * we only program the IOAPIC on the first. */ - bit = ioapic_pin % 32; - idx = (ioapic_pin < 32) ? 0 : (ioapic_pin / 32); - if (idx > 3) { + if (ioapic_pin > MP_MAX_IOAPIC_PIN) { printk(KERN_ERR "Invalid reference to IOAPIC pin " "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); return gsi; } - if ((1 << bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) { + if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", mp_ioapic_routing[ioapic].apic_id, ioapic_pin); #ifdef CONFIG_X86_32 @@ -1059,7 +1038,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) #endif } - mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1 << bit); + set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); #ifdef CONFIG_X86_32 /* * For GSI >= 64, use IRQ compression diff --git a/trunk/arch/x86/kernel/paravirt.c b/trunk/arch/x86/kernel/paravirt.c index 3733412d1357..74f0c5ea2a03 100644 --- a/trunk/arch/x86/kernel/paravirt.c +++ b/trunk/arch/x86/kernel/paravirt.c @@ -366,11 +366,13 @@ struct pv_mmu_ops pv_mmu_ops = { .flush_tlb_single = native_flush_tlb_single, .flush_tlb_others = native_flush_tlb_others, - .alloc_pt = paravirt_nop, - .alloc_pd = paravirt_nop, - .alloc_pd_clone = paravirt_nop, - .release_pt = paravirt_nop, - .release_pd = paravirt_nop, + .alloc_pte = paravirt_nop, + .alloc_pmd = paravirt_nop, + .alloc_pmd_clone = paravirt_nop, + .alloc_pud = paravirt_nop, + .release_pte = paravirt_nop, + .release_pmd = paravirt_nop, + .release_pud = paravirt_nop, .set_pte = native_set_pte, .set_pte_at = native_set_pte_at, diff --git a/trunk/arch/x86/kernel/pci-calgary_64.c b/trunk/arch/x86/kernel/pci-calgary_64.c index 2edee22e9c30..e28ec497e142 100644 --- a/trunk/arch/x86/kernel/pci-calgary_64.c +++ b/trunk/arch/x86/kernel/pci-calgary_64.c @@ -43,6 +43,7 @@ #include #include #include +#include #ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT int use_calgary __read_mostly = 1; diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c index 3004d716539d..67e9b4a1e89d 100644 --- a/trunk/arch/x86/kernel/process.c +++ b/trunk/arch/x86/kernel/process.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include struct kmem_cache *task_xstate_cachep; @@ -42,3 +44,118 @@ void arch_task_cache_init(void) __alignof__(union thread_xstate), SLAB_PANIC, NULL); } + +static void do_nothing(void *unused) +{ +} + +/* + * cpu_idle_wait - Used to ensure that all the CPUs discard old value of + * pm_idle and update to new pm_idle value. Required while changing pm_idle + * handler on SMP systems. + * + * Caller must have changed pm_idle to the new value before the call. Old + * pm_idle value will not be used by any CPU after the return of this function. + */ +void cpu_idle_wait(void) +{ + smp_mb(); + /* kick all the CPUs so that they exit out of pm_idle */ + smp_call_function(do_nothing, NULL, 0, 1); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + +/* + * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, + * which can obviate IPI to trigger checking of need_resched. + * We execute MONITOR against need_resched and enter optimized wait state + * through MWAIT. Whenever someone changes need_resched, we would be woken + * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. + */ +void mwait_idle_with_hints(unsigned long ax, unsigned long cx) +{ + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __mwait(ax, cx); + } +} + +/* Default MONITOR/MWAIT with no hints, used for default C1 state */ +static void mwait_idle(void) +{ + if (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (!need_resched()) + __sti_mwait(0, 0); + else + local_irq_enable(); + } else + local_irq_enable(); +} + + +static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) +{ + if (force_mwait) + return 1; + /* Any C1 states supported? */ + return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; +} + +/* + * On SMP it's slightly faster (but much more power-consuming!) + * to poll the ->work.need_resched flag instead of waiting for the + * cross-CPU IPI to arrive. Use this option with caution. + */ +static void poll_idle(void) +{ + local_irq_enable(); + cpu_relax(); +} + +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) +{ + static int selected; + + if (selected) + return; +#ifdef CONFIG_X86_SMP + if (pm_idle == poll_idle && smp_num_siblings > 1) { + printk(KERN_WARNING "WARNING: polling idle and HT enabled," + " performance may degrade.\n"); + } +#endif + if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { + /* + * Skip, if setup has overridden idle. + * One CPU supports mwait => All CPUs supports mwait + */ + if (!pm_idle) { + printk(KERN_INFO "using mwait in idle threads.\n"); + pm_idle = mwait_idle; + } + } + selected = 1; +} + +static int __init idle_setup(char *str) +{ + if (!strcmp(str, "poll")) { + printk("using polling idle threads.\n"); + pm_idle = poll_idle; + } else if (!strcmp(str, "mwait")) + force_mwait = 1; + else + return -1; + + boot_option_idle_override = 1; + return 0; +} +early_param("idle", idle_setup); + diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c index 7adad088e373..f8476dfbb60d 100644 --- a/trunk/arch/x86/kernel/process_32.c +++ b/trunk/arch/x86/kernel/process_32.c @@ -111,12 +111,10 @@ void default_idle(void) */ smp_mb(); - local_irq_disable(); - if (!need_resched()) { + if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ - local_irq_disable(); - } - local_irq_enable(); + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } else { local_irq_enable(); @@ -128,17 +126,6 @@ void default_idle(void) EXPORT_SYMBOL(default_idle); #endif -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->work.need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - local_irq_enable(); - cpu_relax(); -} - #ifdef CONFIG_HOTPLUG_CPU #include /* We don't actually take CPU down, just spin without interrupts. */ @@ -196,6 +183,7 @@ void cpu_idle(void) if (cpu_is_offline(cpu)) play_dead(); + local_irq_disable(); __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } @@ -206,104 +194,6 @@ void cpu_idle(void) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 0, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(ax, cx); - else - local_irq_enable(); - } else - local_irq_enable(); -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - local_irq_enable(); - mwait_idle_with_hints(0, 0); -} - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - static int selected; - - if (selected) - return; -#ifdef CONFIG_X86_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * Skip, if setup has overridden idle. - * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -550,7 +440,7 @@ static void hard_enable_TSC(void) write_cr4(read_cr4() & ~X86_CR4_TSD); } -void enable_TSC(void) +static void enable_TSC(void) { preempt_disable(); if (test_and_clear_thread_flag(TIF_NOTSC)) diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c index 891af1a1b48a..e2319f39988b 100644 --- a/trunk/arch/x86/kernel/process_64.c +++ b/trunk/arch/x86/kernel/process_64.c @@ -106,26 +106,13 @@ void default_idle(void) * test NEED_RESCHED: */ smp_mb(); - local_irq_disable(); - if (!need_resched()) { + if (!need_resched()) safe_halt(); /* enables interrupts racelessly */ - local_irq_disable(); - } - local_irq_enable(); + else + local_irq_enable(); current_thread_info()->status |= TS_POLLING; } -/* - * On SMP it's slightly faster (but much more power-consuming!) - * to poll the ->need_resched flag instead of waiting for the - * cross-CPU IPI to arrive. Use this option with caution. - */ -static void poll_idle(void) -{ - local_irq_enable(); - cpu_relax(); -} - #ifdef CONFIG_HOTPLUG_CPU DECLARE_PER_CPU(int, cpu_state); @@ -192,110 +179,6 @@ void cpu_idle(void) } } -static void do_nothing(void *unused) -{ -} - -/* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. - */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 0, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); - -/* - * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, - * which can obviate IPI to trigger checking of need_resched. - * We execute MONITOR against need_resched and enter optimized wait state - * through MWAIT. Whenever someone changes need_resched, we would be woken - * up from MWAIT (without an IPI). - * - * New with Core Duo processors, MWAIT can take some hints based on CPU - * capability. - */ -void mwait_idle_with_hints(unsigned long ax, unsigned long cx) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __mwait(ax, cx); - } -} - -/* Default MONITOR/MWAIT with no hints, used for default C1 state */ -static void mwait_idle(void) -{ - if (!need_resched()) { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - smp_mb(); - if (!need_resched()) - __sti_mwait(0, 0); - else - local_irq_enable(); - } else { - local_irq_enable(); - } -} - - -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) -{ - if (force_mwait) - return 1; - /* Any C1 states supported? */ - return c->cpuid_level >= 5 && ((cpuid_edx(5) >> 4) & 0xf) > 0; -} - -void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) -{ - static int selected; - - if (selected) - return; -#ifdef CONFIG_X86_SMP - if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," - " performance may degrade.\n"); - } -#endif - if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) { - /* - * Skip, if setup has overridden idle. - * One CPU supports mwait => All CPUs supports mwait - */ - if (!pm_idle) { - printk(KERN_INFO "using mwait in idle threads.\n"); - pm_idle = mwait_idle; - } - } - selected = 1; -} - -static int __init idle_setup(char *str) -{ - if (!strcmp(str, "poll")) { - printk("using polling idle threads.\n"); - pm_idle = poll_idle; - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else - return -1; - - boot_option_idle_override = 1; - return 0; -} -early_param("idle", idle_setup); - /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs * regs) { @@ -562,7 +445,7 @@ static void hard_enable_TSC(void) write_cr4(read_cr4() & ~X86_CR4_TSD); } -void enable_TSC(void) +static void enable_TSC(void) { preempt_disable(); if (test_and_clear_thread_flag(TIF_NOTSC)) diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c index 559c1b027417..fb03ef380f0e 100644 --- a/trunk/arch/x86/kernel/ptrace.c +++ b/trunk/arch/x86/kernel/ptrace.c @@ -1207,96 +1207,15 @@ static int genregs32_set(struct task_struct *target, return ret; } -static long ptrace32_siginfo(unsigned request, u32 pid, u32 addr, u32 data) +long compat_arch_ptrace(struct task_struct *child, compat_long_t request, + compat_ulong_t caddr, compat_ulong_t cdata) { - siginfo_t __user *si = compat_alloc_user_space(sizeof(siginfo_t)); - compat_siginfo_t __user *si32 = compat_ptr(data); - siginfo_t ssi; - int ret; - - if (request == PTRACE_SETSIGINFO) { - memset(&ssi, 0, sizeof(siginfo_t)); - ret = copy_siginfo_from_user32(&ssi, si32); - if (ret) - return ret; - if (copy_to_user(si, &ssi, sizeof(siginfo_t))) - return -EFAULT; - } - ret = sys_ptrace(request, pid, addr, (unsigned long)si); - if (ret) - return ret; - if (request == PTRACE_GETSIGINFO) { - if (copy_from_user(&ssi, si, sizeof(siginfo_t))) - return -EFAULT; - ret = copy_siginfo_to_user32(si32, &ssi); - } - return ret; -} - -asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) -{ - struct task_struct *child; - struct pt_regs *childregs; + unsigned long addr = caddr; + unsigned long data = cdata; void __user *datap = compat_ptr(data); int ret; __u32 val; - switch (request) { - case PTRACE_TRACEME: - case PTRACE_ATTACH: - case PTRACE_KILL: - case PTRACE_CONT: - case PTRACE_SINGLESTEP: - case PTRACE_SINGLEBLOCK: - case PTRACE_DETACH: - case PTRACE_SYSCALL: - case PTRACE_OLDSETOPTIONS: - case PTRACE_SETOPTIONS: - case PTRACE_SET_THREAD_AREA: - case PTRACE_GET_THREAD_AREA: -#ifdef X86_BTS - case PTRACE_BTS_CONFIG: - case PTRACE_BTS_STATUS: - case PTRACE_BTS_SIZE: - case PTRACE_BTS_GET: - case PTRACE_BTS_CLEAR: - case PTRACE_BTS_DRAIN: -#endif - return sys_ptrace(request, pid, addr, data); - - default: - return -EINVAL; - - case PTRACE_PEEKTEXT: - case PTRACE_PEEKDATA: - case PTRACE_POKEDATA: - case PTRACE_POKETEXT: - case PTRACE_POKEUSR: - case PTRACE_PEEKUSR: - case PTRACE_GETREGS: - case PTRACE_SETREGS: - case PTRACE_SETFPREGS: - case PTRACE_GETFPREGS: - case PTRACE_SETFPXREGS: - case PTRACE_GETFPXREGS: - case PTRACE_GETEVENTMSG: - break; - - case PTRACE_SETSIGINFO: - case PTRACE_GETSIGINFO: - return ptrace32_siginfo(request, pid, addr, data); - } - - child = ptrace_get_task_struct(pid); - if (IS_ERR(child)) - return PTR_ERR(child); - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out; - - childregs = task_pt_regs(child); - switch (request) { case PTRACE_PEEKUSR: ret = getreg32(child, addr, &val); @@ -1343,12 +1262,14 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) sizeof(struct user32_fxsr_struct), datap); + case PTRACE_GET_THREAD_AREA: + case PTRACE_SET_THREAD_AREA: + return arch_ptrace(child, request, addr, data); + default: return compat_ptrace_request(child, request, addr, data); } - out: - put_task_struct(child); return ret; } diff --git a/trunk/arch/x86/kernel/reboot.c b/trunk/arch/x86/kernel/reboot.c index 19c9386ac118..a4a838306b2c 100644 --- a/trunk/arch/x86/kernel/reboot.c +++ b/trunk/arch/x86/kernel/reboot.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -15,7 +16,6 @@ # include # include # include -# include #else # include #endif @@ -275,7 +275,7 @@ void machine_real_restart(unsigned char *code, int length) /* Remap the kernel at virtual address zero, as well as offset zero from the kernel segment. This assumes the kernel segment starts at virtual address PAGE_OFFSET. */ - memcpy(swapper_pg_dir, swapper_pg_dir + USER_PGD_PTRS, + memcpy(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, sizeof(swapper_pg_dir [0]) * KERNEL_PGD_PTRS); /* @@ -399,7 +399,7 @@ static void native_machine_emergency_restart(void) } } -static void native_machine_shutdown(void) +void native_machine_shutdown(void) { /* Stop the cpus and apics */ #ifdef CONFIG_SMP @@ -470,7 +470,10 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .emergency_restart = native_machine_emergency_restart, .restart = native_machine_restart, - .halt = native_machine_halt + .halt = native_machine_halt, +#ifdef CONFIG_KEXEC + .crash_shutdown = native_machine_crash_shutdown, +#endif }; void machine_power_off(void) @@ -498,3 +501,9 @@ void machine_halt(void) machine_ops.halt(); } +#ifdef CONFIG_KEXEC +void machine_crash_shutdown(struct pt_regs *regs) +{ + machine_ops.crash_shutdown(regs); +} +#endif diff --git a/trunk/arch/x86/kernel/setup.c b/trunk/arch/x86/kernel/setup.c index 0d1f44ae6eea..c0c68c18a788 100644 --- a/trunk/arch/x86/kernel/setup.c +++ b/trunk/arch/x86/kernel/setup.c @@ -18,8 +18,6 @@ unsigned disabled_cpus __cpuinitdata; unsigned int boot_cpu_physical_apicid = -1U; EXPORT_SYMBOL(boot_cpu_physical_apicid); -physid_mask_t phys_cpu_present_map; - DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID; EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); diff --git a/trunk/arch/x86/kernel/setup_32.c b/trunk/arch/x86/kernel/setup_32.c index 78828b0f604f..2283422af794 100644 --- a/trunk/arch/x86/kernel/setup_32.c +++ b/trunk/arch/x86/kernel/setup_32.c @@ -47,6 +47,7 @@ #include #include #include +#include #include