diff --git a/[refs] b/[refs] index 1c4536c93f7f..2b59f49e6bed 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: d992895ba2b27cf5adf1ba0ad6d27662adc54c5e +refs/heads/master: bf3a46aa9b96f6eb3a49a568f72a2801c3e830c0 diff --git a/trunk/arch/ppc/boot/utils/addRamDisk.c b/trunk/arch/ppc/boot/utils/addRamDisk.c new file mode 100644 index 000000000000..93400dfcce7f --- /dev/null +++ b/trunk/arch/ppc/boot/utils/addRamDisk.c @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include +#include +#include + +#define ElfHeaderSize (64 * 1024) +#define ElfPages (ElfHeaderSize / 4096) +#define KERNELBASE (0xc0000000) + +void get4k(FILE *file, char *buf ) +{ + unsigned j; + unsigned num = fread(buf, 1, 4096, file); + for ( j=num; j<4096; ++j ) + buf[j] = 0; +} + +void put4k(FILE *file, char *buf ) +{ + fwrite(buf, 1, 4096, file); +} + +void death(const char *msg, FILE *fdesc, const char *fname) +{ + printf(msg); + fclose(fdesc); + unlink(fname); + exit(1); +} + +int main(int argc, char **argv) +{ + char inbuf[4096]; + FILE *ramDisk = NULL; + FILE *inputVmlinux = NULL; + FILE *outputVmlinux = NULL; + unsigned i = 0; + u_int32_t ramFileLen = 0; + u_int32_t ramLen = 0; + u_int32_t roundR = 0; + u_int32_t kernelLen = 0; + u_int32_t actualKernelLen = 0; + u_int32_t round = 0; + u_int32_t roundedKernelLen = 0; + u_int32_t ramStartOffs = 0; + u_int32_t ramPages = 0; + u_int32_t roundedKernelPages = 0; + u_int32_t hvReleaseData = 0; + u_int32_t eyeCatcher = 0xc8a5d9c4; + u_int32_t naca = 0; + u_int32_t xRamDisk = 0; + u_int32_t xRamDiskSize = 0; + if ( argc < 2 ) { + printf("Name of RAM disk file missing.\n"); + exit(1); + } + + if ( argc < 3 ) { + printf("Name of vmlinux file missing.\n"); + exit(1); + } + + if ( argc < 4 ) { + printf("Name of vmlinux output file missing.\n"); + exit(1); + } + + ramDisk = fopen(argv[1], "r"); + if ( ! ramDisk ) { + printf("RAM disk file \"%s\" failed to open.\n", argv[1]); + exit(1); + } + inputVmlinux = fopen(argv[2], "r"); + if ( ! inputVmlinux ) { + printf("vmlinux file \"%s\" failed to open.\n", argv[2]); + exit(1); + } + outputVmlinux = fopen(argv[3], "w+"); + if ( ! outputVmlinux ) { + printf("output vmlinux file \"%s\" failed to open.\n", argv[3]); + exit(1); + } + fseek(ramDisk, 0, SEEK_END); + ramFileLen = ftell(ramDisk); + fseek(ramDisk, 0, SEEK_SET); + printf("%s file size = %d\n", argv[1], ramFileLen); + + ramLen = ramFileLen; + + roundR = 4096 - (ramLen % 4096); + if ( roundR ) { + printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR); + ramLen += roundR; + } + + printf("Rounded RAM disk size is %d\n", ramLen); + fseek(inputVmlinux, 0, SEEK_END); + kernelLen = ftell(inputVmlinux); + fseek(inputVmlinux, 0, SEEK_SET); + printf("kernel file size = %d\n", kernelLen); + if ( kernelLen == 0 ) { + printf("You must have a linux kernel specified as argv[2]\n"); + exit(1); + } + + actualKernelLen = kernelLen - ElfHeaderSize; + + printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); + + round = actualKernelLen % 4096; + roundedKernelLen = actualKernelLen; + if ( round ) + roundedKernelLen += (4096 - round); + + printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen); + + ramStartOffs = roundedKernelLen; + ramPages = ramLen / 4096; + + printf("RAM disk pages to copy = %d\n", ramPages); + + // Copy 64K ELF header + for (i=0; i<(ElfPages); ++i) { + get4k( inputVmlinux, inbuf ); + put4k( outputVmlinux, inbuf ); + } + + roundedKernelPages = roundedKernelLen / 4096; + + fseek(inputVmlinux, ElfHeaderSize, SEEK_SET); + + for ( i=0; i .text .globl _start diff --git a/trunk/arch/ppc64/boot/div64.S b/trunk/arch/ppc64/boot/div64.S index 722f360a32a9..38f7e466d7d6 100644 --- a/trunk/arch/ppc64/boot/div64.S +++ b/trunk/arch/ppc64/boot/div64.S @@ -13,7 +13,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include "ppc_asm.h" +#include .globl __div64_32 __div64_32: diff --git a/trunk/arch/ppc64/boot/elf.h b/trunk/arch/ppc64/boot/elf.h deleted file mode 100644 index d4828fcf1cb9..000000000000 --- a/trunk/arch/ppc64/boot/elf.h +++ /dev/null @@ -1,149 +0,0 @@ -#ifndef _PPC_BOOT_ELF_H_ -#define _PPC_BOOT_ELF_H_ - -/* 32-bit ELF base types. */ -typedef unsigned int Elf32_Addr; -typedef unsigned short Elf32_Half; -typedef unsigned int Elf32_Off; -typedef signed int Elf32_Sword; -typedef unsigned int Elf32_Word; - -/* 64-bit ELF base types. */ -typedef unsigned long long Elf64_Addr; -typedef unsigned short Elf64_Half; -typedef signed short Elf64_SHalf; -typedef unsigned long long Elf64_Off; -typedef signed int Elf64_Sword; -typedef unsigned int Elf64_Word; -typedef unsigned long long Elf64_Xword; -typedef signed long long Elf64_Sxword; - -/* These constants are for the segment types stored in the image headers */ -#define PT_NULL 0 -#define PT_LOAD 1 -#define PT_DYNAMIC 2 -#define PT_INTERP 3 -#define PT_NOTE 4 -#define PT_SHLIB 5 -#define PT_PHDR 6 -#define PT_TLS 7 /* Thread local storage segment */ -#define PT_LOOS 0x60000000 /* OS-specific */ -#define PT_HIOS 0x6fffffff /* OS-specific */ -#define PT_LOPROC 0x70000000 -#define PT_HIPROC 0x7fffffff -#define PT_GNU_EH_FRAME 0x6474e550 - -#define PT_GNU_STACK (PT_LOOS + 0x474e551) - -/* These constants define the different elf file types */ -#define ET_NONE 0 -#define ET_REL 1 -#define ET_EXEC 2 -#define ET_DYN 3 -#define ET_CORE 4 -#define ET_LOPROC 0xff00 -#define ET_HIPROC 0xffff - -/* These constants define the various ELF target machines */ -#define EM_NONE 0 -#define EM_PPC 20 /* PowerPC */ -#define EM_PPC64 21 /* PowerPC64 */ - -#define EI_NIDENT 16 - -typedef struct elf32_hdr { - unsigned char e_ident[EI_NIDENT]; - Elf32_Half e_type; - Elf32_Half e_machine; - Elf32_Word e_version; - Elf32_Addr e_entry; /* Entry point */ - Elf32_Off e_phoff; - Elf32_Off e_shoff; - Elf32_Word e_flags; - Elf32_Half e_ehsize; - Elf32_Half e_phentsize; - Elf32_Half e_phnum; - Elf32_Half e_shentsize; - Elf32_Half e_shnum; - Elf32_Half e_shstrndx; -} Elf32_Ehdr; - -typedef struct elf64_hdr { - unsigned char e_ident[16]; /* ELF "magic number" */ - Elf64_Half e_type; - Elf64_Half e_machine; - Elf64_Word e_version; - Elf64_Addr e_entry; /* Entry point virtual address */ - Elf64_Off e_phoff; /* Program header table file offset */ - Elf64_Off e_shoff; /* Section header table file offset */ - Elf64_Word e_flags; - Elf64_Half e_ehsize; - Elf64_Half e_phentsize; - Elf64_Half e_phnum; - Elf64_Half e_shentsize; - Elf64_Half e_shnum; - Elf64_Half e_shstrndx; -} Elf64_Ehdr; - -/* These constants define the permissions on sections in the program - header, p_flags. */ -#define PF_R 0x4 -#define PF_W 0x2 -#define PF_X 0x1 - -typedef struct elf32_phdr { - Elf32_Word p_type; - Elf32_Off p_offset; - Elf32_Addr p_vaddr; - Elf32_Addr p_paddr; - Elf32_Word p_filesz; - Elf32_Word p_memsz; - Elf32_Word p_flags; - Elf32_Word p_align; -} Elf32_Phdr; - -typedef struct elf64_phdr { - Elf64_Word p_type; - Elf64_Word p_flags; - Elf64_Off p_offset; /* Segment file offset */ - Elf64_Addr p_vaddr; /* Segment virtual address */ - Elf64_Addr p_paddr; /* Segment physical address */ - Elf64_Xword p_filesz; /* Segment size in file */ - Elf64_Xword p_memsz; /* Segment size in memory */ - Elf64_Xword p_align; /* Segment alignment, file & memory */ -} Elf64_Phdr; - -#define EI_MAG0 0 /* e_ident[] indexes */ -#define EI_MAG1 1 -#define EI_MAG2 2 -#define EI_MAG3 3 -#define EI_CLASS 4 -#define EI_DATA 5 -#define EI_VERSION 6 -#define EI_OSABI 7 -#define EI_PAD 8 - -#define ELFMAG0 0x7f /* EI_MAG */ -#define ELFMAG1 'E' -#define ELFMAG2 'L' -#define ELFMAG3 'F' -#define ELFMAG "\177ELF" -#define SELFMAG 4 - -#define ELFCLASSNONE 0 /* EI_CLASS */ -#define ELFCLASS32 1 -#define ELFCLASS64 2 -#define ELFCLASSNUM 3 - -#define ELFDATANONE 0 /* e_ident[EI_DATA] */ -#define ELFDATA2LSB 1 -#define ELFDATA2MSB 2 - -#define EV_NONE 0 /* e_version, EI_VERSION */ -#define EV_CURRENT 1 -#define EV_NUM 2 - -#define ELFOSABI_NONE 0 -#define ELFOSABI_LINUX 3 - -#endif /* _PPC_BOOT_ELF_H_ */ diff --git a/trunk/arch/ppc64/boot/main.c b/trunk/arch/ppc64/boot/main.c index 99e68cfbe688..199d9804f61c 100644 --- a/trunk/arch/ppc64/boot/main.c +++ b/trunk/arch/ppc64/boot/main.c @@ -8,28 +8,36 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include -#include -#include "elf.h" -#include "page.h" -#include "string.h" -#include "stdio.h" -#include "prom.h" +#include "ppc32-types.h" #include "zlib.h" - -static void gunzip(void *, int, unsigned char *, int *); -extern void flush_cache(void *, unsigned long); - +#include +#include +#include +#include + +extern void *finddevice(const char *); +extern int getprop(void *, const char *, void *, int); +extern void printf(const char *fmt, ...); +extern int sprintf(char *buf, const char *fmt, ...); +void gunzip(void *, int, unsigned char *, int *); +void *claim(unsigned int, unsigned int, unsigned int); +void flush_cache(void *, unsigned long); +void pause(void); +extern void exit(void); + +unsigned long strlen(const char *s); +void *memmove(void *dest, const void *src, unsigned long n); +void *memcpy(void *dest, const void *src, unsigned long n); /* Value picked to match that used by yaboot */ #define PROG_START 0x01400000 #define RAM_END (256<<20) // Fixme: use OF */ -static char *avail_ram; -static char *begin_avail, *end_avail; -static char *avail_high; -static unsigned int heap_use; -static unsigned int heap_max; +char *avail_ram; +char *begin_avail, *end_avail; +char *avail_high; +unsigned int heap_use; +unsigned int heap_max; extern char _start[]; extern char _vmlinux_start[]; @@ -44,9 +52,9 @@ struct addr_range { unsigned long size; unsigned long memsize; }; -static struct addr_range vmlinux = {0, 0, 0}; -static struct addr_range vmlinuz = {0, 0, 0}; -static struct addr_range initrd = {0, 0, 0}; +struct addr_range vmlinux = {0, 0, 0}; +struct addr_range vmlinuz = {0, 0, 0}; +struct addr_range initrd = {0, 0, 0}; static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ @@ -56,6 +64,13 @@ typedef void (*kernel_entry_t)( unsigned long, void *); +int (*prom)(void *); + +void *chosen_handle; +void *stdin; +void *stdout; +void *stderr; + #undef DEBUG static unsigned long claim_base = PROG_START; @@ -262,7 +277,7 @@ void zfree(void *x, void *addr, unsigned nb) #define DEFLATED 8 -static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) { z_stream s; int r, i, flags; diff --git a/trunk/arch/ppc64/boot/page.h b/trunk/arch/ppc64/boot/page.h deleted file mode 100644 index 14eca30fef64..000000000000 --- a/trunk/arch/ppc64/boot/page.h +++ /dev/null @@ -1,34 +0,0 @@ -#ifndef _PPC_BOOT_PAGE_H -#define _PPC_BOOT_PAGE_H -/* - * Copyright (C) 2001 PPC64 Team, IBM Corp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifdef __ASSEMBLY__ -#define ASM_CONST(x) x -#else -#define __ASM_CONST(x) x##UL -#define ASM_CONST(x) __ASM_CONST(x) -#endif - -/* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT 12 -#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE-1)) - -/* align addr on a size boundary - adjust address up/down if needed */ -#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) -#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) - -/* align addr on a size boundary - adjust address up if needed */ -#define _ALIGN(addr,size) _ALIGN_UP(addr,size) - -/* to align the pointer to the (next) page boundary */ -#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) - -#endif /* _PPC_BOOT_PAGE_H */ diff --git a/trunk/arch/ppc64/boot/ppc32-types.h b/trunk/arch/ppc64/boot/ppc32-types.h new file mode 100644 index 000000000000..f7b8884f8f70 --- /dev/null +++ b/trunk/arch/ppc64/boot/ppc32-types.h @@ -0,0 +1,36 @@ +#ifndef _PPC64_TYPES_H +#define _PPC64_TYPES_H + +typedef __signed__ char __s8; +typedef unsigned char __u8; + +typedef __signed__ short __s16; +typedef unsigned short __u16; + +typedef __signed__ int __s32; +typedef unsigned int __u32; + +typedef __signed__ long long __s64; +typedef unsigned long long __u64; + +typedef signed char s8; +typedef unsigned char u8; + +typedef signed short s16; +typedef unsigned short u16; + +typedef signed int s32; +typedef unsigned int u32; + +typedef signed long long s64; +typedef unsigned long long u64; + +typedef struct { + __u32 u[4]; +} __attribute((aligned(16))) __vector128; + +#define BITS_PER_LONG 32 + +typedef __vector128 vector128; + +#endif /* _PPC64_TYPES_H */ diff --git a/trunk/arch/ppc64/boot/ppc_asm.h b/trunk/arch/ppc64/boot/ppc_asm.h deleted file mode 100644 index 1c2c2817f9b7..000000000000 --- a/trunk/arch/ppc64/boot/ppc_asm.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef _PPC64_PPC_ASM_H -#define _PPC64_PPC_ASM_H -/* - * - * Definitions used by various bits of low-level assembly code on PowerPC. - * - * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* Condition Register Bit Fields */ - -#define cr0 0 -#define cr1 1 -#define cr2 2 -#define cr3 3 -#define cr4 4 -#define cr5 5 -#define cr6 6 -#define cr7 7 - - -/* General Purpose Registers (GPRs) */ - -#define r0 0 -#define r1 1 -#define r2 2 -#define r3 3 -#define r4 4 -#define r5 5 -#define r6 6 -#define r7 7 -#define r8 8 -#define r9 9 -#define r10 10 -#define r11 11 -#define r12 12 -#define r13 13 -#define r14 14 -#define r15 15 -#define r16 16 -#define r17 17 -#define r18 18 -#define r19 19 -#define r20 20 -#define r21 21 -#define r22 22 -#define r23 23 -#define r24 24 -#define r25 25 -#define r26 26 -#define r27 27 -#define r28 28 -#define r29 29 -#define r30 30 -#define r31 31 - -#endif /* _PPC64_PPC_ASM_H */ diff --git a/trunk/arch/ppc64/boot/prom.c b/trunk/arch/ppc64/boot/prom.c index 4bea2f4dcb06..5e48b80ff5a0 100644 --- a/trunk/arch/ppc64/boot/prom.c +++ b/trunk/arch/ppc64/boot/prom.c @@ -7,19 +7,43 @@ * 2 of the License, or (at your option) any later version. */ #include -#include -#include "string.h" -#include "stdio.h" -#include "prom.h" +#include +#include +#include + +extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor); + +/* The unnecessary pointer compare is there + * to check for type safety (n must be 64bit) + */ +# define do_div(n,base) ({ \ + __u32 __base = (base); \ + __u32 __rem; \ + (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ + if (((n) >> 32) == 0) { \ + __rem = (__u32)(n) % __base; \ + (n) = (__u32)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) int (*prom)(void *); void *chosen_handle; - void *stdin; void *stdout; void *stderr; +void exit(void); +void *finddevice(const char *name); +int getprop(void *phandle, const char *name, void *buf, int buflen); +void chrpboot(int a1, int a2, void *prom); /* in main.c */ + +int printf(char *fmt, ...); + +/* there is no convenient header to get this from... -- paulus */ +extern unsigned long strlen(const char *); int write(void *handle, void *ptr, int nb) @@ -186,6 +210,107 @@ fputs(char *str, void *f) return write(f, str, n) == n? 0: -1; } +int +readchar(void) +{ + char ch; + + for (;;) { + switch (read(stdin, &ch, 1)) { + case 1: + return ch; + case -1: + printf("read(stdin) returned -1\r\n"); + return -1; + } + } +} + +static char line[256]; +static char *lineptr; +static int lineleft; + +int +getchar(void) +{ + int c; + + if (lineleft == 0) { + lineptr = line; + for (;;) { + c = readchar(); + if (c == -1 || c == 4) + break; + if (c == '\r' || c == '\n') { + *lineptr++ = '\n'; + putchar('\n'); + break; + } + switch (c) { + case 0177: + case '\b': + if (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + case 'U' & 0x1F: + while (lineptr > line) { + putchar('\b'); + putchar(' '); + putchar('\b'); + --lineptr; + } + break; + default: + if (lineptr >= &line[sizeof(line) - 1]) + putchar('\a'); + else { + putchar(c); + *lineptr++ = c; + } + } + } + lineleft = lineptr - line; + lineptr = line; + } + if (lineleft == 0) + return -1; + --lineleft; + return *lineptr++; +} + + + +/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ +unsigned char _ctype[] = { +_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ +_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ +_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ +_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ +_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ +_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ +_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ +_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ +_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ +_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ +_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ +_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ +_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ +_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ +_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ +_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ +_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ +_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ + size_t strnlen(const char * s, size_t count) { const char *sc; @@ -195,30 +320,44 @@ size_t strnlen(const char * s, size_t count) return sc - s; } -extern unsigned int __div64_32(unsigned long long *dividend, - unsigned int divisor); +unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) +{ + unsigned long result = 0,value; -/* The unnecessary pointer compare is there - * to check for type safety (n must be 64bit) - */ -# define do_div(n,base) ({ \ - unsigned int __base = (base); \ - unsigned int __rem; \ - (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ - if (((n) >> 32) == 0) { \ - __rem = (unsigned int)(n) % __base; \ - (n) = (unsigned int)(n) / __base; \ - } else \ - __rem = __div64_32(&(n), __base); \ - __rem; \ - }) + if (!base) { + base = 10; + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x') && isxdigit(cp[1])) { + cp++; + base = 16; + } + } + } + while (isxdigit(*cp) && + (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { + result = result*base + value; + cp++; + } + if (endp) + *endp = (char *)cp; + return result; +} + +long simple_strtol(const char *cp,char **endp,unsigned int base) +{ + if(*cp=='-') + return -simple_strtoul(cp+1,endp,base); + return simple_strtoul(cp,endp,base); +} static int skip_atoi(const char **s) { - int i, c; + int i=0; - for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) - i = i*10 + c - '0'; + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; return i; } @@ -297,6 +436,9 @@ static char * number(char * str, unsigned long long num, int base, int size, int return str; } +/* Forward decl. needed for IP address printing stuff... */ +int sprintf(char * buf, const char *fmt, ...); + int vsprintf(char *buf, const char *fmt, va_list args) { int len; @@ -335,7 +477,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) /* get field width */ field_width = -1; - if ('0' <= *fmt && *fmt <= '9') + if (isdigit(*fmt)) field_width = skip_atoi(&fmt); else if (*fmt == '*') { ++fmt; @@ -351,7 +493,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) precision = -1; if (*fmt == '.') { ++fmt; - if ('0' <= *fmt && *fmt <= '9') + if (isdigit(*fmt)) precision = skip_atoi(&fmt); else if (*fmt == '*') { ++fmt; @@ -486,7 +628,7 @@ int sprintf(char * buf, const char *fmt, ...) static char sprint_buf[1024]; int -printf(const char *fmt, ...) +printf(char *fmt, ...) { va_list args; int n; diff --git a/trunk/arch/ppc64/boot/prom.h b/trunk/arch/ppc64/boot/prom.h deleted file mode 100644 index 96ab5aec740c..000000000000 --- a/trunk/arch/ppc64/boot/prom.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _PPC_BOOT_PROM_H_ -#define _PPC_BOOT_PROM_H_ - -extern int (*prom) (void *); -extern void *chosen_handle; - -extern void *stdin; -extern void *stdout; -extern void *stderr; - -extern int write(void *handle, void *ptr, int nb); -extern int read(void *handle, void *ptr, int nb); -extern void exit(void); -extern void pause(void); -extern void *finddevice(const char *); -extern void *claim(unsigned long virt, unsigned long size, unsigned long align); -extern int getprop(void *phandle, const char *name, void *buf, int buflen); -#endif /* _PPC_BOOT_PROM_H_ */ diff --git a/trunk/arch/ppc64/boot/stdio.h b/trunk/arch/ppc64/boot/stdio.h deleted file mode 100644 index 24bd3a8dee94..000000000000 --- a/trunk/arch/ppc64/boot/stdio.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _PPC_BOOT_STDIO_H_ -#define _PPC_BOOT_STDIO_H_ - -extern int printf(const char *fmt, ...); - -extern int sprintf(char *buf, const char *fmt, ...); - -extern int vsprintf(char *buf, const char *fmt, va_list args); - -extern int putc(int c, void *f); -extern int putchar(int c); -extern int getchar(void); - -extern int fputs(char *str, void *f); - -#endif /* _PPC_BOOT_STDIO_H_ */ diff --git a/trunk/arch/ppc64/boot/string.S b/trunk/arch/ppc64/boot/string.S index 7ade87ae7718..ba5f2d21c9ea 100644 --- a/trunk/arch/ppc64/boot/string.S +++ b/trunk/arch/ppc64/boot/string.S @@ -9,7 +9,7 @@ * NOTE: this code runs in 32 bit mode and is packaged as ELF32. */ -#include "ppc_asm.h" +#include .text .globl strcpy diff --git a/trunk/arch/ppc64/boot/string.h b/trunk/arch/ppc64/boot/string.h deleted file mode 100644 index 9289258bcbd6..000000000000 --- a/trunk/arch/ppc64/boot/string.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _PPC_BOOT_STRING_H_ -#define _PPC_BOOT_STRING_H_ - -extern char *strcpy(char *dest, const char *src); -extern char *strncpy(char *dest, const char *src, size_t n); -extern char *strcat(char *dest, const char *src); -extern int strcmp(const char *s1, const char *s2); -extern size_t strlen(const char *s); -extern size_t strnlen(const char *s, size_t count); - -extern void *memset(void *s, int c, size_t n); -extern void *memmove(void *dest, const void *src, unsigned long n); -extern void *memcpy(void *dest, const void *src, unsigned long n); -extern int memcmp(const void *s1, const void *s2, size_t n); - -#endif /* _PPC_BOOT_STRING_H_ */ diff --git a/trunk/arch/ppc64/boot/zlib.c b/trunk/arch/ppc64/boot/zlib.c index 0d910cd2079d..78837e884b8b 100644 --- a/trunk/arch/ppc64/boot/zlib.c +++ b/trunk/arch/ppc64/boot/zlib.c @@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long); /* Diagnostic functions */ #ifdef DEBUG_ZLIB -# include "stdio.h" +# include # ifndef verbose # define verbose 0 # endif diff --git a/trunk/arch/ppc64/configs/iSeries_defconfig b/trunk/arch/ppc64/configs/iSeries_defconfig index 219c6677abcc..394ba18b58c7 100644 --- a/trunk/arch/ppc64/configs/iSeries_defconfig +++ b/trunk/arch/ppc64/configs/iSeries_defconfig @@ -99,6 +99,7 @@ CONFIG_HZ_100=y # CONFIG_HZ_1000 is not set CONFIG_HZ=100 CONFIG_GENERIC_HARDIRQS=y +CONFIG_MSCHUNKS=y CONFIG_LPARCFG=y CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y diff --git a/trunk/arch/ppc64/kernel/LparData.c b/trunk/arch/ppc64/kernel/LparData.c index 0a9c23ca2f0c..1c11031c838e 100644 --- a/trunk/arch/ppc64/kernel/LparData.c +++ b/trunk/arch/ppc64/kernel/LparData.c @@ -51,17 +51,6 @@ struct HvReleaseData hvReleaseData = { 0xf4, 0x4b, 0xf6, 0xf4 }, }; -/* - * The NACA. The first dword of the naca is required by the iSeries - * hypervisor to point to itVpdAreas. The hypervisor finds the NACA - * through the pointer in hvReleaseData. - */ -struct naca_struct naca = { - .xItVpdAreas = &itVpdAreas, - .xRamDisk = 0, - .xRamDiskSize = 0, -}; - extern void system_reset_iSeries(void); extern void machine_check_iSeries(void); extern void data_access_iSeries(void); @@ -225,3 +214,29 @@ struct ItVpdAreas itVpdAreas = { 0,0 } }; + +struct msChunks msChunks; +EXPORT_SYMBOL(msChunks); + +/* Depending on whether this is called from iSeries or pSeries setup + * code, the location of the msChunks struct may or may not have + * to be reloc'd, so we force the caller to do that for us by passing + * in a pointer to the structure. + */ +unsigned long +msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + _msChunks->num_chunks = num_chunks; + _msChunks->chunk_size = chunk_size; + _msChunks->chunk_shift = __ilog2(chunk_size); + _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; + + mem = _ALIGN(mem, sizeof(msChunks_entry)); + _msChunks->abs = (msChunks_entry *)(mem + offset); + mem += num_chunks * sizeof(msChunks_entry); + + return mem; +} diff --git a/trunk/arch/ppc64/kernel/Makefile b/trunk/arch/ppc64/kernel/Makefile index f4b3bfcc109d..2ecccb6b4f8c 100644 --- a/trunk/arch/ppc64/kernel/Makefile +++ b/trunk/arch/ppc64/kernel/Makefile @@ -11,7 +11,7 @@ obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ - iommu.o sysfs.o vdso.o pmc.o firmware.o + iommu.o sysfs.o vdso.o pmc.o obj-y += vdso32/ vdso64/ obj-$(CONFIG_PPC_OF) += of_device.o @@ -50,10 +50,7 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o - -vio-obj-$(CONFIG_PPC_PSERIES) += pSeries_vio.o -vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o -obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) +obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_MPIC) += mpic.o diff --git a/trunk/arch/ppc64/kernel/asm-offsets.c b/trunk/arch/ppc64/kernel/asm-offsets.c index 17e35d0fed09..abb9e5b5da03 100644 --- a/trunk/arch/ppc64/kernel/asm-offsets.c +++ b/trunk/arch/ppc64/kernel/asm-offsets.c @@ -94,8 +94,7 @@ int main(void) DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_HUGETLB_PAGE - DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); - DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); + DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); #endif /* CONFIG_HUGETLB_PAGE */ DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); diff --git a/trunk/arch/ppc64/kernel/cputable.c b/trunk/arch/ppc64/kernel/cputable.c index 4847f2ac8c9f..77cec42f9525 100644 --- a/trunk/arch/ppc64/kernel/cputable.c +++ b/trunk/arch/ppc64/kernel/cputable.c @@ -5,7 +5,7 @@ * * Modifications for ppc64: * Copyright (C) 2003 Dave Engebretsen - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -60,6 +60,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, }, { /* Power3+ */ .pvr_mask = 0xffff0000, @@ -72,6 +73,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, }, { /* Northstar */ .pvr_mask = 0xffff0000, @@ -84,6 +86,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, }, { /* Pulsar */ .pvr_mask = 0xffff0000, @@ -96,6 +99,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, }, { /* I-star */ .pvr_mask = 0xffff0000, @@ -108,6 +112,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, }, { /* S-star */ .pvr_mask = 0xffff0000, @@ -120,6 +125,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, + .firmware_features = COMMON_PPC64_FW, }, { /* Power4 */ .pvr_mask = 0xffff0000, @@ -132,6 +138,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, }, { /* Power4+ */ .pvr_mask = 0xffff0000, @@ -144,6 +151,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, }, { /* PPC970 */ .pvr_mask = 0xffff0000, @@ -158,6 +166,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, }, { /* PPC970FX */ .pvr_mask = 0xffff0000, @@ -172,6 +181,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, }, { /* PPC970MP */ .pvr_mask = 0xffff0000, @@ -186,6 +196,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, + .firmware_features = COMMON_PPC64_FW, }, { /* Power5 */ .pvr_mask = 0xffff0000, @@ -200,6 +211,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, }, { /* Power5 */ .pvr_mask = 0xffff0000, @@ -214,6 +226,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, }, { /* BE DD1.x */ .pvr_mask = 0xffff0000, @@ -228,6 +241,7 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, + .firmware_features = COMMON_PPC64_FW, }, { /* default match */ .pvr_mask = 0x00000000, @@ -240,5 +254,29 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, + .firmware_features = COMMON_PPC64_FW, } }; + +firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, +}; diff --git a/trunk/arch/ppc64/kernel/firmware.c b/trunk/arch/ppc64/kernel/firmware.c deleted file mode 100644 index d8432c0fb27d..000000000000 --- a/trunk/arch/ppc64/kernel/firmware.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * arch/ppc64/kernel/firmware.c - * - * Extracted from cputable.c - * - * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) - * - * Modifications for ppc64: - * Copyright (C) 2003 Dave Engebretsen - * Copyright (C) 2005 Stephen Rothwell, IBM Corporation - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include - -#include - -unsigned long ppc64_firmware_features; - -#ifdef CONFIG_PPC_PSERIES -firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, -}; -#endif diff --git a/trunk/arch/ppc64/kernel/head.S b/trunk/arch/ppc64/kernel/head.S index cccec4902646..accaa052d31f 100644 --- a/trunk/arch/ppc64/kernel/head.S +++ b/trunk/arch/ppc64/kernel/head.S @@ -23,11 +23,14 @@ * 2 of the License, or (at your option) any later version. */ +#define SECONDARY_PROCESSORS + #include #include #include #include #include +#include #include #include #include @@ -41,14 +44,19 @@ #define DO_SOFT_DISABLE #endif +/* + * hcall interface to pSeries LPAR + */ +#define H_SET_ASR 0x30 + /* * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs - * 0x6000 - 0x6fff : Initial (CPU0) segment table - * 0x7000 - 0x7fff : FWNMI data area - * 0x8000 - : Early init and support code + * 0x3000 - 0x3fff : Interrupt support + * 0x4000 - 0x4fff : NACA + * 0x6000 : iSeries and common interrupt prologs + * 0x9000 - 0x9fff : Initial segment table */ /* @@ -86,7 +94,6 @@ END_FTR_SECTION(0, 1) /* Catch branch to 0 in real mode */ trap - #ifdef CONFIG_PPC_ISERIES /* * At offset 0x20, there is a pointer to iSeries LPAR data. @@ -96,12 +103,12 @@ END_FTR_SECTION(0, 1) .llong hvReleaseData-KERNELBASE /* - * At offset 0x28 and 0x30 are offsets to the mschunks_map + * At offset 0x28 and 0x30 are offsets to the msChunks * array (used by the iSeries LPAR debugger to do translation * between physical addresses and absolute addresses) and * to the pidhash table (also used by the debugger) */ - .llong mschunks_map-KERNELBASE + .llong msChunks-KERNELBASE .llong 0 /* pidhash-KERNELBASE SFRXXX */ /* Offset 0x38 - Pointer to start of embedded System.map */ @@ -113,7 +120,7 @@ embedded_sysmap_start: embedded_sysmap_end: .llong 0 -#endif /* CONFIG_PPC_ISERIES */ +#else /* CONFIG_PPC_ISERIES */ /* Secondary processors spin on this value until it goes to 1. */ .globl __secondary_hold_spinloop @@ -148,7 +155,7 @@ _GLOBAL(__secondary_hold) std r24,__secondary_hold_acknowledge@l(0) sync - /* All secondary cpus wait here until told to start. */ + /* All secondary cpu's wait here until told to start. */ 100: ld r4,__secondary_hold_spinloop@l(0) cmpdi 0,r4,1 bne 100b @@ -163,6 +170,7 @@ _GLOBAL(__secondary_hold) BUG_OPCODE #endif #endif +#endif /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" @@ -494,37 +502,33 @@ system_call_pSeries: STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) STD_EXCEPTION_PSERIES(0x1700, altivec_assist) - . = 0x3000 - -/*** pSeries interrupt support ***/ - /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(., performance_monitor) + STD_EXCEPTION_PSERIES(0x3000, performance_monitor) - .align 7 + . = 0x3100 _GLOBAL(do_stab_bolted_pSeries) mtcrf 0x80,r12 mfspr r12,SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) -/* - * Vectors for the FWNMI option. Share common code. - */ - .globl system_reset_fwnmi -system_reset_fwnmi: - HMT_MEDIUM - mtspr SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + + /* Space for the naca. Architected to be located at real address + * NACA_PHYS_ADDR. Various tools rely on this location being fixed. + * The first dword of the naca is required by iSeries LPAR to + * point to itVpdAreas. On pSeries native, this value is not used. + */ + . = NACA_PHYS_ADDR + .globl __end_interrupts +__end_interrupts: +#ifdef CONFIG_PPC_ISERIES + .globl naca +naca: + .llong itVpdAreas + .llong 0 /* xRamDisk */ + .llong 0 /* xRamDiskSize */ - .globl machine_check_fwnmi -machine_check_fwnmi: - HMT_MEDIUM - mtspr SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + . = 0x6100 -#ifdef CONFIG_PPC_ISERIES /*** ISeries-LPAR interrupt handlers ***/ STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) @@ -622,7 +626,9 @@ system_reset_iSeries: cmpwi 0,r23,0 beq iSeries_secondary_smp_loop /* Loop until told to go */ +#ifdef SECONDARY_PROCESSORS bne .__secondary_start /* Loop until told to go */ +#endif iSeries_secondary_smp_loop: /* Let the Hypervisor know we are alive */ /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ @@ -665,8 +671,51 @@ hardware_interrupt_iSeries_masked: ld r13,PACA_EXGEN+EX_R13(r13) rfid b . /* prevent speculative execution */ +#endif + +/* + * Data area reserved for FWNMI option. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + +#ifdef CONFIG_PPC_ISERIES + . = LPARMAP_PHYS +#include "lparmap.s" #endif /* CONFIG_PPC_ISERIES */ +/* + * Vectors for the FWNMI option. Share common code. + */ + . = 0x8000 + .globl system_reset_fwnmi +system_reset_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) + + /* + * Space for the initial segment table + * For LPAR, the hypervisor must fill in at least one entry + * before we get control (with relocate on) + */ + . = STAB0_PHYS_ADDR + .globl __start_stab +__start_stab: + + . = (STAB0_PHYS_ADDR + PAGE_SIZE) + .globl __end_stab +__end_stab: + + /*** Common interrupt handlers ***/ STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) @@ -703,8 +752,8 @@ machine_check_common: * R9 contains the saved CR, r13 points to the paca, * r10 contains the (bad) kernel stack pointer, * r11 and r12 contain the saved SRR0 and SRR1. - * We switch to using an emergency stack, save the registers there, - * and call kernel_bad_stack(), which panics. + * We switch to using the paca guard page as an emergency stack, + * save the registers there, and call kernel_bad_stack(), which panics. */ bad_stack: ld r1,PACAEMERGSP(r13) @@ -857,62 +906,6 @@ fp_unavailable_common: bl .kernel_fp_unavailable_exception BUG_OPCODE -/* - * load_up_fpu(unused, unused, tsk) - * Disable FP for the task which had the FPU previously, - * and save its floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - * On SMP we know the fpu is free, since we give it up every - * switch (ie, no lazy save of the FP registers). - * On entry: r13 == 'current' && last_task_used_math != 'current' - */ -_STATIC(load_up_fpu) - mfmsr r5 /* grab the current MSR */ - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. - * - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_math@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save FP state to last_task_used_math's THREAD struct */ - addi r4,r4,THREAD - SAVE_32FPRS(0, r4) - mffs fr0 - stfd fr0,THREAD_FPSCR(r4) - /* Disable FP for last_task_used_math */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r6,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* enable use of FP after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - ld r4,THREAD_FPEXC_MODE(r5) - ori r12,r12,MSR_FP - or r12,r12,r4 - std r12,_MSR(r1) - lfd fr0,THREAD_FPSCR(r5) - mtfsf 0xff,fr0 - REST_32FPRS(0, r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return - .align 7 .globl altivec_unavailable_common altivec_unavailable_common: @@ -928,80 +921,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl .altivec_unavailable_exception b .ret_from_except -#ifdef CONFIG_ALTIVEC -/* - * load_up_altivec(unused, unused, tsk) - * Disable VMX for the task which had it previously, - * and save its vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - * On SMP we know the VMX is free, since we give it up every - * switch (ie, no lazy save of the vector registers). - * On entry: r13 == 'current' && last_task_used_altivec != 'current' - */ -_STATIC(load_up_altivec) - mfmsr r5 /* grab the current MSR */ - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - -/* - * For SMP, we don't do lazy VMX switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altvec in switch_to. - * VRSAVE isn't dealt with here, that is done in the normal context - * switch code. Note that we could rely on vrsave value to eventually - * avoid saving all of the VREGs here... - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_altivec@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save VMX state to last_task_used_altivec's THREAD struct */ - addi r4,r4,THREAD - SAVE_32VRS(0,r5,r4) - mfvscr vr0 - li r10,THREAD_VSCR - stvx vr0,r10,r4 - /* Disable VMX for last_task_used_altivec */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r6,MSR_VEC@h - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's - */ - mfspr r4,SPRN_VRSAVE - cmpdi 0,r4,0 - bne+ 1f - li r4,-1 - mtspr SPRN_VRSAVE,r4 -1: - /* enable use of VMX after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - oris r12,r12,MSR_VEC@h - std r12,_MSR(r1) - li r4,1 - li r10,THREAD_VSCR - stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r5 - mtvscr vr0 - REST_32VRS(0,r4,r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return -#endif /* CONFIG_ALTIVEC */ - /* * Hash table stuff */ @@ -1248,28 +1167,6 @@ unrecov_slb: bl .unrecoverable_exception b 1b -/* - * Space for CPU0's segment table. - * - * On iSeries, the hypervisor must fill in at least one entry before - * we get control (with relocate on). The address is give to the hv - * as a page number (see xLparMap in LparData.c), so this must be at a - * fixed address (the linker can't compute (u64)&initial_stab >> - * PAGE_SHIFT). - */ - . = STAB0_PHYS_ADDR /* 0x6000 */ - .globl initial_stab -initial_stab: - .space 4096 - -/* - * Data area reserved for FWNMI option. - * This address (0x7000) is fixed by the RPA. - */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: - .space PAGE_SIZE /* * On pSeries, secondary processors spin in the following code. @@ -1303,7 +1200,7 @@ _GLOBAL(pSeries_secondary_smp_init) b .kexec_wait /* next kernel might do better */ 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ - /* From now on, r24 is expected to be logical cpuid */ + /* From now on, r24 is expected to be logica cpuid */ mr r24,r5 3: HMT_LOW lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ @@ -1316,7 +1213,9 @@ _GLOBAL(pSeries_secondary_smp_init) cmpwi 0,r23,0 #ifdef CONFIG_SMP +#ifdef SECONDARY_PROCESSORS bne .__secondary_start +#endif #endif b 3b /* Loop until told to go */ @@ -1531,6 +1430,228 @@ _GLOBAL(copy_and_flush) .align 8 copy_to_here: +/* + * load_up_fpu(unused, unused, tsk) + * Disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + * On SMP we know the fpu is free, since we give it up every + * switch (ie, no lazy save of the FP registers). + * On entry: r13 == 'current' && last_task_used_math != 'current' + */ +_STATIC(load_up_fpu) + mfmsr r5 /* grab the current MSR */ + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + * + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_math@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save FP state to last_task_used_math's THREAD struct */ + addi r4,r4,THREAD + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR(r4) + /* Disable FP for last_task_used_math */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r6,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + ld r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) + lfd fr0,THREAD_FPSCR(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + +/* + * disable_kernel_fp() + * Disable the FPU. + */ +_GLOBAL(disable_kernel_fp) + mfmsr r3 + rldicl r0,r3,(63-MSR_FP_LG),1 + rldicl r3,r0,(MSR_FP_LG+1),0 + mtmsrd r3 /* disable use of fpu now */ + isync + blr + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR(r3) + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_math@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + + +#ifdef CONFIG_ALTIVEC + +/* + * load_up_altivec(unused, unused, tsk) + * Disable VMX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + * On SMP we know the VMX is free, since we give it up every + * switch (ie, no lazy save of the vector registers). + * On entry: r13 == 'current' && last_task_used_altivec != 'current' + */ +_STATIC(load_up_altivec) + mfmsr r5 /* grab the current MSR */ + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + +/* + * For SMP, we don't do lazy VMX switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altvec in switch_to. + * VRSAVE isn't dealt with here, that is done in the normal context + * switch code. Note that we could rely on vrsave value to eventually + * avoid saving all of the VREGs here... + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_altivec@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save VMX state to last_task_used_altivec's THREAD struct */ + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VEC@h + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpdi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + +/* + * disable_kernel_altivec() + * Disable the VMX. + */ +_GLOBAL(disable_kernel_altivec) + mfmsr r3 + rldicl r0,r3,(63-MSR_VEC_LG),1 + rldicl r3,r0,(MSR_VEC_LG+1),0 + mtmsrd r3 /* disable use of VMX now */ + isync + blr + +/* + * giveup_altivec(tsk) + * Disable VMX for the task given as the argument, + * and save the vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + */ +_GLOBAL(giveup_altivec) + mfmsr r5 + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32VRS(0,r4,r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_altivec@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_ALTIVEC */ + #ifdef CONFIG_SMP #ifdef CONFIG_PPC_PMAC /* @@ -1881,6 +2002,9 @@ _STATIC(start_here_common) bl .start_kernel +_GLOBAL(__setup_cpu_power3) + blr + _GLOBAL(hmt_init) #ifdef CONFIG_HMT LOADADDR(r5, hmt_thread_data) @@ -1971,19 +2095,20 @@ _GLOBAL(smp_release_cpus) /* * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the bss, which is page-aligned. + * This stuff goes at the beginning of the data segment, + * which is page-aligned. */ - .section ".bss" - + .data .align 12 - + .globl sdata +sdata: .globl empty_zero_page empty_zero_page: - .space PAGE_SIZE + .space 4096 .globl swapper_pg_dir swapper_pg_dir: - .space PAGE_SIZE + .space 4096 /* * This space gets a copy of optional info passed to us by the bootstrap diff --git a/trunk/arch/ppc64/kernel/iSeries_htab.c b/trunk/arch/ppc64/kernel/iSeries_htab.c index 2192055a90a0..b0250ae4a72a 100644 --- a/trunk/arch/ppc64/kernel/iSeries_htab.c +++ b/trunk/arch/ppc64/kernel/iSeries_htab.c @@ -41,7 +41,6 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { - unsigned long arpn; long slot; hpte_t lhpte; int secondary = 0; @@ -71,10 +70,8 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, slot &= 0x7fffffffffffffff; } - arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; - lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; - lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ HvCallHpt_addValidate(slot, secondary, &lhpte); diff --git a/trunk/arch/ppc64/kernel/iSeries_setup.c b/trunk/arch/ppc64/kernel/iSeries_setup.c index 3ffefbbc6623..a649edbb23b6 100644 --- a/trunk/arch/ppc64/kernel/iSeries_setup.c +++ b/trunk/arch/ppc64/kernel/iSeries_setup.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include "iSeries_setup.h" @@ -315,8 +314,6 @@ static void __init iSeries_init_early(void) DBG(" -> iSeries_init_early()\n"); - ppc64_firmware_features = FW_FEATURE_ISERIES; - ppcdbg_initialize(); #if defined(CONFIG_BLK_DEV_INITRD) @@ -415,22 +412,6 @@ static void __init iSeries_init_early(void) DBG(" <- iSeries_init_early()\n"); } -struct mschunks_map mschunks_map = { - /* XXX We don't use these, but Piranha might need them. */ - .chunk_size = MSCHUNKS_CHUNK_SIZE, - .chunk_shift = MSCHUNKS_CHUNK_SHIFT, - .chunk_mask = MSCHUNKS_OFFSET_MASK, -}; -EXPORT_SYMBOL(mschunks_map); - -void mschunks_alloc(unsigned long num_chunks) -{ - klimit = _ALIGN(klimit, sizeof(u32)); - mschunks_map.mapping = (u32 *)klimit; - klimit += num_chunks * sizeof(u32); - mschunks_map.num_chunks = num_chunks; -} - /* * The iSeries may have very large memories ( > 128 GB ) and a partition * may get memory in "chunks" that may be anywhere in the 2**52 real @@ -468,7 +449,7 @@ static void __init build_iSeries_Memory_Map(void) /* Chunk size on iSeries is 256K bytes */ totalChunks = (u32)HvLpConfig_getMsChunks(); - mschunks_alloc(totalChunks); + klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18); /* * Get absolute address of our load area @@ -505,7 +486,7 @@ static void __init build_iSeries_Memory_Map(void) printk("Load area size %dK\n", loadAreaSize * 256); for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) - mschunks_map.mapping[nextPhysChunk] = + msChunks.abs[nextPhysChunk] = loadAreaFirstChunk + nextPhysChunk; /* @@ -514,7 +495,7 @@ static void __init build_iSeries_Memory_Map(void) */ hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); hptSizePages = (u32)HvCallHpt_getHptPages(); - hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); + hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT); hptLastChunk = hptFirstChunk + hptSizeChunks - 1; printk("HPT absolute addr = %016lx, size = %dK\n", @@ -571,8 +552,7 @@ static void __init build_iSeries_Memory_Map(void) (absChunk > hptLastChunk)) && ((absChunk < loadAreaFirstChunk) || (absChunk > loadAreaLastChunk))) { - mschunks_map.mapping[nextPhysChunk] = - absChunk; + msChunks.abs[nextPhysChunk] = absChunk; ++nextPhysChunk; } } @@ -964,8 +944,6 @@ void __init iSeries_early_setup(void) ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; - /* XXX Implement enable_pmcs for iSeries */ - if (get_paca()->lppaca.shared_proc) { ppc_md.idle_loop = iseries_shared_idle; printk(KERN_INFO "Using shared processor idle loop\n"); diff --git a/trunk/arch/ppc64/kernel/iSeries_vio.c b/trunk/arch/ppc64/kernel/iSeries_vio.c deleted file mode 100644 index b4268cc4ba48..000000000000 --- a/trunk/arch/ppc64/kernel/iSeries_vio.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * IBM PowerPC iSeries Virtual I/O Infrastructure Support. - * - * Copyright (c) 2005 Stephen Rothwell, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct device *iSeries_vio_dev = &vio_bus_device.dev; -EXPORT_SYMBOL(iSeries_vio_dev); - -static struct iommu_table veth_iommu_table; -static struct iommu_table vio_iommu_table; - -static void __init iommu_vio_init(void) -{ - struct iommu_table *t; - struct iommu_table_cb cb; - unsigned long cbp; - unsigned long itc_entries; - - cb.itc_busno = 255; /* Bus 255 is the virtual bus */ - cb.itc_virtbus = 0xff; /* Ask for virtual bus */ - - cbp = virt_to_abs(&cb); - HvCallXm_getTceTableParms(cbp); - - itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); - veth_iommu_table.it_size = itc_entries / 2; - veth_iommu_table.it_busno = cb.itc_busno; - veth_iommu_table.it_offset = cb.itc_offset; - veth_iommu_table.it_index = cb.itc_index; - veth_iommu_table.it_type = TCE_VB; - veth_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&veth_iommu_table); - - if (!t) - printk("Virtual Bus VETH TCE table failed.\n"); - - vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; - vio_iommu_table.it_busno = cb.itc_busno; - vio_iommu_table.it_offset = cb.itc_offset + - veth_iommu_table.it_size; - vio_iommu_table.it_index = cb.itc_index; - vio_iommu_table.it_type = TCE_VB; - vio_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&vio_iommu_table); - - if (!t) - printk("Virtual Bus VIO TCE table failed.\n"); -} - -/** - * vio_register_device: - Register a new vio device. - * @voidev: The device to register. - */ -static struct vio_dev *__init vio_register_device_iseries(char *type, - uint32_t unit_num) -{ - struct vio_dev *viodev; - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) - return NULL; - memset(viodev, 0, sizeof(struct vio_dev)); - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); - - return vio_register_device_common(viodev, viodev->dev.bus_id, type, - unit_num, &vio_iommu_table); -} - -void __init probe_bus_iseries(void) -{ - HvLpIndexMap vlan_map; - struct vio_dev *viodev; - int i; - - /* there is only one of each of these */ - vio_register_device_iseries("viocons", 0); - vio_register_device_iseries("vscsi", 0); - - vlan_map = HvLpConfig_getVirtualLanIndexMap(); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { - if ((vlan_map & (0x8000 >> i)) == 0) - continue; - viodev = vio_register_device_iseries("vlan", i); - /* veth is special and has it own iommu_table */ - viodev->iommu_table = &veth_iommu_table; - } - for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) - vio_register_device_iseries("viodasd", i); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) - vio_register_device_iseries("viocd", i); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) - vio_register_device_iseries("viotape", i); -} - -/** - * vio_match_device_iseries: - Tell if a iSeries VIO device matches a - * vio_device_id - */ -static int vio_match_device_iseries(const struct vio_device_id *id, - const struct vio_dev *dev) -{ - return strncmp(dev->type, id->type, strlen(id->type)) == 0; -} - -/** - * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus - */ -static int __init vio_bus_init_iseries(void) -{ - int err; - - err = vio_bus_init(vio_match_device_iseries, NULL, NULL); - if (err == 0) { - iommu_vio_init(); - vio_bus_device.iommu_table = &vio_iommu_table; - iSeries_vio_dev = &vio_bus_device.dev; - probe_bus_iseries(); - } - return err; -} - -__initcall(vio_bus_init_iseries); diff --git a/trunk/arch/ppc64/kernel/lmb.c b/trunk/arch/ppc64/kernel/lmb.c index 5adaca2ddc9d..d6c6bd03d2a4 100644 --- a/trunk/arch/ppc64/kernel/lmb.c +++ b/trunk/arch/ppc64/kernel/lmb.c @@ -28,28 +28,33 @@ void lmb_dump_all(void) { #ifdef DEBUG unsigned long i; + struct lmb *_lmb = &lmb; udbg_printf("lmb_dump_all:\n"); udbg_printf(" memory.cnt = 0x%lx\n", - lmb.memory.cnt); + _lmb->memory.cnt); udbg_printf(" memory.size = 0x%lx\n", - lmb.memory.size); - for (i=0; i < lmb.memory.cnt ;i++) { + _lmb->memory.size); + for (i=0; i < _lmb->memory.cnt ;i++) { udbg_printf(" memory.region[0x%x].base = 0x%lx\n", - i, lmb.memory.region[i].base); + i, _lmb->memory.region[i].base); + udbg_printf(" .physbase = 0x%lx\n", + _lmb->memory.region[i].physbase); udbg_printf(" .size = 0x%lx\n", - lmb.memory.region[i].size); + _lmb->memory.region[i].size); } udbg_printf("\n reserved.cnt = 0x%lx\n", - lmb.reserved.cnt); + _lmb->reserved.cnt); udbg_printf(" reserved.size = 0x%lx\n", - lmb.reserved.size); - for (i=0; i < lmb.reserved.cnt ;i++) { + _lmb->reserved.size); + for (i=0; i < _lmb->reserved.cnt ;i++) { udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", - i, lmb.reserved.region[i].base); + i, _lmb->reserved.region[i].base); + udbg_printf(" .physbase = 0x%lx\n", + _lmb->reserved.region[i].physbase); udbg_printf(" .size = 0x%lx\n", - lmb.reserved.region[i].size); + _lmb->reserved.region[i].size); } #endif /* DEBUG */ } @@ -93,6 +98,7 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) rgn->region[r1].size += rgn->region[r2].size; for (i=r2; i < rgn->cnt-1; i++) { rgn->region[i].base = rgn->region[i+1].base; + rgn->region[i].physbase = rgn->region[i+1].physbase; rgn->region[i].size = rgn->region[i+1].size; } rgn->cnt--; @@ -102,29 +108,49 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) void __init lmb_init(void) { + struct lmb *_lmb = &lmb; + /* Create a dummy zero size LMB which will get coalesced away later. * This simplifies the lmb_add() code below... */ - lmb.memory.region[0].base = 0; - lmb.memory.region[0].size = 0; - lmb.memory.cnt = 1; + _lmb->memory.region[0].base = 0; + _lmb->memory.region[0].size = 0; + _lmb->memory.cnt = 1; /* Ditto. */ - lmb.reserved.region[0].base = 0; - lmb.reserved.region[0].size = 0; - lmb.reserved.cnt = 1; + _lmb->reserved.region[0].base = 0; + _lmb->reserved.region[0].size = 0; + _lmb->reserved.cnt = 1; } /* This routine called with relocation disabled. */ void __init lmb_analyze(void) { - int i; - - lmb.memory.size = 0; + unsigned long i; + unsigned long mem_size = 0; + unsigned long size_mask = 0; + struct lmb *_lmb = &lmb; +#ifdef CONFIG_MSCHUNKS + unsigned long physbase = 0; +#endif + + for (i=0; i < _lmb->memory.cnt; i++) { + unsigned long lmb_size; + + lmb_size = _lmb->memory.region[i].size; + +#ifdef CONFIG_MSCHUNKS + _lmb->memory.region[i].physbase = physbase; + physbase += lmb_size; +#else + _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; +#endif + mem_size += lmb_size; + size_mask |= lmb_size; + } - for (i = 0; i < lmb.memory.cnt; i++) - lmb.memory.size += lmb.memory.region[i].size; + _lmb->memory.size = mem_size; } /* This routine called with relocation disabled. */ @@ -142,6 +168,7 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); if ( adjacent > 0 ) { rgn->region[i].base -= size; + rgn->region[i].physbase -= size; rgn->region[i].size += size; coalesced++; break; @@ -168,9 +195,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) for (i=rgn->cnt-1; i >= 0; i--) { if (base < rgn->region[i].base) { rgn->region[i+1].base = rgn->region[i].base; + rgn->region[i+1].physbase = rgn->region[i].physbase; rgn->region[i+1].size = rgn->region[i].size; } else { rgn->region[i+1].base = base; + rgn->region[i+1].physbase = lmb_abs_to_phys(base); rgn->region[i+1].size = size; break; } @@ -184,11 +213,12 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) long __init lmb_add(unsigned long base, unsigned long size) { - struct lmb_region *_rgn = &(lmb.memory); + struct lmb *_lmb = &lmb; + struct lmb_region *_rgn = &(_lmb->memory); /* On pSeries LPAR systems, the first LMB is our RMO region. */ if ( base == 0 ) - lmb.rmo_size = size; + _lmb->rmo_size = size; return lmb_add_region(_rgn, base, size); @@ -197,7 +227,8 @@ lmb_add(unsigned long base, unsigned long size) long __init lmb_reserve(unsigned long base, unsigned long size) { - struct lmb_region *_rgn = &(lmb.reserved); + struct lmb *_lmb = &lmb; + struct lmb_region *_rgn = &(_lmb->reserved); return lmb_add_region(_rgn, base, size); } @@ -229,10 +260,13 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) { long i, j; unsigned long base = 0; + struct lmb *_lmb = &lmb; + struct lmb_region *_mem = &(_lmb->memory); + struct lmb_region *_rsv = &(_lmb->reserved); - for (i=lmb.memory.cnt-1; i >= 0; i--) { - unsigned long lmbbase = lmb.memory.region[i].base; - unsigned long lmbsize = lmb.memory.region[i].size; + for (i=_mem->cnt-1; i >= 0; i--) { + unsigned long lmbbase = _mem->region[i].base; + unsigned long lmbsize = _mem->region[i].size; if ( max_addr == LMB_ALLOC_ANYWHERE ) base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); @@ -242,8 +276,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) continue; while ( (lmbbase <= base) && - ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { - base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); + ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) { + base = _ALIGN_DOWN(_rsv->region[j].base-size, align); } if ( (base != 0) && (lmbbase <= base) ) @@ -253,24 +287,62 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) if ( i < 0 ) return 0; - lmb_add_region(&lmb.reserved, base, size); + lmb_add_region(_rsv, base, size); return base; } -/* You must call lmb_analyze() before this. */ unsigned long __init lmb_phys_mem_size(void) { - return lmb.memory.size; + struct lmb *_lmb = &lmb; +#ifdef CONFIG_MSCHUNKS + return _lmb->memory.size; +#else + struct lmb_region *_mem = &(_lmb->memory); + unsigned long total = 0; + int i; + + /* add all physical memory to the bootmem map */ + for (i=0; i < _mem->cnt; i++) + total += _mem->region[i].size; + return total; +#endif /* CONFIG_MSCHUNKS */ } unsigned long __init lmb_end_of_DRAM(void) { - int idx = lmb.memory.cnt - 1; + struct lmb *_lmb = &lmb; + struct lmb_region *_mem = &(_lmb->memory); + int idx = _mem->cnt - 1; + +#ifdef CONFIG_MSCHUNKS + return (_mem->region[idx].physbase + _mem->region[idx].size); +#else + return (_mem->region[idx].base + _mem->region[idx].size); +#endif /* CONFIG_MSCHUNKS */ + + return 0; +} + +unsigned long __init +lmb_abs_to_phys(unsigned long aa) +{ + unsigned long i, pa = aa; + struct lmb *_lmb = &lmb; + struct lmb_region *_mem = &(_lmb->memory); + + for (i=0; i < _mem->cnt; i++) { + unsigned long lmbbase = _mem->region[i].base; + unsigned long lmbsize = _mem->region[i].size; + if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) { + pa = _mem->region[i].physbase + (aa - lmbbase); + break; + } + } - return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); + return pa; } /* @@ -281,19 +353,20 @@ void __init lmb_enforce_memory_limit(void) { extern unsigned long memory_limit; unsigned long i, limit; + struct lmb_region *mem = &(lmb.memory); if (! memory_limit) return; limit = memory_limit; - for (i = 0; i < lmb.memory.cnt; i++) { - if (limit > lmb.memory.region[i].size) { - limit -= lmb.memory.region[i].size; + for (i = 0; i < mem->cnt; i++) { + if (limit > mem->region[i].size) { + limit -= mem->region[i].size; continue; } - lmb.memory.region[i].size = limit; - lmb.memory.cnt = i + 1; + mem->region[i].size = limit; + mem->cnt = i + 1; break; } } diff --git a/trunk/arch/ppc64/kernel/lparcfg.c b/trunk/arch/ppc64/kernel/lparcfg.c index 9d034ff062b1..02e96627fa66 100644 --- a/trunk/arch/ppc64/kernel/lparcfg.c +++ b/trunk/arch/ppc64/kernel/lparcfg.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -377,7 +377,7 @@ static int lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { unsigned long h_entitled, h_unallocated; unsigned long h_aggregation, h_resource; unsigned long pool_idle_time, pool_procs; @@ -571,7 +571,7 @@ int __init lparcfg_init(void) mode_t mode = S_IRUSR; /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { lparcfg_fops.write = lparcfg_write; mode |= S_IWUSR; } diff --git a/trunk/arch/ppc64/kernel/misc.S b/trunk/arch/ppc64/kernel/misc.S index 474df0a862bf..a05b50b738e9 100644 --- a/trunk/arch/ppc64/kernel/misc.S +++ b/trunk/arch/ppc64/kernel/misc.S @@ -680,104 +680,6 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr -/* - * disable_kernel_fp() - * Disable the FPU. - */ -_GLOBAL(disable_kernel_fp) - mfmsr r3 - rldicl r0,r3,(63-MSR_FP_LG),1 - rldicl r3,r0,(MSR_FP_LG+1),0 - mtmsrd r3 /* disable use of fpu now */ - isync - blr - -/* - * giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - */ -_GLOBAL(giveup_fpu) - mfmsr r5 - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32FPRS(0, r3) - mffs fr0 - stfd fr0,THREAD_FPSCR(r3) - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_math@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#ifdef CONFIG_ALTIVEC - -#if 0 /* this has no callers for now */ -/* - * disable_kernel_altivec() - * Disable the VMX. - */ -_GLOBAL(disable_kernel_altivec) - mfmsr r3 - rldicl r0,r3,(63-MSR_VEC_LG),1 - rldicl r3,r0,(MSR_VEC_LG+1),0 - mtmsrd r3 /* disable use of VMX now */ - isync - blr -#endif /* 0 */ - -/* - * giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - */ -_GLOBAL(giveup_altivec) - mfmsr r5 - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32VRS(0,r4,r3) - mfvscr vr0 - li r4,THREAD_VSCR - stvx vr0,r4,r3 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VEC@h - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_altivec@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#endif /* CONFIG_ALTIVEC */ - -_GLOBAL(__setup_cpu_power3) - blr - /* kexec_wait(phys_cpu) * * wait for the flag to change, indicating this kernel is going away but diff --git a/trunk/arch/ppc64/kernel/pSeries_iommu.c b/trunk/arch/ppc64/kernel/pSeries_iommu.c index 9d5e1e7fc389..69130522a87e 100644 --- a/trunk/arch/ppc64/kernel/pSeries_iommu.c +++ b/trunk/arch/ppc64/kernel/pSeries_iommu.c @@ -45,7 +45,6 @@ #include #include #include -#include #include "pci.h" #define DBG(fmt...) @@ -547,7 +546,7 @@ void iommu_init_early_pSeries(void) } if (systemcfg->platform & PLATFORM_LPAR) { - if (firmware_has_feature(FW_FEATURE_MULTITCE)) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) { ppc_md.tce_build = tce_buildmulti_pSeriesLP; ppc_md.tce_free = tce_freemulti_pSeriesLP; } else { diff --git a/trunk/arch/ppc64/kernel/pSeries_lpar.c b/trunk/arch/ppc64/kernel/pSeries_lpar.c index 0a3ddc9227c5..74dd144dcce8 100644 --- a/trunk/arch/ppc64/kernel/pSeries_lpar.c +++ b/trunk/arch/ppc64/kernel/pSeries_lpar.c @@ -52,6 +52,7 @@ EXPORT_SYMBOL(plpar_hcall_4out); EXPORT_SYMBOL(plpar_hcall_norets); EXPORT_SYMBOL(plpar_hcall_8arg_2ret); +extern void fw_feature_init(void); extern void pSeries_find_serial_port(void); @@ -278,6 +279,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { + unsigned long arpn = physRpn_to_absRpn(prpn); unsigned long lpar_rc; unsigned long flags; unsigned long slot; @@ -288,7 +290,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, if (vflags & HPTE_V_LARGE) hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ diff --git a/trunk/arch/ppc64/kernel/pSeries_setup.c b/trunk/arch/ppc64/kernel/pSeries_setup.c index f0f0630cf07c..5bec956e44a0 100644 --- a/trunk/arch/ppc64/kernel/pSeries_setup.c +++ b/trunk/arch/ppc64/kernel/pSeries_setup.c @@ -60,8 +60,7 @@ #include #include #include -#include -#include +#include #include "i8259.h" #include "mpic.h" @@ -188,21 +187,6 @@ static void __init pSeries_setup_mpic(void) " MPIC "); } -static void pseries_lpar_enable_pmcs(void) -{ - unsigned long set, reset; - - power4_enable_pmcs(); - - set = 1UL << 63; - reset = 0; - plpar_hcall_norets(H_PERFMON, set, reset); - - /* instruct hypervisor to maintain PMCs */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - get_paca()->lppaca.pmcregs_in_use = 1; -} - static void __init pSeries_setup_arch(void) { /* Fixup ppc_md depending on the type of interrupt controller */ @@ -247,9 +231,11 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - /* Choose an idle loop */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); + + /* Choose an idle loop */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; @@ -261,11 +247,6 @@ static void __init pSeries_setup_arch(void) printk(KERN_INFO "Using default idle loop\n"); ppc_md.idle_loop = default_idle; } - - if (systemcfg->platform & PLATFORM_LPAR) - ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; - else - ppc_md.enable_pmcs = power4_enable_pmcs; } static int __init pSeries_init_panel(void) @@ -279,11 +260,11 @@ static int __init pSeries_init_panel(void) arch_initcall(pSeries_init_panel); -/* Build up the ppc64_firmware_features bitmask field +/* Build up the firmware_features bitmask field * using contents of device-tree/ibm,hypertas-functions. * Ultimately this functionality may be moved into prom.c prom_init(). */ -static void __init fw_feature_init(void) +void __init fw_feature_init(void) { struct device_node * dn; char * hypertas; @@ -291,7 +272,7 @@ static void __init fw_feature_init(void) DBG(" -> fw_feature_init()\n"); - ppc64_firmware_features = 0; + cur_cpu_spec->firmware_features = 0; dn = of_find_node_by_path("/rtas"); if (dn == NULL) { printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); @@ -307,7 +288,7 @@ static void __init fw_feature_init(void) if ((firmware_features_table[i].name) && (strcmp(firmware_features_table[i].name,hypertas))==0) { /* we have a match */ - ppc64_firmware_features |= + cur_cpu_spec->firmware_features |= (firmware_features_table[i].val); break; } @@ -321,7 +302,7 @@ static void __init fw_feature_init(void) of_node_put(dn); no_rtas: printk(KERN_INFO "firmware_features = 0x%lx\n", - ppc64_firmware_features); + cur_cpu_spec->firmware_features); DBG(" <- fw_feature_init()\n"); } diff --git a/trunk/arch/ppc64/kernel/pSeries_smp.c b/trunk/arch/ppc64/kernel/pSeries_smp.c index 79c7f3223665..62c55a123560 100644 --- a/trunk/arch/ppc64/kernel/pSeries_smp.c +++ b/trunk/arch/ppc64/kernel/pSeries_smp.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -327,7 +326,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) if (cpu != boot_cpuid) xics_setup_cpu(); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) vpa_init(cpu); cpu_clear(cpu, of_spin_map); diff --git a/trunk/arch/ppc64/kernel/pSeries_vio.c b/trunk/arch/ppc64/kernel/pSeries_vio.c deleted file mode 100644 index 338f9e1bdc09..000000000000 --- a/trunk/arch/ppc64/kernel/pSeries_vio.c +++ /dev/null @@ -1,266 +0,0 @@ -/* - * IBM PowerPC pSeries Virtual I/O Infrastructure Support. - * - * Copyright (c) 2003-2005 IBM Corp. - * Dave Engebretsen engebret@us.ibm.com - * Santiago Leon santil@us.ibm.com - * Hollis Blanchard - * Stephen Rothwell - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -extern struct subsystem devices_subsys; /* needed for vio_find_name() */ - -static void probe_bus_pseries(void) -{ - struct device_node *node_vroot, *of_node; - - node_vroot = find_devices("vdevice"); - if ((node_vroot == NULL) || (node_vroot->child == NULL)) - /* this machine doesn't do virtual IO, and that's ok */ - return; - - /* - * Create struct vio_devices for each virtual device in the device tree. - * Drivers will associate with them later. - */ - for (of_node = node_vroot->child; of_node != NULL; - of_node = of_node->sibling) { - printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); - vio_register_device_node(of_node); - } -} - -/** - * vio_match_device_pseries: - Tell if a pSeries VIO device matches a - * vio_device_id - */ -static int vio_match_device_pseries(const struct vio_device_id *id, - const struct vio_dev *dev) -{ - return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && - device_is_compatible(dev->dev.platform_data, id->compat); -} - -static void vio_release_device_pseries(struct device *dev) -{ - /* XXX free TCE table */ - of_node_put(dev->platform_data); -} - -static ssize_t viodev_show_devspec(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct device_node *of_node = dev->platform_data; - - return sprintf(buf, "%s\n", of_node->full_name); -} -DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); - -static void vio_unregister_device_pseries(struct vio_dev *viodev) -{ - device_remove_file(&viodev->dev, &dev_attr_devspec); -} - -/** - * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus - */ -static int __init vio_bus_init_pseries(void) -{ - int err; - - err = vio_bus_init(vio_match_device_pseries, - vio_unregister_device_pseries, - vio_release_device_pseries); - if (err == 0) - probe_bus_pseries(); - return err; -} - -__initcall(vio_bus_init_pseries); - -/** - * vio_build_iommu_table: - gets the dma information from OF and - * builds the TCE tree. - * @dev: the virtual device. - * - * Returns a pointer to the built tce tree, or NULL if it can't - * find property. -*/ -static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) -{ - unsigned int *dma_window; - struct iommu_table *newTceTable; - unsigned long offset; - int dma_window_property_size; - - dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); - if(!dma_window) { - return NULL; - } - - newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - /* There should be some code to extract the phys-encoded offset - using prom_n_addr_cells(). However, according to a comment - on earlier versions, it's always zero, so we don't bother */ - offset = dma_window[1] >> PAGE_SHIFT; - - /* TCE table size - measured in tce entries */ - newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; - /* offset for VIO should always be 0 */ - newTceTable->it_offset = offset; - newTceTable->it_busno = 0; - newTceTable->it_index = (unsigned long)dma_window[0]; - newTceTable->it_type = TCE_VB; - - return iommu_init_table(newTceTable); -} - -/** - * vio_register_device_node: - Register a new vio device. - * @of_node: The OF node for this device. - * - * Creates and initializes a vio_dev structure from the data in - * of_node (dev.platform_data) and adds it to the list of virtual devices. - * Returns a pointer to the created vio_dev or NULL if node has - * NULL device_type or compatible fields. - */ -struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) -{ - struct vio_dev *viodev; - unsigned int *unit_address; - unsigned int *irq_p; - - /* we need the 'device_type' property, in order to match with drivers */ - if ((NULL == of_node->type)) { - printk(KERN_WARNING - "%s: node %s missing 'device_type'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - unit_address = (unsigned int *)get_property(of_node, "reg", NULL); - if (!unit_address) { - printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) { - return NULL; - } - memset(viodev, 0, sizeof(struct vio_dev)); - - viodev->dev.platform_data = of_node_get(of_node); - - viodev->irq = NO_IRQ; - irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); - if (irq_p) { - int virq = virt_irq_create_mapping(*irq_p); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", of_node->full_name); - } else - viodev->irq = irq_offset_up(virq); - } - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); - - /* register with generic device framework */ - if (vio_register_device_common(viodev, of_node->name, of_node->type, - *unit_address, vio_build_iommu_table(viodev)) - == NULL) { - /* XXX free TCE table */ - kfree(viodev); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_devspec); - - return viodev; -} -EXPORT_SYMBOL(vio_register_device_node); - -/** - * vio_get_attribute: - get attribute for virtual device - * @vdev: The vio device to get property. - * @which: The property/attribute to be extracted. - * @length: Pointer to length of returned data size (unused if NULL). - * - * Calls prom.c's get_property() to return the value of the - * attribute specified by the preprocessor constant @which -*/ -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) -{ - return get_property(vdev->dev.platform_data, (char*)which, length); -} -EXPORT_SYMBOL(vio_get_attribute); - -/* vio_find_name() - internal because only vio.c knows how we formatted the - * kobject name - * XXX once vio_bus_type.devices is actually used as a kset in - * drivers/base/bus.c, this function should be removed in favor of - * "device_find(kobj_name, &vio_bus_type)" - */ -static struct vio_dev *vio_find_name(const char *kobj_name) -{ - struct kobject *found; - - found = kset_find_obj(&devices_subsys.kset, kobj_name); - if (!found) - return NULL; - - return to_vio_dev(container_of(found, struct device, kobj)); -} - -/** - * vio_find_node - find an already-registered vio_dev - * @vnode: device_node of the virtual device we're looking for - */ -struct vio_dev *vio_find_node(struct device_node *vnode) -{ - uint32_t *unit_address; - char kobj_name[BUS_ID_SIZE]; - - /* construct the kobject name from the device node */ - unit_address = (uint32_t *)get_property(vnode, "reg", NULL); - if (!unit_address) - return NULL; - snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); - - return vio_find_name(kobj_name); -} -EXPORT_SYMBOL(vio_find_node); - -int vio_enable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); - if (rc != H_Success) - printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_enable_interrupts); - -int vio_disable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); - if (rc != H_Success) - printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); - return rc; -} -EXPORT_SYMBOL(vio_disable_interrupts); diff --git a/trunk/arch/ppc64/kernel/pacaData.c b/trunk/arch/ppc64/kernel/pacaData.c index 6182a2cd90a5..6316188737b6 100644 --- a/trunk/arch/ppc64/kernel/pacaData.c +++ b/trunk/arch/ppc64/kernel/pacaData.c @@ -78,7 +78,7 @@ extern unsigned long __toc_start; #define BOOTCPU_PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ + PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \ PACA_INIT_ISERIES(number) \ } @@ -90,7 +90,7 @@ extern unsigned long __toc_start; #define BOOTCPU_PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ + PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \ } #endif diff --git a/trunk/arch/ppc64/kernel/pmac_setup.c b/trunk/arch/ppc64/kernel/pmac_setup.c index 8ff86a766cdf..e40877fa67cd 100644 --- a/trunk/arch/ppc64/kernel/pmac_setup.c +++ b/trunk/arch/ppc64/kernel/pmac_setup.c @@ -71,7 +71,6 @@ #include #include #include -#include #include "pmac.h" #include "mpic.h" @@ -512,5 +511,4 @@ struct machdep_calls __initdata pmac_md = { .progress = pmac_progress, .check_legacy_ioport = pmac_check_legacy_ioport, .idle_loop = native_idle, - .enable_pmcs = power4_enable_pmcs, }; diff --git a/trunk/arch/ppc64/kernel/pmc.c b/trunk/arch/ppc64/kernel/pmc.c index cdfec7438d01..67be773f9c00 100644 --- a/trunk/arch/ppc64/kernel/pmc.c +++ b/trunk/arch/ppc64/kernel/pmc.c @@ -65,24 +65,3 @@ void release_pmc_hardware(void) spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); - -void power4_enable_pmcs(void) -{ - unsigned long hid0; - - hid0 = mfspr(HID0); - hid0 |= 1UL << (63 - 20); - - /* POWER4 requires the following sequence */ - asm volatile( - "sync\n" - "mtspr %1, %0\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): - "memory"); -} diff --git a/trunk/arch/ppc64/kernel/process.c b/trunk/arch/ppc64/kernel/process.c index 7a7e027653ad..f7cae05e40fb 100644 --- a/trunk/arch/ppc64/kernel/process.c +++ b/trunk/arch/ppc64/kernel/process.c @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -203,10 +202,11 @@ struct task_struct *__switch_to(struct task_struct *prev, new_thread = &new->thread; old_thread = ¤t->thread; - /* Collect purr utilization data per process and per processor - * wise purr is nothing but processor time base - */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { +/* Collect purr utilization data per process and per processor wise */ +/* purr is nothing but processor time base */ + +#if defined(CONFIG_PPC_PSERIES) + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; @@ -214,6 +214,8 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } +#endif + local_irq_save(flags); last = _switch(old_thread, new_thread); diff --git a/trunk/arch/ppc64/kernel/prom.c b/trunk/arch/ppc64/kernel/prom.c index b21848826791..5aca01ddd81f 100644 --- a/trunk/arch/ppc64/kernel/prom.c +++ b/trunk/arch/ppc64/kernel/prom.c @@ -625,8 +625,8 @@ void __init finish_device_tree(void) static inline char *find_flat_dt_string(u32 offset) { - return ((char *)initial_boot_params) + - initial_boot_params->off_dt_strings + offset; + return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings + + offset; } /** @@ -635,33 +635,26 @@ static inline char *find_flat_dt_string(u32 offset) * unflatten the tree */ static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *uname, int depth, - void *data), + const char *full_path, void *data), void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; int rc = 0; - int depth = -1; do { u32 tag = *((u32 *)p); char *pathp; p += 4; - if (tag == OF_DT_END_NODE) { - depth --; - continue; - } - if (tag == OF_DT_NOP) + if (tag == OF_DT_END_NODE) continue; if (tag == OF_DT_END) break; if (tag == OF_DT_PROP) { u32 sz = *((u32 *)p); p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); + p = _ALIGN(p, sz >= 8 ? 8 : 4); p += sz; p = _ALIGN(p, 4); continue; @@ -671,18 +664,9 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, " device tree !\n", tag); return -EINVAL; } - depth++; pathp = (char *)p; p = _ALIGN(p + strlen(pathp) + 1, 4); - if ((*pathp) == '/') { - char *lp, *np; - for (lp = NULL, np = pathp; *np; np++) - if ((*np) == '/') - lp = np+1; - if (lp != NULL) - pathp = lp; - } - rc = it(p, pathp, depth, data); + rc = it(p, pathp, data); if (rc != 0) break; } while(1); @@ -705,21 +689,17 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, const char *nstr; p += 4; - if (tag == OF_DT_NOP) - continue; if (tag != OF_DT_PROP) return NULL; sz = *((u32 *)p); noff = *((u32 *)(p + 4)); p += 8; - if (initial_boot_params->version < 0x10) - p = _ALIGN(p, sz >= 8 ? 8 : 4); + p = _ALIGN(p, sz >= 8 ? 8 : 4); nstr = find_flat_dt_string(noff); if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index" - " name !\n"); + printk(KERN_WARNING "Can't find property index name !\n"); return NULL; } if (strcmp(name, nstr) == 0) { @@ -733,7 +713,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, } static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) + unsigned long align) { void *res; @@ -747,16 +727,13 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, static unsigned long __init unflatten_dt_node(unsigned long mem, unsigned long *p, struct device_node *dad, - struct device_node ***allnextpp, - unsigned long fpsize) + struct device_node ***allnextpp) { struct device_node *np; struct property *pp, **prev_pp = NULL; char *pathp; u32 tag; - unsigned int l, allocl; - int has_name = 0; - int new_format = 0; + unsigned int l; tag = *((u32 *)(*p)); if (tag != OF_DT_BEGIN_NODE) { @@ -765,62 +742,21 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, } *p += 4; pathp = (char *)*p; - l = allocl = strlen(pathp) + 1; + l = strlen(pathp) + 1; *p = _ALIGN(*p + l, 4); - /* version 0x10 has a more compact unit name here instead of the full - * path. we accumulate the full path size using "fpsize", we'll rebuild - * it later. We detect this because the first character of the name is - * not '/'. - */ - if ((*pathp) != '/') { - new_format = 1; - if (fpsize == 0) { - /* root node: special case. fpsize accounts for path - * plus terminating zero. root node only has '/', so - * fpsize should be 2, but we want to avoid the first - * level nodes to have two '/' so we use fpsize 1 here - */ - fpsize = 1; - allocl = 2; - } else { - /* account for '/' and path size minus terminal 0 - * already in 'l' - */ - fpsize += l; - allocl = fpsize; - } - } - - - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, + np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l, __alignof__(struct device_node)); if (allnextpp) { memset(np, 0, sizeof(*np)); np->full_name = ((char*)np) + sizeof(struct device_node); - if (new_format) { - char *p = np->full_name; - /* rebuild full path for new format */ - if (dad && dad->parent) { - strcpy(p, dad->full_name); -#ifdef DEBUG - if ((strlen(p) + l + 1) != allocl) { - DBG("%s: p: %d, l: %d, a: %d\n", - pathp, strlen(p), l, allocl); - } -#endif - p += strlen(p); - } - *(p++) = '/'; - memcpy(p, pathp, l); - } else - memcpy(np->full_name, pathp, l); + memcpy(np->full_name, pathp, l); prev_pp = &np->properties; **allnextpp = np; *allnextpp = &np->allnext; if (dad != NULL) { np->parent = dad; - /* we temporarily use the next field as `last_child'*/ + /* we temporarily use the `next' field as `last_child'. */ if (dad->next == 0) dad->child = np; else @@ -834,26 +770,18 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, char *pname; tag = *((u32 *)(*p)); - if (tag == OF_DT_NOP) { - *p += 4; - continue; - } if (tag != OF_DT_PROP) break; *p += 4; sz = *((u32 *)(*p)); noff = *((u32 *)((*p) + 4)); - *p += 8; - if (initial_boot_params->version < 0x10) - *p = _ALIGN(*p, sz >= 8 ? 8 : 4); + *p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4); pname = find_flat_dt_string(noff); if (pname == NULL) { printk("Can't find property name in list !\n"); break; } - if (strcmp(pname, "name") == 0) - has_name = 1; l = strlen(pname) + 1; pp = unflatten_dt_alloc(&mem, sizeof(struct property), __alignof__(struct property)); @@ -873,36 +801,6 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, } *p = _ALIGN((*p) + sz, 4); } - /* with version 0x10 we may not have the name property, recreate - * it here from the unit name if absent - */ - if (!has_name) { - char *p = pathp, *ps = pathp, *pa = NULL; - int sz; - - while (*p) { - if ((*p) == '@') - pa = p; - if ((*p) == '/') - ps = p + 1; - p++; - } - if (pa < ps) - pa = p; - sz = (pa - ps) + 1; - pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, - __alignof__(struct property)); - if (allnextpp) { - pp->name = "name"; - pp->length = sz; - pp->value = (unsigned char *)(pp + 1); - *prev_pp = pp; - prev_pp = &pp->next; - memcpy(pp->value, ps, sz - 1); - ((char *)pp->value)[sz - 1] = 0; - DBG("fixed up name for %s -> %s\n", pathp, pp->value); - } - } if (allnextpp) { *prev_pp = NULL; np->name = get_property(np, "name", NULL); @@ -914,11 +812,11 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, np->type = ""; } while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); + mem = unflatten_dt_node(mem, p, np, allnextpp); tag = *((u32 *)(*p)); } if (tag != OF_DT_END_NODE) { - printk("Weird tag at end of node: %x\n", tag); + printk("Weird tag at start of node: %x\n", tag); return mem; } *p += 4; @@ -944,32 +842,21 @@ void __init unflatten_device_tree(void) /* First pass, scan for size */ start = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL, 0); - size = (size | 3) + 1; + size = unflatten_dt_node(0, &start, NULL, NULL); DBG(" size is %lx, allocating...\n", size); /* Allocate memory for the expanded device tree */ - mem = lmb_alloc(size + 4, __alignof__(struct device_node)); - if (!mem) { - DBG("Couldn't allocate memory with lmb_alloc()!\n"); - panic("Couldn't allocate memory with lmb_alloc()!\n"); - } - mem = (unsigned long)abs_to_virt(mem); - - ((u32 *)mem)[size / 4] = 0xdeadbeef; - + mem = (unsigned long)abs_to_virt(lmb_alloc(size, + __alignof__(struct device_node))); DBG(" unflattening...\n", mem); /* Second pass, do actual unflattening */ start = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp, 0); + unflatten_dt_node(mem, &start, NULL, &allnextp); if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); - if (((u32 *)mem)[size / 4] != 0xdeadbeef) - printk(KERN_WARNING "End of tree marker overwritten: %08x\n", - ((u32 *)mem)[size / 4] ); + printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start)); *allnextp = NULL; /* Get pointer to OF "/chosen" node for use everywhere */ @@ -993,7 +880,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, - const char *uname, int depth, void *data) + const char *full_path, void *data) { char *type = get_flat_dt_prop(node, "device_type", NULL); u32 *prop; @@ -1060,15 +947,13 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } static int __init early_init_dt_scan_chosen(unsigned long node, - const char *uname, int depth, void *data) + const char *full_path, void *data) { u32 *prop; u64 *prop64; extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; - DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - - if (depth != 1 || strcmp(uname, "chosen") != 0) + if (strcmp(full_path, "/chosen") != 0) return 0; /* get platform type */ @@ -1118,20 +1003,18 @@ static int __init early_init_dt_scan_chosen(unsigned long node, } static int __init early_init_dt_scan_root(unsigned long node, - const char *uname, int depth, void *data) + const char *full_path, void *data) { u32 *prop; - if (depth != 0) + if (strcmp(full_path, "/") != 0) return 0; prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; - DBG("dt_root_size_cells = %x\n", dt_root_size_cells); - + prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; - DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); /* break now */ return 1; @@ -1159,7 +1042,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, - const char *uname, int depth, void *data) + const char *full_path, void *data) { char *type = get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; @@ -1175,9 +1058,7 @@ static int __init early_init_dt_scan_memory(unsigned long node, endp = reg + (l / sizeof(cell_t)); - DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", - uname, l, reg[0], reg[1], reg[2], reg[3]); - + DBG("memory scan node %s ...\n", full_path); while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { unsigned long base, size; @@ -1588,11 +1469,10 @@ struct device_node *of_find_node_by_path(const char *path) struct device_node *np = allnodes; read_lock(&devtree_lock); - for (; np != 0; np = np->allnext) { + for (; np != 0; np = np->allnext) if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 && of_node_get(np)) break; - } read_unlock(&devtree_lock); return np; } diff --git a/trunk/arch/ppc64/kernel/prom_init.c b/trunk/arch/ppc64/kernel/prom_init.c index adcf972711fc..dbbe6c79d8da 100644 --- a/trunk/arch/ppc64/kernel/prom_init.c +++ b/trunk/arch/ppc64/kernel/prom_init.c @@ -1534,8 +1534,7 @@ static unsigned long __init dt_find_string(char *str) */ #define MAX_PROPERTY_NAME 64 -static void __init scan_dt_build_strings(phandle node, - unsigned long *mem_start, +static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, unsigned long *mem_end) { unsigned long offset = reloc_offset(); @@ -1548,21 +1547,16 @@ static void __init scan_dt_build_strings(phandle node, /* get and store all property names */ prev_name = RELOC(""); for (;;) { + int rc; + /* 64 is max len of name including nul. */ namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); - if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { + rc = call_prom("nextprop", 3, 1, node, prev_name, namep); + if (rc != 1) { /* No more nodes: unwind alloc */ *mem_start = (unsigned long)namep; break; } - - /* skip "name" */ - if (strcmp(namep, RELOC("name")) == 0) { - *mem_start = (unsigned long)namep; - prev_name = RELOC("name"); - continue; - } - /* get/create string entry */ soff = dt_find_string(namep); if (soff != 0) { *mem_start = (unsigned long)namep; @@ -1577,7 +1571,7 @@ static void __init scan_dt_build_strings(phandle node, /* do all our children */ child = call_prom("child", 1, 1, node); - while (child != 0) { + while (child != (phandle)0) { scan_dt_build_strings(child, mem_start, mem_end); child = call_prom("peer", 1, 1, child); } @@ -1586,13 +1580,16 @@ static void __init scan_dt_build_strings(phandle node, static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long *mem_end) { + int l, align; phandle child; - char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; + char *namep, *prev_name, *sstart, *p, *ep; unsigned long soff; unsigned char *valp; unsigned long offset = reloc_offset(); - static char pname[MAX_PROPERTY_NAME]; - int l; + char pname[MAX_PROPERTY_NAME]; + char *path; + + path = RELOC(prom_scratch); dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); @@ -1602,33 +1599,23 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, namep, *mem_end - *mem_start); if (l >= 0) { /* Didn't fit? Get more room. */ - if ((l+1) > (*mem_end - *mem_start)) { + if (l+1 > *mem_end - *mem_start) { namep = make_room(mem_start, mem_end, l+1, 1); call_prom("package-to-path", 3, 1, node, namep, l); } namep[l] = '\0'; - /* Fixup an Apple bug where they have bogus \0 chars in the * middle of the path in some properties */ for (p = namep, ep = namep + l; p < ep; p++) if (*p == '\0') { memmove(p, p+1, ep - p); - ep--; l--; p--; + ep--; l--; } - - /* now try to extract the unit name in that mess */ - for (p = namep, lp = NULL; *p; p++) - if (*p == '/') - lp = p + 1; - if (lp != NULL) - memmove(namep, lp, strlen(lp) + 1); - *mem_start = _ALIGN(((unsigned long) namep) + - strlen(namep) + 1, 4); + *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); } /* get it again for debugging */ - path = RELOC(prom_scratch); memset(path, 0, PROM_SCRATCH_SIZE); call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); @@ -1636,27 +1623,23 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, prev_name = RELOC(""); sstart = (char *)RELOC(dt_string_start); for (;;) { - if (call_prom("nextprop", 3, 1, node, prev_name, - RELOC(pname)) != 1) - break; + int rc; - /* skip "name" */ - if (strcmp(RELOC(pname), RELOC("name")) == 0) { - prev_name = RELOC("name"); - continue; - } + rc = call_prom("nextprop", 3, 1, node, prev_name, pname); + if (rc != 1) + break; /* find string offset */ - soff = dt_find_string(RELOC(pname)); + soff = dt_find_string(pname); if (soff == 0) { - prom_printf("WARNING: Can't find string index for" - " <%s>, node %s\n", RELOC(pname), path); + prom_printf("WARNING: Can't find string index for <%s>, node %s\n", + pname, path); break; } prev_name = sstart + soff; /* get length */ - l = call_prom("getproplen", 2, 1, node, RELOC(pname)); + l = call_prom("getproplen", 2, 1, node, pname); /* sanity checks */ if (l == PROM_ERROR) @@ -1665,7 +1648,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, prom_printf("WARNING: ignoring large property "); /* It seems OF doesn't null-terminate the path :-( */ prom_printf("[%s] ", path); - prom_printf("%s length 0x%x\n", RELOC(pname), l); + prom_printf("%s length 0x%x\n", pname, l); continue; } @@ -1675,16 +1658,17 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, dt_push_token(soff, mem_start, mem_end); /* push property content */ - valp = make_room(mem_start, mem_end, l, 4); - call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); + align = (l >= 8) ? 8 : 4; + valp = make_room(mem_start, mem_end, l, align); + call_prom("getprop", 4, 1, node, pname, valp, l); *mem_start = _ALIGN(*mem_start, 4); } /* Add a "linux,phandle" property. */ soff = dt_find_string(RELOC("linux,phandle")); if (soff == 0) - prom_printf("WARNING: Can't find string index for" - " node %s\n", path); + prom_printf("WARNING: Can't find string index for " + " node %s\n", path); else { dt_push_token(OF_DT_PROP, mem_start, mem_end); dt_push_token(4, mem_start, mem_end); @@ -1695,7 +1679,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* do all our children */ child = call_prom("child", 1, 1, node); - while (child != 0) { + while (child != (phandle)0) { scan_dt_build_struct(child, mem_start, mem_end); child = call_prom("peer", 1, 1, child); } @@ -1734,8 +1718,7 @@ static void __init flatten_device_tree(void) /* Build header and make room for mem rsv map */ mem_start = _ALIGN(mem_start, 4); - hdr = make_room(&mem_start, &mem_end, - sizeof(struct boot_param_header), 4); + hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); RELOC(dt_header_start) = (unsigned long)hdr; rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); @@ -1748,11 +1731,11 @@ static void __init flatten_device_tree(void) namep = make_room(&mem_start, &mem_end, 16, 1); strcpy(namep, RELOC("linux,phandle")); mem_start = (unsigned long)namep + strlen(namep) + 1; + RELOC(dt_string_end) = mem_start; /* Build string array */ prom_printf("Building dt strings...\n"); scan_dt_build_strings(root, &mem_start, &mem_end); - RELOC(dt_string_end) = mem_start; /* Build structure */ mem_start = PAGE_ALIGN(mem_start); @@ -1767,11 +1750,9 @@ static void __init flatten_device_tree(void) hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); - hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); hdr->version = OF_DT_VERSION; - /* Version 16 is not backward compatible */ - hdr->last_comp_version = 0x10; + hdr->last_comp_version = 1; /* Reserve the whole thing and copy the reserve map in, we * also bump mem_reserve_cnt to cause further reservations to @@ -1827,9 +1808,6 @@ static void __init fixup_device_tree(void) /* does it need fixup ? */ if (prom_getproplen(i2c, "interrupts") > 0) return; - - prom_printf("fixing up bogus interrupts for u3 i2c...\n"); - /* interrupt on this revision of u3 is number 0 and level */ interrupts[0] = 0; interrupts[1] = 1; diff --git a/trunk/arch/ppc64/kernel/rtas_pci.c b/trunk/arch/ppc64/kernel/rtas_pci.c index 1dccadaddd1d..1048817befb8 100644 --- a/trunk/arch/ppc64/kernel/rtas_pci.c +++ b/trunk/arch/ppc64/kernel/rtas_pci.c @@ -58,21 +58,6 @@ static int config_access_valid(struct device_node *dn, int where) return 0; } -static int of_device_available(struct device_node * dn) -{ - char * status; - - status = get_property(dn, "status", NULL); - - if (!status) - return 1; - - if (!strcmp(status, "okay")) - return 1; - - return 0; -} - static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) { int returnval = -1; @@ -118,7 +103,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, /* Search only direct children of the bus */ for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn && of_device_available(dn)) + if (dn->devfn == devfn) return rtas_read_config(dn, where, size, val); return PCIBIOS_DEVICE_NOT_FOUND; } @@ -161,7 +146,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, /* Search only direct children of the bus */ for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn && of_device_available(dn)) + if (dn->devfn == devfn) return rtas_write_config(dn, where, size, val); return PCIBIOS_DEVICE_NOT_FOUND; } diff --git a/trunk/arch/ppc64/kernel/setup.c b/trunk/arch/ppc64/kernel/setup.c index ee3b20de2e7a..e9c24d2dbd91 100644 --- a/trunk/arch/ppc64/kernel/setup.c +++ b/trunk/arch/ppc64/kernel/setup.c @@ -536,19 +536,15 @@ static void __init check_for_initrd(void) DBG(" -> check_for_initrd()\n"); - if (of_chosen) { - prop = (u64 *)get_property(of_chosen, - "linux,initrd-start", NULL); + prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); + if (prop != NULL) { + initrd_start = (unsigned long)__va(*prop); + prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); - prop = (u64 *)get_property(of_chosen, - "linux,initrd-end", NULL); - if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); - initrd_below_start_ok = 1; - } else - initrd_start = 0; - } + initrd_end = (unsigned long)__va(*prop); + initrd_below_start_ok = 1; + } else + initrd_start = 0; } /* If we were passed an initrd, set the ROOT_DEV properly if the values @@ -631,7 +627,7 @@ void __init setup_system(void) * Initialize xmon */ #ifdef CONFIG_XMON_DEFAULT - xmon_init(1); + xmon_init(); #endif /* * Register early console @@ -1347,13 +1343,11 @@ static int __init early_xmon(char *p) /* ensure xmon is enabled */ if (p) { if (strncmp(p, "on", 2) == 0) - xmon_init(1); - if (strncmp(p, "off", 3) == 0) - xmon_init(0); + xmon_init(); if (strncmp(p, "early", 5) != 0) return 0; } - xmon_init(1); + xmon_init(); debugger(NULL); return 0; diff --git a/trunk/arch/ppc64/kernel/sysfs.c b/trunk/arch/ppc64/kernel/sysfs.c index f311ee7c0070..02b8ac4e0168 100644 --- a/trunk/arch/ppc64/kernel/sysfs.c +++ b/trunk/arch/ppc64/kernel/sysfs.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -101,8 +100,6 @@ static int __init setup_smt_snooze_delay(char *str) } __setup("smt-snooze-delay=", setup_smt_snooze_delay); -#endif /* CONFIG_PPC_MULTIPLATFORM */ - /* * Enabling PMCs will slow partition context switch times so we only do * it the first time we write to the PMCs. @@ -112,15 +109,65 @@ static DEFINE_PER_CPU(char, pmcs_enabled); void ppc64_enable_pmcs(void) { + unsigned long hid0; +#ifdef CONFIG_PPC_PSERIES + unsigned long set, reset; +#endif /* CONFIG_PPC_PSERIES */ + /* Only need to enable them once */ if (__get_cpu_var(pmcs_enabled)) return; __get_cpu_var(pmcs_enabled) = 1; - if (ppc_md.enable_pmcs) - ppc_md.enable_pmcs(); + switch (systemcfg->platform) { + case PLATFORM_PSERIES: + case PLATFORM_POWERMAC: + hid0 = mfspr(HID0); + hid0 |= 1UL << (63 - 20); + + /* POWER4 requires the following sequence */ + asm volatile( + "sync\n" + "mtspr %1, %0\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): + "memory"); + break; + +#ifdef CONFIG_PPC_PSERIES + case PLATFORM_PSERIES_LPAR: + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); + break; +#endif /* CONFIG_PPC_PSERIES */ + + default: + break; + } + +#ifdef CONFIG_PPC_PSERIES + /* instruct hypervisor to maintain PMCs */ + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + get_paca()->lppaca.pmcregs_in_use = 1; +#endif /* CONFIG_PPC_PSERIES */ } + +#else + +/* PMC stuff */ +void ppc64_enable_pmcs(void) +{ + /* XXX Implement for iseries */ +} +#endif /* CONFIG_PPC_MULTIPLATFORM */ + EXPORT_SYMBOL(ppc64_enable_pmcs); /* XXX convert to rusty's on_one_cpu */ diff --git a/trunk/arch/ppc64/kernel/time.c b/trunk/arch/ppc64/kernel/time.c index 1696e1b05bb9..909462e1adea 100644 --- a/trunk/arch/ppc64/kernel/time.c +++ b/trunk/arch/ppc64/kernel/time.c @@ -67,7 +67,6 @@ #include #include #include -#include u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; @@ -371,11 +370,13 @@ int timer_interrupt(struct pt_regs * regs) process_hvlpevents(regs); #endif - /* collect purr register values often, for accurate calculations */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) { +/* collect purr register values often, for accurate calculations */ +#if defined(CONFIG_PPC_PSERIES) + if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } +#endif irq_exit(); diff --git a/trunk/arch/ppc64/kernel/vio.c b/trunk/arch/ppc64/kernel/vio.c index 3b790bafcaad..0c0ba71ac0e8 100644 --- a/trunk/arch/ppc64/kernel/vio.c +++ b/trunk/arch/ppc64/kernel/vio.c @@ -1,11 +1,10 @@ /* * IBM PowerPC Virtual I/O Infrastructure Support. * - * Copyright (c) 2003-2005 IBM Corp. + * Copyright (c) 2003 IBM Corp. * Dave Engebretsen engebret@us.ibm.com * Santiago Leon santil@us.ibm.com * Hollis Blanchard - * Stephen Rothwell * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -15,27 +14,57 @@ #include #include +#include #include +#include #include #include +#include #include #include +#include #include +#include +#include +#include +#include +#include + +#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) + +extern struct subsystem devices_subsys; /* needed for vio_find_name() */ static const struct vio_device_id *vio_match_device( const struct vio_device_id *, const struct vio_dev *); -struct vio_dev vio_bus_device = { /* fake "parent" device */ +#ifdef CONFIG_PPC_PSERIES +static struct iommu_table *vio_build_iommu_table(struct vio_dev *); +static int vio_num_address_cells; +#endif +#ifdef CONFIG_PPC_ISERIES +static struct iommu_table veth_iommu_table; +static struct iommu_table vio_iommu_table; +#endif +static struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = vio_bus_device.dev.bus_id, .type = "", +#ifdef CONFIG_PPC_ISERIES + .iommu_table = &vio_iommu_table, +#endif .dev.bus_id = "vio", .dev.bus = &vio_bus_type, }; -static int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev); -static void (*unregister_device_callback)(struct vio_dev *dev); -static void (*release_device_callback)(struct device *dev); +#ifdef CONFIG_PPC_ISERIES +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num); + +struct device *iSeries_vio_dev = &vio_bus_device.dev; +EXPORT_SYMBOL(iSeries_vio_dev); + +#define device_is_compatible(a, b) 1 + +#endif /* convert from struct device to struct vio_dev and pass to driver. * dev->driver has already been set by generic code because vio_bus_match @@ -47,6 +76,8 @@ static int vio_bus_probe(struct device *dev) const struct vio_device_id *id; int error = -ENODEV; + DBGENTER(); + if (!viodrv->probe) return error; @@ -64,6 +95,8 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); + DBGENTER(); + if (viodrv->remove) { return viodrv->remove(viodev); } @@ -113,65 +146,178 @@ EXPORT_SYMBOL(vio_unregister_driver); static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids, const struct vio_dev *dev) { + DBGENTER(); + while (ids->type) { - if (is_match(ids, dev)) + if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && + device_is_compatible(dev->dev.platform_data, ids->compat)) return ids; ids++; } return NULL; } +#ifdef CONFIG_PPC_ISERIES +void __init iommu_vio_init(void) +{ + struct iommu_table *t; + struct iommu_table_cb cb; + unsigned long cbp; + unsigned long itc_entries; + + cb.itc_busno = 255; /* Bus 255 is the virtual bus */ + cb.itc_virtbus = 0xff; /* Ask for virtual bus */ + + cbp = virt_to_abs(&cb); + HvCallXm_getTceTableParms(cbp); + + itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); + veth_iommu_table.it_size = itc_entries / 2; + veth_iommu_table.it_busno = cb.itc_busno; + veth_iommu_table.it_offset = cb.itc_offset; + veth_iommu_table.it_index = cb.itc_index; + veth_iommu_table.it_type = TCE_VB; + veth_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&veth_iommu_table); + + if (!t) + printk("Virtual Bus VETH TCE table failed.\n"); + + vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; + vio_iommu_table.it_busno = cb.itc_busno; + vio_iommu_table.it_offset = cb.itc_offset + + veth_iommu_table.it_size; + vio_iommu_table.it_index = cb.itc_index; + vio_iommu_table.it_type = TCE_VB; + vio_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&vio_iommu_table); + + if (!t) + printk("Virtual Bus VIO TCE table failed.\n"); +} +#endif + +#ifdef CONFIG_PPC_PSERIES +static void probe_bus_pseries(void) +{ + struct device_node *node_vroot, *of_node; + + node_vroot = find_devices("vdevice"); + if ((node_vroot == NULL) || (node_vroot->child == NULL)) + /* this machine doesn't do virtual IO, and that's ok */ + return; + + vio_num_address_cells = prom_n_addr_cells(node_vroot->child); + + /* + * Create struct vio_devices for each virtual device in the device tree. + * Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) { + printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); + vio_register_device_node(of_node); + } +} +#endif + +#ifdef CONFIG_PPC_ISERIES +static void probe_bus_iseries(void) +{ + HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap(); + struct vio_dev *viodev; + int i; + + /* there is only one of each of these */ + vio_register_device_iseries("viocons", 0); + vio_register_device_iseries("vscsi", 0); + + vlan_map = HvLpConfig_getVirtualLanIndexMap(); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + if ((vlan_map & (0x8000 >> i)) == 0) + continue; + viodev = vio_register_device_iseries("vlan", i); + /* veth is special and has it own iommu_table */ + viodev->iommu_table = &veth_iommu_table; + } + for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) + vio_register_device_iseries("viodasd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) + vio_register_device_iseries("viocd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) + vio_register_device_iseries("viotape", i); +} +#endif + /** * vio_bus_init: - Initialize the virtual IO bus */ -int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, - const struct vio_dev *dev), - void (*unregister_dev)(struct vio_dev *), - void (*release_dev)(struct device *)) +static int __init vio_bus_init(void) { int err; - is_match = match_func; - unregister_device_callback = unregister_dev; - release_device_callback = release_dev; - err = bus_register(&vio_bus_type); if (err) { printk(KERN_ERR "failed to register VIO bus\n"); return err; } - /* the fake parent of all vio devices, just to give us - * a nice directory - */ + /* the fake parent of all vio devices, just to give us a nice directory */ err = device_register(&vio_bus_device.dev); if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", - __FUNCTION__, err); + printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__, + err); return err; } +#ifdef CONFIG_PPC_PSERIES + probe_bus_pseries(); +#endif +#ifdef CONFIG_PPC_ISERIES + probe_bus_iseries(); +#endif + return 0; } +__initcall(vio_bus_init); + /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - if (release_device_callback) - release_device_callback(dev); + DBGENTER(); + +#ifdef CONFIG_PPC_PSERIES + /* XXX free TCE table */ + of_node_put(dev->platform_data); +#endif kfree(to_vio_dev(dev)); } +#ifdef CONFIG_PPC_PSERIES +static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->platform_data; + + return sprintf(buf, "%s\n", of_node->full_name); +} +DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); +#endif + static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_vio_dev(dev)->name); } DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); -struct vio_dev * __devinit vio_register_device_common( +static struct vio_dev * __devinit vio_register_device_common( struct vio_dev *viodev, char *name, char *type, uint32_t unit_address, struct iommu_table *iommu_table) { + DBGENTER(); + viodev->name = name; viodev->type = type; viodev->unit_address = unit_address; @@ -192,15 +338,222 @@ struct vio_dev * __devinit vio_register_device_common( return viodev; } +#ifdef CONFIG_PPC_PSERIES +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node (dev.platform_data) and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + unsigned int *unit_address; + unsigned int *irq_p; + + DBGENTER(); + + /* we need the 'device_type' property, in order to match with drivers */ + if ((NULL == of_node->type)) { + printk(KERN_WARNING + "%s: node %s missing 'device_type'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + if (!unit_address) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) { + return NULL; + } + memset(viodev, 0, sizeof(struct vio_dev)); + + viodev->dev.platform_data = of_node_get(of_node); + + viodev->irq = NO_IRQ; + irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); + if (irq_p) { + int virq = virt_irq_create_mapping(*irq_p); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", of_node->full_name); + } else + viodev->irq = irq_offset_up(virq); + } + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); + + /* register with generic device framework */ + if (vio_register_device_common(viodev, of_node->name, of_node->type, + *unit_address, vio_build_iommu_table(viodev)) + == NULL) { + /* XXX free TCE table */ + kfree(viodev); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_devspec); + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); +#endif + +#ifdef CONFIG_PPC_ISERIES +/** + * vio_register_device: - Register a new vio device. + * @voidev: The device to register. + */ +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num) +{ + struct vio_dev *viodev; + + DBGENTER(); + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) + return NULL; + memset(viodev, 0, sizeof(struct vio_dev)); + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); + + return vio_register_device_common(viodev, viodev->dev.bus_id, type, + unit_num, &vio_iommu_table); +} +#endif + void __devinit vio_unregister_device(struct vio_dev *viodev) { - if (unregister_device_callback) - unregister_device_callback(viodev); + DBGENTER(); +#ifdef CONFIG_PPC_PSERIES + device_remove_file(&viodev->dev, &dev_attr_devspec); +#endif device_remove_file(&viodev->dev, &dev_attr_name); device_unregister(&viodev->dev); } EXPORT_SYMBOL(vio_unregister_device); +#ifdef CONFIG_PPC_PSERIES +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). + * + * Calls prom.c's get_property() to return the value of the + * attribute specified by the preprocessor constant @which +*/ +const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) +{ + return get_property(vdev->dev.platform_data, (char*)which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + * XXX once vio_bus_type.devices is actually used as a kset in + * drivers/base/bus.c, this function should be removed in favor of + * "device_find(kobj_name, &vio_bus_type)" + */ +static struct vio_dev *vio_find_name(const char *kobj_name) +{ + struct kobject *found; + + found = kset_find_obj(&devices_subsys.kset, kobj_name); + if (!found) + return NULL; + + return to_vio_dev(container_of(found, struct device, kobj)); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + uint32_t *unit_address; + char kobj_name[BUS_ID_SIZE]; + + /* construct the kobject name from the device node */ + unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +/** + * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree. + * @dev: the virtual device. + * + * Returns a pointer to the built tce tree, or NULL if it can't + * find property. +*/ +static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev) +{ + unsigned int *dma_window; + struct iommu_table *newTceTable; + unsigned long offset; + int dma_window_property_size; + + dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); + if(!dma_window) { + return NULL; + } + + newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + /* There should be some code to extract the phys-encoded offset + using prom_n_addr_cells(). However, according to a comment + on earlier versions, it's always zero, so we don't bother */ + offset = dma_window[1] >> PAGE_SHIFT; + + /* TCE table size - measured in tce entries */ + newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; + /* offset for VIO should always be 0 */ + newTceTable->it_offset = offset; + newTceTable->it_busno = 0; + newTceTable->it_index = (unsigned long)dma_window[0]; + newTceTable->it_type = TCE_VB; + + return iommu_init_table(newTceTable); +} + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_Success) { + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + } + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_Success) { + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + } + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); +#endif + static dma_addr_t vio_map_single(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { @@ -264,6 +617,8 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv) const struct vio_device_id *ids = vio_drv->id_table; const struct vio_device_id *found_id; + DBGENTER(); + if (!ids) return 0; diff --git a/trunk/arch/ppc64/mm/hash_native.c b/trunk/arch/ppc64/mm/hash_native.c index 7626bb59954d..a6abd3a979bf 100644 --- a/trunk/arch/ppc64/mm/hash_native.c +++ b/trunk/arch/ppc64/mm/hash_native.c @@ -51,6 +51,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { + unsigned long arpn = physRpn_to_absRpn(prpn); hpte_t *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; int i; @@ -73,7 +74,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; if (vflags & HPTE_V_LARGE) va &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; hptep->r = hpte_r; /* Guarantee the second dword is visible before the valid bit */ diff --git a/trunk/arch/ppc64/mm/hash_utils.c b/trunk/arch/ppc64/mm/hash_utils.c index 09475c8edf7c..623b5d130c31 100644 --- a/trunk/arch/ppc64/mm/hash_utils.c +++ b/trunk/arch/ppc64/mm/hash_utils.c @@ -210,7 +210,7 @@ void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].base + KERNELBASE; + base = lmb.memory.region[i].physbase + KERNELBASE; size = lmb.memory.region[i].size; DBG("creating mapping for region: %lx : %lx\n", base, size); @@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int local = 0; cpumask_t tmp; - if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) + if ((ea & ~REGION_MASK) > EADDR_MASK) return 1; switch (REGION_ID(ea)) { diff --git a/trunk/arch/ppc64/mm/hugetlbpage.c b/trunk/arch/ppc64/mm/hugetlbpage.c index e7833c80eb68..f9524602818d 100644 --- a/trunk/arch/ppc64/mm/hugetlbpage.c +++ b/trunk/arch/ppc64/mm/hugetlbpage.c @@ -27,94 +27,124 @@ #include -#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) -#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) +#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3) +#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT) +#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1)) -/* Modelled after find_linux_pte() */ -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) +#define HUGEPTE_INDEX_SIZE 9 +#define HUGEPGD_INDEX_SIZE 10 + +#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE) +#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE) + +static inline int hugepgd_index(unsigned long addr) { - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - pte_t *pt; + return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; +} - BUG_ON(! in_hugepage_area(mm->context, addr)); +static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) +{ + int index; - addr &= HPAGE_MASK; - - pg = pgd_offset(mm, addr); - if (!pgd_none(*pg)) { - pu = pud_offset(pg, addr); - if (!pud_none(*pu)) { - pm = pmd_offset(pu, addr); - pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); - return pt; - } - } + if (! mm->context.huge_pgdir) + return NULL; - return NULL; + + index = hugepgd_index(addr); + BUG_ON(index >= PTRS_PER_HUGEPGD); + return (pud_t *)(mm->context.huge_pgdir + index); } -pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) +static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) { - pgd_t *pg; - pud_t *pu; - pmd_t *pm; - pte_t *pt; + int index; + + if (pud_none(*dir)) + return NULL; + index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; + return (pte_t *)pud_page(*dir) + index; +} + +static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) +{ BUG_ON(! in_hugepage_area(mm->context, addr)); - addr &= HPAGE_MASK; + if (! mm->context.huge_pgdir) { + pgd_t *new; + spin_unlock(&mm->page_table_lock); + /* Don't use pgd_alloc(), because we want __GFP_REPEAT */ + new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); + BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); + spin_lock(&mm->page_table_lock); - pg = pgd_offset(mm, addr); - pu = pud_alloc(mm, pg, addr); + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (mm->context.huge_pgdir) + pgd_free(new); + else + mm->context.huge_pgdir = new; + } + return hugepgd_offset(mm, addr); +} - if (pu) { - pm = pmd_alloc(mm, pu, addr); - if (pm) { - pt = (pte_t *)pm; - BUG_ON(!pmd_none(*pm) - && !(pte_present(*pt) && pte_huge(*pt))); - return pt; +static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) +{ + if (! pud_present(*dir)) { + pte_t *new; + + spin_unlock(&mm->page_table_lock); + new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); + BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); + spin_lock(&mm->page_table_lock); + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pud_present(*dir)) { + if (new) + kmem_cache_free(zero_cache, new); + } else { + struct page *ptepage; + + if (! new) + return NULL; + ptepage = virt_to_page(new); + ptepage->mapping = (void *) mm; + ptepage->index = addr & HUGEPGDIR_MASK; + pud_populate(mm, dir, new); } } - return NULL; + return hugepte_offset(dir, addr); } -#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) - -void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { - int i; + pud_t *pud; - if (pte_present(*ptep)) { - pte_clear(mm, addr, ptep); - flush_tlb_pending(); - } + BUG_ON(! in_hugepage_area(mm->context, addr)); - for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { - *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); - ptep++; - } + pud = hugepgd_offset(mm, addr); + if (! pud) + return NULL; + + return hugepte_offset(pud, addr); } -pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, - pte_t *ptep) +pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { - unsigned long old = pte_update(ptep, ~0UL); - int i; + pud_t *pud; - if (old & _PAGE_HASHPTE) - hpte_update(mm, addr, old, 0); + BUG_ON(! in_hugepage_area(mm->context, addr)); - for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) - ptep[i] = __pte(0); + pud = hugepgd_alloc(mm, addr); + if (! pud) + return NULL; - return __pte(old); + return hugepte_alloc(mm, pud, addr); } /* @@ -132,17 +162,15 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -static void flush_low_segments(void *parm) +static void flush_segments(void *parm) { - u16 areas = (unsigned long) parm; + u16 segs = (unsigned long) parm; unsigned long i; asm volatile("isync" : : : "memory"); - BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); - - for (i = 0; i < NUM_LOW_AREAS; i++) { - if (! (areas & (1U << i))) + for (i = 0; i < 16; i++) { + if (! (segs & (1U << i))) continue; asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); } @@ -150,33 +178,13 @@ static void flush_low_segments(void *parm) asm volatile("isync" : : : "memory"); } -static void flush_high_segments(void *parm) +static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) { - u16 areas = (unsigned long) parm; - unsigned long i, j; - - asm volatile("isync" : : : "memory"); - - BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); - - for (i = 0; i < NUM_HIGH_AREAS; i++) { - if (! (areas & (1U << i))) - continue; - for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) - asm volatile("slbie %0" - :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT))); - } - - asm volatile("isync" : : : "memory"); -} - -static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << SID_SHIFT; - unsigned long end = (area+1) << SID_SHIFT; + unsigned long start = seg << SID_SHIFT; + unsigned long end = (seg+1) << SID_SHIFT; struct vm_area_struct *vma; - BUG_ON(area >= NUM_LOW_AREAS); + BUG_ON(seg >= 16); /* Check no VMAs are in the region */ vma = find_vma(mm, start); @@ -186,39 +194,20 @@ static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) return 0; } -static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) -{ - unsigned long start = area << HTLB_AREA_SHIFT; - unsigned long end = (area+1) << HTLB_AREA_SHIFT; - struct vm_area_struct *vma; - - BUG_ON(area >= NUM_HIGH_AREAS); - - /* Check no VMAs are in the region */ - vma = find_vma(mm, start); - if (vma && (vma->vm_start < end)) - return -EBUSY; - - return 0; -} - -static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) +static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) { unsigned long i; - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); - BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); - - newareas &= ~(mm->context.low_htlb_areas); - if (! newareas) + newsegs &= ~(mm->context.htlb_segs); + if (! newsegs) return 0; /* The segments we want are already open */ - for (i = 0; i < NUM_LOW_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_low_area_for_htlb(mm, i) != 0) + for (i = 0; i < 16; i++) + if ((1 << i) & newsegs) + if (prepare_low_seg_for_htlb(mm, i) != 0) return -EBUSY; - mm->context.low_htlb_areas |= newareas; + mm->context.htlb_segs |= newsegs; /* update the paca copy of the context struct */ get_paca()->context = mm->context; @@ -226,63 +215,29 @@ static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); - - return 0; -} - -static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) -{ - unsigned long i; - - BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); - BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) - != NUM_HIGH_AREAS); - - newareas &= ~(mm->context.high_htlb_areas); - if (! newareas) - return 0; /* The areas we want are already open */ - - for (i = 0; i < NUM_HIGH_AREAS; i++) - if ((1 << i) & newareas) - if (prepare_high_area_for_htlb(mm, i) != 0) - return -EBUSY; - - mm->context.high_htlb_areas |= newareas; - - /* update the paca copy of the context struct */ - get_paca()->context = mm->context; - - /* the context change must make it to memory before the flush, - * so that further SLB misses do the right thing. */ - mb(); - on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); + on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); return 0; } int prepare_hugepage_range(unsigned long addr, unsigned long len) { - int err; - - if ( (addr+len) < addr ) - return -EINVAL; - - if ((addr + len) < 0x100000000UL) - err = open_low_hpage_areas(current->mm, + if (within_hugepage_high_range(addr, len)) + return 0; + else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { + int err; + /* Yes, we need both tests, in case addr+len overflows + * 64-bit arithmetic */ + err = open_low_hpage_segs(current->mm, LOW_ESID_MASK(addr, len)); - else - err = open_high_hpage_areas(current->mm, - HTLB_AREA_MASK(addr, len)); - if (err) { - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", - addr, len, - LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); + if (err) + printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" + " failed (segs: 0x%04hx)\n", addr, len, + LOW_ESID_MASK(addr, len)); return err; } - return 0; + return -EINVAL; } struct page * @@ -354,8 +309,8 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, vma = find_vma(mm, addr); continue; } - if (touches_hugepage_high_range(mm, addr, len)) { - addr = ALIGN(addr+1, 1UL<mm, addr); - while (addr + len <= TASK_SIZE_USER64) { + for (vma = find_vma(current->mm, addr); + addr + len <= TASK_HPAGE_END; + vma = vma->vm_next) { BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - - if (! __within_hugepage_high_range(addr, len, areamask)) { - addr = ALIGN(addr+1, 1UL<mm, addr); - continue; - } + BUG_ON(! within_hugepage_high_range(addr, len)); if (!vma || (addr + len) <= vma->vm_start) return addr; addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Depending on segmask this might not be a confirmed - * hugepage region, so the ALIGN could have skipped - * some VMAs */ - vma = find_vma(current->mm, addr); + /* Because we're in a hugepage region, this alignment + * should not skip us over any VMAs */ } return -ENOMEM; @@ -558,9 +507,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { - int lastshift; - u16 areamask, curareas; - if (len & ~HPAGE_MASK) return -EINVAL; @@ -568,49 +514,67 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -EINVAL; if (test_thread_flag(TIF_32BIT)) { - curareas = current->mm->context.low_htlb_areas; + int lastshift = 0; + u16 segmask, cursegs = current->mm->context.htlb_segs; /* First see if we can do the mapping in the existing - * low areas */ - addr = htlb_get_low_area(len, curareas); + * low hpage segments */ + addr = htlb_get_low_area(len, cursegs); if (addr != -ENOMEM) return addr; - lastshift = 0; - for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) + for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); + ! lastshift; segmask >>=1) { + if (segmask & 1) lastshift = 1; - addr = htlb_get_low_area(len, curareas | areamask); + addr = htlb_get_low_area(len, cursegs | segmask); if ((addr != -ENOMEM) - && open_low_hpage_areas(current->mm, areamask) == 0) + && open_low_hpage_segs(current->mm, segmask) == 0) return addr; } + printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" + " enough segments\n"); + return -ENOMEM; } else { - curareas = current->mm->context.high_htlb_areas; + return htlb_get_high_area(len); + } +} - /* First see if we can do the mapping in the existing - * high areas */ - addr = htlb_get_high_area(len, curareas); - if (addr != -ENOMEM) - return addr; +void hugetlb_mm_free_pgd(struct mm_struct *mm) +{ + int i; + pgd_t *pgdir; - lastshift = 0; - for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); - ! lastshift; areamask >>=1) { - if (areamask & 1) - lastshift = 1; + spin_lock(&mm->page_table_lock); - addr = htlb_get_high_area(len, curareas | areamask); - if ((addr != -ENOMEM) - && open_high_hpage_areas(current->mm, areamask) == 0) - return addr; + pgdir = mm->context.huge_pgdir; + if (! pgdir) + goto out; + + mm->context.huge_pgdir = NULL; + + /* cleanup any hugepte pages leftover */ + for (i = 0; i < PTRS_PER_HUGEPGD; i++) { + pud_t *pud = (pud_t *)(pgdir + i); + + if (! pud_none(*pud)) { + pte_t *pte = (pte_t *)pud_page(*pud); + struct page *ptepage = virt_to_page(pte); + + ptepage->mapping = NULL; + + BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); + kmem_cache_free(zero_cache, pte); } + pud_clear(pud); } - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" - " enough areas\n"); - return -ENOMEM; + + BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); + kmem_cache_free(zero_cache, pgdir); + + out: + spin_unlock(&mm->page_table_lock); } int hash_huge_page(struct mm_struct *mm, unsigned long access, diff --git a/trunk/arch/ppc64/mm/imalloc.c b/trunk/arch/ppc64/mm/imalloc.c index c65b87b92756..b6e75b891ac0 100644 --- a/trunk/arch/ppc64/mm/imalloc.c +++ b/trunk/arch/ppc64/mm/imalloc.c @@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) break; if ((unsigned long)tmp->addr >= ioremap_bot) addr = tmp->size + (unsigned long) tmp->addr; - if (addr >= IMALLOC_END-size) + if (addr > IMALLOC_END-size) return 1; } *im_addr = addr; diff --git a/trunk/arch/ppc64/mm/init.c b/trunk/arch/ppc64/mm/init.c index c02dc9809ca5..e58a24d42879 100644 --- a/trunk/arch/ppc64/mm/init.c +++ b/trunk/arch/ppc64/mm/init.c @@ -42,6 +42,7 @@ #include #include +#include #include #include #include @@ -65,14 +66,6 @@ #include #include -#if PGTABLE_RANGE > USER_VSID_RANGE -#warning Limited user VSID range means pagetable space is wasted -#endif - -#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) -#warning TASK_SIZE is smaller than it needs to be. -#endif - int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE; @@ -166,6 +159,7 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) ptep = pte_alloc_kernel(&init_mm, pmdp, ea); if (!ptep) return -ENOMEM; + pa = abs_to_phys(pa); set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); spin_unlock(&init_mm.page_table_lock); @@ -232,7 +226,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, * Before that, we map using addresses going * up from ioremap_bot. imalloc will use * the addresses from ioremap_bot through - * IMALLOC_END + * IMALLOC_END (0xE000001fffffffff) * */ pa = addr & PAGE_MASK; @@ -423,6 +417,12 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) int index; int err; +#ifdef CONFIG_HUGETLB_PAGE + /* We leave htlb_segs as it was, but for a fork, we need to + * clear the huge_pgdir. */ + mm->context.huge_pgdir = NULL; +#endif + again: if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) return -ENOMEM; @@ -453,6 +453,8 @@ void destroy_context(struct mm_struct *mm) spin_unlock(&mmu_context_lock); mm->context.id = NO_CONTEXT; + + hugetlb_mm_free_pgd(mm); } /* @@ -482,9 +484,9 @@ void __init mm_init_ppc64(void) for (i = 1; i < lmb.memory.cnt; i++) { unsigned long base, prevbase, prevsize; - prevbase = lmb.memory.region[i-1].base; + prevbase = lmb.memory.region[i-1].physbase; prevsize = lmb.memory.region[i-1].size; - base = lmb.memory.region[i].base; + base = lmb.memory.region[i].physbase; if (base > (prevbase + prevsize)) { io_hole_start = prevbase + prevsize; io_hole_size = base - (prevbase + prevsize); @@ -511,8 +513,11 @@ int page_is_ram(unsigned long pfn) for (i=0; i < lmb.memory.cnt; i++) { unsigned long base; +#ifdef CONFIG_MSCHUNKS + base = lmb.memory.region[i].physbase; +#else base = lmb.memory.region[i].base; - +#endif if ((paddr >= base) && (paddr < (base + lmb.memory.region[i].size))) { return 1; @@ -542,7 +547,7 @@ void __init do_init_bootmem(void) */ bootmap_pages = bootmem_bootmap_pages(total_pages); - start = lmb_alloc(bootmap_pages<> PAGE_SHIFT, total_pages); @@ -553,25 +558,25 @@ void __init do_init_bootmem(void) * present. */ for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; + unsigned long physbase, size; unsigned long start_pfn, end_pfn; - base = lmb.memory.region[i].base; + physbase = lmb.memory.region[i].physbase; size = lmb.memory.region[i].size; - start_pfn = base >> PAGE_SHIFT; + start_pfn = physbase >> PAGE_SHIFT; end_pfn = start_pfn + (size >> PAGE_SHIFT); memory_present(0, start_pfn, end_pfn); - free_bootmem(base, size); + free_bootmem(physbase, size); } /* reserve the sections we're already using */ for (i=0; i < lmb.reserved.cnt; i++) { - unsigned long base = lmb.reserved.region[i].base; + unsigned long physbase = lmb.reserved.region[i].physbase; unsigned long size = lmb.reserved.region[i].size; - reserve_bootmem(base, size); + reserve_bootmem(physbase, size); } } @@ -610,10 +615,10 @@ static int __init setup_kcore(void) int i; for (i=0; i < lmb.memory.cnt; i++) { - unsigned long base, size; + unsigned long physbase, size; struct kcore_list *kcore_mem; - base = lmb.memory.region[i].base; + physbase = lmb.memory.region[i].physbase; size = lmb.memory.region[i].size; /* GFP_ATOMIC to avoid might_sleep warnings during boot */ @@ -621,7 +626,7 @@ static int __init setup_kcore(void) if (!kcore_mem) panic("mem_init: kmalloc failed\n"); - kclist_add(kcore_mem, __va(base), size); + kclist_add(kcore_mem, __va(physbase), size); } kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); @@ -681,6 +686,9 @@ void __init mem_init(void) mem_init_done = 1; +#ifdef CONFIG_PPC_ISERIES + iommu_vio_init(); +#endif /* Initialize the vDSO */ vdso_init(); } @@ -825,43 +833,23 @@ void __iomem * reserve_phb_iospace(unsigned long size) return virt_addr; } -static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) +kmem_cache_t *zero_cache; + +static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) { - memset(addr, 0, kmem_cache_size(cache)); + memset(pte, 0, PAGE_SIZE); } -static const int pgtable_cache_size[2] = { - PTE_TABLE_SIZE, PMD_TABLE_SIZE -}; -static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { - "pgd_pte_cache", "pud_pmd_cache", -}; - -kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; - void pgtable_cache_init(void) { - int i; - - BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); - BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); - BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); - BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); - - for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { - int size = pgtable_cache_size[i]; - const char *name = pgtable_cache_name[i]; - - pgtable_cache[i] = kmem_cache_create(name, - size, size, - SLAB_HWCACHE_ALIGN - | SLAB_MUST_HWCACHE_ALIGN, - zero_ctor, - NULL); - if (! pgtable_cache[i]) - panic("pgtable_cache_init(): could not create %s!\n", - name); - } + zero_cache = kmem_cache_create("zero", + PAGE_SIZE, + 0, + SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (!zero_cache) + panic("pgtable_cache_init(): could not create zero_cache!\n"); } pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, diff --git a/trunk/arch/ppc64/mm/numa.c b/trunk/arch/ppc64/mm/numa.c index c3116f0d788c..0b191f2de016 100644 --- a/trunk/arch/ppc64/mm/numa.c +++ b/trunk/arch/ppc64/mm/numa.c @@ -671,7 +671,7 @@ void __init do_init_bootmem(void) * Mark reserved regions on this node */ for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].base; + unsigned long physbase = lmb.reserved.region[i].physbase; unsigned long size = lmb.reserved.region[i].size; if (pa_to_nid(physbase) != nid && diff --git a/trunk/arch/ppc64/mm/slb_low.S b/trunk/arch/ppc64/mm/slb_low.S index bab255889c58..8379d678f70f 100644 --- a/trunk/arch/ppc64/mm/slb_low.S +++ b/trunk/arch/ppc64/mm/slb_low.S @@ -89,29 +89,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) b 9f 0: /* user address: proto-VSID = context<<15 | ESID */ - srdi. r9,r3,USER_ESID_BITS + li r11,SLB_VSID_USER + + srdi. r9,r3,13 bne- 8f /* invalid ea bits set */ #ifdef CONFIG_HUGETLB_PAGE BEGIN_FTR_SECTION - lhz r9,PACAHIGHHTLBAREAS(r13) - srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) - srd r9,r9,r11 - andi. r9,r9,1 - bne 5f - - li r11,SLB_VSID_USER - + /* check against the hugepage ranges */ + cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) + bge 6f /* >= TASK_HPAGE_END */ + cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) + bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ cmpldi r3,16 - bge 6f + bge 6f /* 4GB..TASK_HPAGE_BASE */ - lhz r9,PACALOWHTLBAREAS(r13) + lhz r9,PACAHTLBSEGS(r13) srd r9,r9,r3 andi. r9,r9,1 - beq 6f -5: li r11,SLB_VSID_USER|SLB_VSID_L +5: /* this is a hugepage user address */ + li r11,(SLB_VSID_USER|SLB_VSID_L) END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/trunk/arch/ppc64/mm/tlb.c b/trunk/arch/ppc64/mm/tlb.c index d8a6593a13f0..26f0172c4527 100644 --- a/trunk/arch/ppc64/mm/tlb.c +++ b/trunk/arch/ppc64/mm/tlb.c @@ -41,58 +41,7 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; -struct pte_freelist_batch -{ - struct rcu_head rcu; - unsigned int index; - pgtable_free_t tables[0]; -}; - -DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -unsigned long pte_freelist_forced_free; - -#define PTE_FREELIST_SIZE \ - ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ - / sizeof(pgtable_free_t)) - -#ifdef CONFIG_SMP -static void pte_free_smp_sync(void *arg) -{ - /* Do nothing, just ensure we sync with all CPUs */ -} -#endif - -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). - */ -static void pgtable_free_now(pgtable_free_t pgf) -{ - pte_freelist_forced_free++; - - smp_call_function(pte_free_smp_sync, NULL, 0, 1); - - pgtable_free(pgf); -} - -static void pte_free_rcu_callback(struct rcu_head *head) -{ - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; - - for (i = 0; i < batch->index; i++) - pgtable_free(batch->tables[i]); - - free_page((unsigned long)batch); -} - -static void pte_free_submit(struct pte_freelist_batch *batch) -{ - INIT_RCU_HEAD(&batch->rcu); - call_rcu(&batch->rcu, pte_free_rcu_callback); -} - -void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) +void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) { /* This is safe as we are holding page_table_lock */ cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); @@ -100,19 +49,19 @@ void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) if (atomic_read(&tlb->mm->mm_users) < 2 || cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pgtable_free(pgf); + pte_free(ptepage); return; } if (*batchp == NULL) { *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); if (*batchp == NULL) { - pgtable_free_now(pgf); + pte_free_now(ptepage); return; } (*batchp)->index = 0; } - (*batchp)->tables[(*batchp)->index++] = pgf; + (*batchp)->pages[(*batchp)->index++] = ptepage; if ((*batchp)->index == PTE_FREELIST_SIZE) { pte_free_submit(*batchp); *batchp = NULL; @@ -183,6 +132,42 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) put_cpu(); } +#ifdef CONFIG_SMP +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} +#endif + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). + */ +void pte_free_now(struct page *ptepage) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 0, 1); + + pte_free(ptepage); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pte_free(batch->pages[i]); + free_page((unsigned long)batch); +} + +void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + void pte_free_finish(void) { /* This is safe as we are holding page_table_lock */ diff --git a/trunk/arch/ppc64/xmon/start.c b/trunk/arch/ppc64/xmon/start.c index f86b584acd76..a9265bcc79b2 100644 --- a/trunk/arch/ppc64/xmon/start.c +++ b/trunk/arch/ppc64/xmon/start.c @@ -27,7 +27,7 @@ static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { /* ensure xmon is enabled */ - xmon_init(1); + xmon_init(); debugger(pt_regs); } diff --git a/trunk/arch/ppc64/xmon/xmon.c b/trunk/arch/ppc64/xmon/xmon.c index 45908b10acd3..05539439e6bc 100644 --- a/trunk/arch/ppc64/xmon/xmon.c +++ b/trunk/arch/ppc64/xmon/xmon.c @@ -2496,25 +2496,15 @@ static void dump_stab(void) } } -void xmon_init(int enable) -{ - if (enable) { - __debugger = xmon; - __debugger_ipi = xmon_ipi; - __debugger_bpt = xmon_bpt; - __debugger_sstep = xmon_sstep; - __debugger_iabr_match = xmon_iabr_match; - __debugger_dabr_match = xmon_dabr_match; - __debugger_fault_handler = xmon_fault_handler; - } else { - __debugger = NULL; - __debugger_ipi = NULL; - __debugger_bpt = NULL; - __debugger_sstep = NULL; - __debugger_iabr_match = NULL; - __debugger_dabr_match = NULL; - __debugger_fault_handler = NULL; - } +void xmon_init(void) +{ + __debugger = xmon; + __debugger_ipi = xmon_ipi; + __debugger_bpt = xmon_bpt; + __debugger_sstep = xmon_sstep; + __debugger_iabr_match = xmon_iabr_match; + __debugger_dabr_match = xmon_dabr_match; + __debugger_fault_handler = xmon_fault_handler; } void dump_segments(void) diff --git a/trunk/include/asm-ppc64/abs_addr.h b/trunk/include/asm-ppc64/abs_addr.h index 84c24d4cdb71..6d4e8e787058 100644 --- a/trunk/include/asm-ppc64/abs_addr.h +++ b/trunk/include/asm-ppc64/abs_addr.h @@ -16,51 +16,93 @@ #include #include #include -#include -struct mschunks_map { +typedef u32 msChunks_entry; +struct msChunks { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - u32 *mapping; + msChunks_entry *abs; }; -extern struct mschunks_map mschunks_map; +extern struct msChunks msChunks; -/* Chunks are 256 KB */ -#define MSCHUNKS_CHUNK_SHIFT (18) -#define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT) -#define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1) +extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); +extern unsigned long reloc_offset(void); -static inline unsigned long chunk_to_addr(unsigned long chunk) +#ifdef CONFIG_MSCHUNKS + +static inline unsigned long +chunk_to_addr(unsigned long chunk) { - return chunk << MSCHUNKS_CHUNK_SHIFT; + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + return chunk << _msChunks->chunk_shift; } -static inline unsigned long addr_to_chunk(unsigned long addr) +static inline unsigned long +addr_to_chunk(unsigned long addr) { - return addr >> MSCHUNKS_CHUNK_SHIFT; + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + + return addr >> _msChunks->chunk_shift; } -static inline unsigned long phys_to_abs(unsigned long pa) +static inline unsigned long +chunk_offset(unsigned long addr) { - unsigned long chunk; + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); - /* This is a no-op on non-iSeries */ - if (!firmware_has_feature(FW_FEATURE_ISERIES)) - return pa; + return addr & _msChunks->chunk_mask; +} - chunk = addr_to_chunk(pa); +static inline unsigned long +abs_chunk(unsigned long pchunk) +{ + unsigned long offset = reloc_offset(); + struct msChunks *_msChunks = PTRRELOC(&msChunks); + if ( pchunk >= _msChunks->num_chunks ) { + return pchunk; + } + return PTRRELOC(_msChunks->abs)[pchunk]; +} - if (chunk < mschunks_map.num_chunks) - chunk = mschunks_map.mapping[chunk]; +/* A macro so it can take pointers or unsigned long. */ +#define phys_to_abs(pa) \ + ({ unsigned long _pa = (unsigned long)(pa); \ + chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \ + }) - return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); +static inline unsigned long +physRpn_to_absRpn(unsigned long rpn) +{ + unsigned long pa = rpn << PAGE_SHIFT; + unsigned long aa = phys_to_abs(pa); + return (aa >> PAGE_SHIFT); } +/* A macro so it can take pointers or unsigned long. */ +#define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) + +#else /* !CONFIG_MSCHUNKS */ + +#define chunk_to_addr(chunk) ((unsigned long)(chunk)) +#define addr_to_chunk(addr) (addr) +#define chunk_offset(addr) (0) +#define abs_chunk(pchunk) (pchunk) + +#define phys_to_abs(pa) (pa) +#define physRpn_to_absRpn(rpn) (rpn) +#define abs_to_phys(aa) (aa) + +#endif /* !CONFIG_MSCHUNKS */ + /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) -#define abs_to_virt(aa) __va(aa) +#define abs_to_virt(aa) __va(abs_to_phys(aa)) #endif /* _ABS_ADDR_H */ diff --git a/trunk/include/asm-ppc64/cputable.h b/trunk/include/asm-ppc64/cputable.h index ae6cf3830108..d67fa9e26079 100644 --- a/trunk/include/asm-ppc64/cputable.h +++ b/trunk/include/asm-ppc64/cputable.h @@ -56,6 +56,11 @@ struct cpu_spec { * BHT, SPD, etc... from head.S before branching to identify_machine */ cpu_setup_t cpu_setup; + + /* This is used to identify firmware features which are available + * to the kernel. + */ + unsigned long firmware_features; }; extern struct cpu_spec cpu_specs[]; @@ -66,6 +71,39 @@ static inline unsigned long cpu_has_feature(unsigned long feature) return cur_cpu_spec->cpu_features & feature; } + +/* firmware feature bitmask values */ +#define FIRMWARE_MAX_FEATURES 63 + +#define FW_FEATURE_PFT (1UL<<0) +#define FW_FEATURE_TCE (1UL<<1) +#define FW_FEATURE_SPRG0 (1UL<<2) +#define FW_FEATURE_DABR (1UL<<3) +#define FW_FEATURE_COPY (1UL<<4) +#define FW_FEATURE_ASR (1UL<<5) +#define FW_FEATURE_DEBUG (1UL<<6) +#define FW_FEATURE_TERM (1UL<<7) +#define FW_FEATURE_PERF (1UL<<8) +#define FW_FEATURE_DUMP (1UL<<9) +#define FW_FEATURE_INTERRUPT (1UL<<10) +#define FW_FEATURE_MIGRATE (1UL<<11) +#define FW_FEATURE_PERFMON (1UL<<12) +#define FW_FEATURE_CRQ (1UL<<13) +#define FW_FEATURE_VIO (1UL<<14) +#define FW_FEATURE_RDMA (1UL<<15) +#define FW_FEATURE_LLAN (1UL<<16) +#define FW_FEATURE_BULK (1UL<<17) +#define FW_FEATURE_XDABR (1UL<<18) +#define FW_FEATURE_MULTITCE (1UL<<19) +#define FW_FEATURE_SPLPAR (1UL<<20) + +typedef struct { + unsigned long val; + char * name; +} firmware_feature_t; + +extern firmware_feature_t firmware_features_table[]; + #endif /* __ASSEMBLY__ */ /* CPU kernel features */ @@ -102,8 +140,10 @@ static inline unsigned long cpu_has_feature(unsigned long feature) #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) #define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) -#ifndef __ASSEMBLY__ +/* Platform firmware features */ +#define FW_FTR_ ASM_CONST(0x0000000000000001) +#ifndef __ASSEMBLY__ #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) @@ -116,9 +156,10 @@ static inline unsigned long cpu_has_feature(unsigned long feature) #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) #else #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) -#endif /* CONFIG_PPC_ISERIES */ +#endif -#endif /* __ASSEMBLY */ +#define COMMON_PPC64_FW (0) +#endif #ifdef __ASSEMBLY__ diff --git a/trunk/include/asm-ppc64/firmware.h b/trunk/include/asm-ppc64/firmware.h deleted file mode 100644 index 22bb85cf60af..000000000000 --- a/trunk/include/asm-ppc64/firmware.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * include/asm-ppc64/firmware.h - * - * Extracted from include/asm-ppc64/cputable.h - * - * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) - * - * Modifications for ppc64: - * Copyright (C) 2003 Dave Engebretsen - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#ifndef __ASM_PPC_FIRMWARE_H -#define __ASM_PPC_FIRMWARE_H - -#ifdef __KERNEL__ - -#ifndef __ASSEMBLY__ - -/* firmware feature bitmask values */ -#define FIRMWARE_MAX_FEATURES 63 - -#define FW_FEATURE_PFT (1UL<<0) -#define FW_FEATURE_TCE (1UL<<1) -#define FW_FEATURE_SPRG0 (1UL<<2) -#define FW_FEATURE_DABR (1UL<<3) -#define FW_FEATURE_COPY (1UL<<4) -#define FW_FEATURE_ASR (1UL<<5) -#define FW_FEATURE_DEBUG (1UL<<6) -#define FW_FEATURE_TERM (1UL<<7) -#define FW_FEATURE_PERF (1UL<<8) -#define FW_FEATURE_DUMP (1UL<<9) -#define FW_FEATURE_INTERRUPT (1UL<<10) -#define FW_FEATURE_MIGRATE (1UL<<11) -#define FW_FEATURE_PERFMON (1UL<<12) -#define FW_FEATURE_CRQ (1UL<<13) -#define FW_FEATURE_VIO (1UL<<14) -#define FW_FEATURE_RDMA (1UL<<15) -#define FW_FEATURE_LLAN (1UL<<16) -#define FW_FEATURE_BULK (1UL<<17) -#define FW_FEATURE_XDABR (1UL<<18) -#define FW_FEATURE_MULTITCE (1UL<<19) -#define FW_FEATURE_SPLPAR (1UL<<20) -#define FW_FEATURE_ISERIES (1UL<<21) - -enum { - FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | - FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | - FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | - FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | - FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | - FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | - FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | - FW_FEATURE_SPLPAR, - FW_FEATURE_PSERIES_ALWAYS = 0, - FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, - FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, - FW_FEATURE_POSSIBLE = -#ifdef CONFIG_PPC_PSERIES - FW_FEATURE_PSERIES_POSSIBLE | -#endif -#ifdef CONFIG_PPC_ISERIES - FW_FEATURE_ISERIES_POSSIBLE | -#endif - 0, - FW_FEATURE_ALWAYS = -#ifdef CONFIG_PPC_PSERIES - FW_FEATURE_PSERIES_ALWAYS & -#endif -#ifdef CONFIG_PPC_ISERIES - FW_FEATURE_ISERIES_ALWAYS & -#endif - FW_FEATURE_POSSIBLE, -}; - -/* This is used to identify firmware features which are available - * to the kernel. - */ -extern unsigned long ppc64_firmware_features; - -static inline unsigned long firmware_has_feature(unsigned long feature) -{ - return (FW_FEATURE_ALWAYS & feature) || - (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); -} - -#ifdef CONFIG_PPC_PSERIES -typedef struct { - unsigned long val; - char * name; -} firmware_feature_t; - -extern firmware_feature_t firmware_features_table[]; -#endif - -#endif /* __ASSEMBLY__ */ -#endif /* __KERNEL__ */ -#endif /* __ASM_PPC_FIRMWARE_H */ diff --git a/trunk/include/asm-ppc64/imalloc.h b/trunk/include/asm-ppc64/imalloc.h index 42adf7033a81..e46ff68a6e41 100644 --- a/trunk/include/asm-ppc64/imalloc.h +++ b/trunk/include/asm-ppc64/imalloc.h @@ -6,7 +6,7 @@ */ #define PHBS_IO_BASE VMALLOC_END #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ -#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) +#define IMALLOC_END (VMALLOC_START + EADDR_MASK) /* imalloc region types */ diff --git a/trunk/include/asm-ppc64/iommu.h b/trunk/include/asm-ppc64/iommu.h index 72dcf8116b04..729de5cc21d9 100644 --- a/trunk/include/asm-ppc64/iommu.h +++ b/trunk/include/asm-ppc64/iommu.h @@ -104,6 +104,9 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn); #ifdef CONFIG_PPC_ISERIES +/* Initializes tables for bio buses */ +extern void __init iommu_vio_init(void); + struct iSeries_Device_Node; /* Creates table for an individual device node */ extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); diff --git a/trunk/include/asm-ppc64/lmb.h b/trunk/include/asm-ppc64/lmb.h index cb368bf0f264..a6cbca21ac1d 100644 --- a/trunk/include/asm-ppc64/lmb.h +++ b/trunk/include/asm-ppc64/lmb.h @@ -22,6 +22,7 @@ struct lmb_property { unsigned long base; + unsigned long physbase; unsigned long size; }; diff --git a/trunk/include/asm-ppc64/machdep.h b/trunk/include/asm-ppc64/machdep.h index ff2c9287d3b6..f0ef06375947 100644 --- a/trunk/include/asm-ppc64/machdep.h +++ b/trunk/include/asm-ppc64/machdep.h @@ -140,9 +140,6 @@ struct machdep_calls { /* Idle loop for this platform, leave empty for default idle loop */ int (*idle_loop)(void); - - /* Function to enable pmcs for this platform, called once per cpu. */ - void (*enable_pmcs)(void); }; extern int default_idle(void); diff --git a/trunk/include/asm-ppc64/mmu.h b/trunk/include/asm-ppc64/mmu.h index ad36bb28de29..70348a851313 100644 --- a/trunk/include/asm-ppc64/mmu.h +++ b/trunk/include/asm-ppc64/mmu.h @@ -28,12 +28,9 @@ #define STE_VSID_SHIFT 12 /* Location of cpu0's segment table */ -#define STAB0_PAGE 0x6 +#define STAB0_PAGE 0x9 #define STAB0_PHYS_ADDR (STAB0_PAGE< +#ifndef __ASSEMBLY__ + struct naca_struct { /* Kernel only data - undefined for user space */ void *xItVpdAreas; /* VPD Data 0x00 */ @@ -21,4 +23,9 @@ struct naca_struct { extern struct naca_struct naca; +#endif /* __ASSEMBLY__ */ + +#define NACA_PAGE 0x4 +#define NACA_PHYS_ADDR (NACA_PAGE<> HTLB_AREA_SHIFT) +/* For 64-bit processes the hugepage range is 1T-1.5T */ +#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) +#define TASK_HPAGE_END ASM_CONST(0x0000018000000000) #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - (1U << GET_ESID(addr))) & 0xffff) -#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ - - (1U << GET_HTLB_AREA(addr))) & 0xffff) #define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE -#define ARCH_HAS_SETCLEAR_HUGE_PTE #define touches_hugepage_low_range(mm, addr, len) \ - (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) -#define touches_hugepage_high_range(mm, addr, len) \ - (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) + (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) +#define touches_hugepage_high_range(addr, len) \ + (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) #define __within_hugepage_low_range(addr, len, segmask) \ ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) #define within_hugepage_low_range(addr, len) \ __within_hugepage_low_range((addr), (len), \ - current->mm->context.low_htlb_areas) -#define __within_hugepage_high_range(addr, len, zonemask) \ - ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) -#define within_hugepage_high_range(addr, len) \ - __within_hugepage_high_range((addr), (len), \ - current->mm->context.high_htlb_areas) + current->mm->context.htlb_segs) +#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \ + && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr))) #define is_hugepage_only_range(mm, addr, len) \ - (touches_hugepage_high_range((mm), (addr), (len)) || \ + (touches_hugepage_high_range((addr), (len)) || \ touches_hugepage_low_range((mm), (addr), (len))) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define in_hugepage_area(context, addr) \ (cpu_has_feature(CPU_FTR_16M_PAGE) && \ - ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ + ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ ( ((addr) < 0x100000000L) && \ - ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) + ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) ) #else /* !CONFIG_HUGETLB_PAGE */ @@ -131,42 +125,36 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. */ typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; -typedef struct { unsigned long pud; } pud_t; -typedef struct { unsigned long pgd; } pgd_t; +typedef struct { unsigned int pmd; } pmd_t; +typedef struct { unsigned int pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) -#define __pte(x) ((pte_t) { (x) }) -#define __pmd(x) ((pmd_t) { (x) }) -#define __pud(x) ((pud_t) { (x) }) -#define __pgd(x) ((pgd_t) { (x) }) -#define __pgprot(x) ((pgprot_t) { (x) }) +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) #else /* * .. while these make it easier on the compiler */ typedef unsigned long pte_t; -typedef unsigned long pmd_t; -typedef unsigned long pud_t; -typedef unsigned long pgd_t; +typedef unsigned int pmd_t; +typedef unsigned int pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define pmd_val(x) (x) -#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __pmd(x) (x) -#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -220,6 +208,9 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ #define USER_REGION_ID (0UL) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) +#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE) +#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT) + #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) #ifdef CONFIG_DISCONTIGMEM diff --git a/trunk/include/asm-ppc64/pgalloc.h b/trunk/include/asm-ppc64/pgalloc.h index 26bc49c1108d..4fc4b739b380 100644 --- a/trunk/include/asm-ppc64/pgalloc.h +++ b/trunk/include/asm-ppc64/pgalloc.h @@ -6,12 +6,7 @@ #include #include -extern kmem_cache_t *pgtable_cache[]; - -#define PTE_CACHE_NUM 0 -#define PMD_CACHE_NUM 1 -#define PUD_CACHE_NUM 1 -#define PGD_CACHE_NUM 0 +extern kmem_cache_t *zero_cache; /* * This program is free software; you can redistribute it and/or @@ -20,40 +15,30 @@ extern kmem_cache_t *pgtable_cache[]; * 2 of the License, or (at your option) any later version. */ -static inline pgd_t *pgd_alloc(struct mm_struct *mm) +static inline pgd_t * +pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); + return kmem_cache_alloc(zero_cache, GFP_KERNEL); } -static inline void pgd_free(pgd_t *pgd) +static inline void +pgd_free(pgd_t *pgd) { - kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); -} - -#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) - -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], - GFP_KERNEL|__GFP_REPEAT); -} - -static inline void pud_free(pud_t *pud) -{ - kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); + kmem_cache_free(zero_cache, pgd); } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pmd_t * +pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); } -static inline void pmd_free(pmd_t *pmd) +static inline void +pmd_free(pmd_t *pmd) { - kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); + kmem_cache_free(zero_cache, pmd); } #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) @@ -62,58 +47,44 @@ static inline void pmd_free(pmd_t *pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], - GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); } static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - return virt_to_page(pte_alloc_one_kernel(mm, address)); + pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + if (pte) + return virt_to_page(pte); + return NULL; } static inline void pte_free_kernel(pte_t *pte) { - kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); + kmem_cache_free(zero_cache, pte); } static inline void pte_free(struct page *ptepage) { - pte_free_kernel(page_address(ptepage)); + kmem_cache_free(zero_cache, page_address(ptepage)); } -#define PGF_CACHENUM_MASK 0xf - -typedef struct pgtable_free { - unsigned long val; -} pgtable_free_t; - -static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, - unsigned long mask) +struct pte_freelist_batch { - BUG_ON(cachenum > PGF_CACHENUM_MASK); + struct rcu_head rcu; + unsigned int index; + struct page * pages[0]; +}; - return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum}; -} +#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ + sizeof(struct page *)) -static inline void pgtable_free(pgtable_free_t pgf) -{ - void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); - int cachenum = pgf.val & PGF_CACHENUM_MASK; +extern void pte_free_now(struct page *ptepage); +extern void pte_free_submit(struct pte_freelist_batch *batch); - kmem_cache_free(pgtable_cache[cachenum], p); -} +DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); -void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); - -#define __pte_free_tlb(tlb, ptepage) \ - pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ - PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) -#define __pmd_free_tlb(tlb, pmd) \ - pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ - PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) -#define __pud_free_tlb(tlb, pmd) \ - pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ - PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) +void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); +#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) #define check_pgt_cache() do { } while (0) diff --git a/trunk/include/asm-ppc64/pgtable.h b/trunk/include/asm-ppc64/pgtable.h index 5ea952ad7164..46cf61c2ff69 100644 --- a/trunk/include/asm-ppc64/pgtable.h +++ b/trunk/include/asm-ppc64/pgtable.h @@ -15,24 +15,19 @@ #include #endif /* __ASSEMBLY__ */ +#include + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for * each entry by assuming that each entry is page aligned. */ #define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 7 -#define PUD_INDEX_SIZE 7 -#define PGD_INDEX_SIZE 9 - -#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) -#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) -#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) -#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) +#define PMD_INDEX_SIZE 10 +#define PGD_INDEX_SIZE 10 #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) -#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* PMD_SHIFT determines what a second-level page table entry can map */ @@ -40,13 +35,8 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* PUD_SHIFT determines what a third-level page table entry can map */ -#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) -#define PUD_SIZE (1UL << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ -#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) +/* PGDIR_SHIFT determines what a third-level page table entry can map */ +#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -55,23 +45,15 @@ /* * Size of EA range mapped by our pagetables. */ -#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) -#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) - -#if TASK_SIZE_USER64 > PGTABLE_RANGE -#error TASK_SIZE_USER64 exceeds pagetable range -#endif - -#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) -#error TASK_SIZE_USER64 exceeds user VSID range -#endif +#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PGD_INDEX_SIZE + PAGE_SHIFT) +#define EADDR_MASK ((1UL << EADDR_SIZE) - 1) /* * Define the address range of the vmalloc VM area. */ #define VMALLOC_START (0xD000000000000000ul) -#define VMALLOC_SIZE (0x80000000000UL) +#define VMALLOC_SIZE (0x10000000000UL) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -172,6 +154,8 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #ifndef __ASSEMBLY__ int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local); + +void hugetlb_mm_free_pgd(struct mm_struct *mm); #endif /* __ASSEMBLY__ */ #define HAVE_ARCH_UNMAPPED_AREA @@ -179,6 +163,7 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access, #else #define hash_huge_page(mm,a,ea,vsid,local) -1 +#define hugetlb_mm_free_pgd(mm) do {} while (0) #endif @@ -212,45 +197,39 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) +#define pmd_set(pmdp, ptep) \ + (pmd_val(*(pmdp)) = __ba_to_bpn(ptep)) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (pmd_val(pmd)) +#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) -#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) +#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) ((pud_val(pud)) == 0) -#define pud_present(pud) (pud_val(pud) != 0) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0) -#define pud_page(pud) (pud_val(pud)) - -#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) -#define pgd_none(pgd) (!pgd_val(pgd)) -#define pgd_bad(pgd) (pgd_val(pgd) == 0) -#define pgd_present(pgd) (pgd_val(pgd) != 0) -#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) -#define pgd_page(pgd) (pgd_val(pgd)) +#define pud_bad(pud) ((pud_val(pud)) == 0UL) +#define pud_present(pud) (pud_val(pud) != 0UL) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pud_page(pud) (__bpn_to_ba(pud_val(pud))) /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. */ /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff) +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -#define pud_offset(pgdp, addr) \ - (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) - +/* Find an entry in the second-level page table.. */ #define pmd_offset(pudp,addr) \ - (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) + ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) +/* Find an entry in the third-level page table.. */ #define pte_offset_kernel(dir,addr) \ - (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + ((pte_t *) pmd_page_kernel(*(dir)) \ + + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -479,18 +458,23 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) -#define pud_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) + printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) extern pgd_t swapper_pg_dir[]; extern void paging_init(void); +/* + * Because the huge pgtables are only 2 level, they can take + * at most around 4M, much less than one hugepage which the + * process is presumably entitled to use. So we don't bother + * freeing up the pagetables on unmap, and wait until + * destroy_context() to clean up the lot. + */ #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ - free_pgd_range(tlb, addr, end, floor, ceiling) + do { } while (0) /* * This gets called at the end of handling a page fault, when diff --git a/trunk/include/asm-ppc64/pmc.h b/trunk/include/asm-ppc64/pmc.h index d1d297dbccfe..c924748c0bea 100644 --- a/trunk/include/asm-ppc64/pmc.h +++ b/trunk/include/asm-ppc64/pmc.h @@ -26,6 +26,4 @@ typedef void (*perf_irq_t)(struct pt_regs *); int reserve_pmc_hardware(perf_irq_t new_perf_irq); void release_pmc_hardware(void); -void power4_enable_pmcs(void); - #endif /* _PPC64_PMC_H */ diff --git a/trunk/include/asm-ppc64/processor.h b/trunk/include/asm-ppc64/processor.h index 50b14c0ddb87..352306cfb579 100644 --- a/trunk/include/asm-ppc64/processor.h +++ b/trunk/include/asm-ppc64/processor.h @@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern struct task_struct *last_task_used_math; extern struct task_struct *last_task_used_altivec; -/* 64-bit user address space is 44-bits (16TB user VM) */ -#define TASK_SIZE_USER64 (0x0000100000000000UL) +/* 64-bit user address space is 41-bits (2TBs user VM) */ +#define TASK_SIZE_USER64 (0x0000020000000000UL) /* * 32-bit user address space is 4GB - 1 page diff --git a/trunk/include/asm-ppc64/prom.h b/trunk/include/asm-ppc64/prom.h index dc5330b39509..04b1a84f7ca3 100644 --- a/trunk/include/asm-ppc64/prom.h +++ b/trunk/include/asm-ppc64/prom.h @@ -22,15 +22,13 @@ #define RELOC(x) (*PTRRELOC(&(x))) /* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* marker */ -#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ +#define OF_DT_HEADER 0xd00dfeed /* 4: version, 4: total size */ +#define OF_DT_BEGIN_NODE 0x1 /* Start node: full name */ #define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, - * content */ -#define OF_DT_NOP 0x4 /* nop */ +#define OF_DT_PROP 0x3 /* Property: name off, size, content */ #define OF_DT_END 0x9 -#define OF_DT_VERSION 0x10 +#define OF_DT_VERSION 1 /* * This is what gets passed to the kernel by prom_init or kexec @@ -56,9 +54,7 @@ struct boot_param_header u32 version; /* format version */ u32 last_comp_version; /* last compatible version */ /* version 2 fields below */ - u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ - /* version 3 fields below */ - u32 dt_strings_size; /* size of the DT strings block */ + u32 boot_cpuid_phys; /* Which physical CPU id we're booting on */ }; diff --git a/trunk/include/asm-ppc64/system.h b/trunk/include/asm-ppc64/system.h index b9e1835351e9..98d120ca8a91 100644 --- a/trunk/include/asm-ppc64/system.h +++ b/trunk/include/asm-ppc64/system.h @@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match) DEBUGGER_BOILERPLATE(debugger_fault_handler) #ifdef CONFIG_XMON -extern void xmon_init(int enable); +extern void xmon_init(void); #endif #else @@ -302,7 +302,5 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #define arch_align_stack(x) (x) -extern unsigned long reloc_offset(void); - #endif /* __KERNEL__ */ #endif diff --git a/trunk/include/asm-ppc64/vio.h b/trunk/include/asm-ppc64/vio.h index a82e87c1c5fa..20cd98ee6337 100644 --- a/trunk/include/asm-ppc64/vio.h +++ b/trunk/include/asm-ppc64/vio.h @@ -56,9 +56,6 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); int vio_get_irq(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev); -extern struct vio_dev * __devinit vio_register_device_common( - struct vio_dev *viodev, char *name, char *type, - uint32_t unit_address, struct iommu_table *iommu_table); extern struct dma_mapping_ops vio_dma_ops; @@ -98,16 +95,9 @@ struct vio_dev { struct device dev; }; -extern struct vio_dev vio_bus_device; - static inline struct vio_dev *to_vio_dev(struct device *dev) { return container_of(dev, struct vio_dev, dev); } -extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev), - void (*)(struct vio_dev *), - void (*)(struct device *)); - #endif /* _ASM_VIO_H */ diff --git a/trunk/include/linux/netfilter_ipv4/ip_conntrack.h b/trunk/include/linux/netfilter_ipv4/ip_conntrack.h index 08fe5f7d14a0..4ed720f0c4cd 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/trunk/include/linux/netfilter_ipv4/ip_conntrack.h @@ -171,7 +171,7 @@ struct ip_conntrack #endif /* CONFIG_IP_NF_NAT_NEEDED */ #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - unsigned long mark; + u_int32_t mark; #endif /* Traversed often, so hopefully in different cacheline to top */ diff --git a/trunk/include/linux/skbuff.h b/trunk/include/linux/skbuff.h index 948527e42a60..2e40f4c9f7a6 100644 --- a/trunk/include/linux/skbuff.h +++ b/trunk/include/linux/skbuff.h @@ -259,7 +259,7 @@ struct sk_buff { void (*destructor)(struct sk_buff *skb); #ifdef CONFIG_NETFILTER - unsigned long nfmark; + __u32 nfmark; __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; diff --git a/trunk/mm/memory.c b/trunk/mm/memory.c index a596c1172248..e046b7e4b530 100644 --- a/trunk/mm/memory.c +++ b/trunk/mm/memory.c @@ -498,17 +498,6 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, unsigned long addr = vma->vm_start; unsigned long end = vma->vm_end; - /* - * Don't copy ptes where a page fault will fill them correctly. - * Fork becomes much lighter when there are big shared or private - * readonly mappings. The tradeoff is that copy_page_range is more - * efficient than faulting. - */ - if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) { - if (!vma->anon_vma) - return 0; - } - if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c b/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c index 61798c46e91d..dccd4abab7ae 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -185,7 +185,7 @@ static int ct_seq_show(struct seq_file *s, void *v) return -ENOSPC; #if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%lu ", conntrack->mark)) + if (seq_printf(s, "mark=%u ", conntrack->mark)) return -ENOSPC; #endif diff --git a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c index 6706d3a1bc4f..2d05cafec221 100644 --- a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -367,7 +367,7 @@ target(struct sk_buff **pskb, #ifdef DEBUG_CLUSTERP DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%lu ", hash, ct->mark); + DEBUGP("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { DEBUGP("not responsible\n"); return NF_DROP; diff --git a/trunk/net/ipv4/netfilter/ipt_CONNMARK.c b/trunk/net/ipv4/netfilter/ipt_CONNMARK.c index 30ddd3e18eb7..8ed744157b1a 100644 --- a/trunk/net/ipv4/netfilter/ipt_CONNMARK.c +++ b/trunk/net/ipv4/netfilter/ipt_CONNMARK.c @@ -40,9 +40,9 @@ target(struct sk_buff **pskb, void *userinfo) { const struct ipt_connmark_target_info *markinfo = targinfo; - unsigned long diff; - unsigned long nfmark; - unsigned long newmark; + u_int32_t diff; + u_int32_t nfmark; + u_int32_t newmark; enum ip_conntrack_info ctinfo; struct ip_conntrack *ct = ip_conntrack_get((*pskb), &ctinfo); @@ -94,6 +94,11 @@ checkentry(const char *tablename, } } + if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { + printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); + return 0; + } + return 1; } diff --git a/trunk/net/ipv4/netfilter/ipt_MARK.c b/trunk/net/ipv4/netfilter/ipt_MARK.c index 33c6f9b63b8d..8526398346cf 100644 --- a/trunk/net/ipv4/netfilter/ipt_MARK.c +++ b/trunk/net/ipv4/netfilter/ipt_MARK.c @@ -76,6 +76,8 @@ checkentry_v0(const char *tablename, unsigned int targinfosize, unsigned int hook_mask) { + struct ipt_mark_target_info *markinfo = targinfo; + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_mark_target_info))) { printk(KERN_WARNING "MARK: targinfosize %u != %Zu\n", targinfosize, @@ -88,6 +90,11 @@ checkentry_v0(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } @@ -120,6 +127,11 @@ checkentry_v1(const char *tablename, return 0; } + if (markinfo->mark > 0xffffffff) { + printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); + return 0; + } + return 1; } diff --git a/trunk/net/ipv4/netfilter/ipt_connmark.c b/trunk/net/ipv4/netfilter/ipt_connmark.c index 2706f96cea55..bf8de47ce004 100644 --- a/trunk/net/ipv4/netfilter/ipt_connmark.c +++ b/trunk/net/ipv4/netfilter/ipt_connmark.c @@ -54,9 +54,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_connmark_info *cm = + (struct ipt_connmark_info *)matchinfo; if (matchsize != IPT_ALIGN(sizeof(struct ipt_connmark_info))) return 0; + if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { + printk(KERN_WARNING "connmark: only support 32bit mark\n"); + return 0; + } + return 1; } diff --git a/trunk/net/ipv4/netfilter/ipt_mark.c b/trunk/net/ipv4/netfilter/ipt_mark.c index 8955728127b9..00bef6cdd3f8 100644 --- a/trunk/net/ipv4/netfilter/ipt_mark.c +++ b/trunk/net/ipv4/netfilter/ipt_mark.c @@ -37,9 +37,16 @@ checkentry(const char *tablename, unsigned int matchsize, unsigned int hook_mask) { + struct ipt_mark_info *minfo = (struct ipt_mark_info *) matchinfo; + if (matchsize != IPT_ALIGN(sizeof(struct ipt_mark_info))) return 0; + if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { + printk(KERN_WARNING "mark: only supports 32bit mark\n"); + return 0; + } + return 1; }