diff --git a/[refs] b/[refs] index a5b469e35d1b..13feab0b27d8 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 3d8a67b9f0428de3d8b76984c5c38675a7f4abd4 +refs/heads/master: 0df333ce01fedad1f7a4f281063a02d76ba33a7d diff --git a/trunk/Documentation/feature-removal-schedule.txt b/trunk/Documentation/feature-removal-schedule.txt index fb8258ebc577..63df2262d41a 100644 --- a/trunk/Documentation/feature-removal-schedule.txt +++ b/trunk/Documentation/feature-removal-schedule.txt @@ -205,6 +205,20 @@ Who: Len Brown --------------------------- +What: Compaq touchscreen device emulation +When: Oct 2007 +Files: drivers/input/tsdev.c +Why: The code says it was obsolete when it was written in 2001. + tslib is a userspace library which does anything tsdev can do and + much more besides in userspace where this code belongs. There is no + longer any need for tsdev and applications should have converted to + use tslib by now. + The name "tsdev" is also extremely confusing and lots of people have + it loaded when they don't need/use it. +Who: Richard Purdie + +--------------------------- + What: i2c-ixp2000, i2c-ixp4xx and scx200_i2c drivers When: September 2007 Why: Obsolete. The new i2c-gpio driver replaces all hardware-specific diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index 085e4a095eaa..c323778270ff 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -1083,13 +1083,6 @@ and is between 256 and 4096 characters. It is defined in the file [NFS] set the maximum lifetime for idmapper cache entries. - nfs.enable_ino64= - [NFS] enable 64-bit inode numbers. - If zero, the NFS client will fake up a 32-bit inode - number for the readdir() and stat() syscalls instead - of returning the full 64-bit number. - The default is to return 64-bit inode numbers. 
- nmi_watchdog= [KNL,BUGS=X86-32] Debugging features for SMP kernels no387 [BUGS=X86-32] Tells the kernel to use the 387 maths @@ -1890,6 +1883,9 @@ and is between 256 and 4096 characters. It is defined in the file Format: ,,,,,,,, + tsdev.xres= [TS] Horizontal screen resolution. + tsdev.yres= [TS] Vertical screen resolution. + turbografx.map[2|3]= [HW,JOY] TurboGraFX parallel port interface Format: diff --git a/trunk/Documentation/networking/bonding.txt b/trunk/Documentation/networking/bonding.txt index 11340625e363..1da566630831 100644 --- a/trunk/Documentation/networking/bonding.txt +++ b/trunk/Documentation/networking/bonding.txt @@ -281,39 +281,6 @@ downdelay will be rounded down to the nearest multiple. The default value is 0. -fail_over_mac - - Specifies whether active-backup mode should set all slaves to - the same MAC address (the traditional behavior), or, when - enabled, change the bond's MAC address when changing the - active interface (i.e., fail over the MAC address itself). - - Fail over MAC is useful for devices that cannot ever alter - their MAC address, or for devices that refuse incoming - broadcasts with their own source MAC (which interferes with - the ARP monitor). - - The down side of fail over MAC is that every device on the - network must be updated via gratuitous ARP, vs. just updating - a switch or set of switches (which often takes place for any - traffic, not just ARP traffic, if the switch snoops incoming - traffic to update its tables) for the traditional method. If - the gratuitous ARP is lost, communication may be disrupted. - - When fail over MAC is used in conjuction with the mii monitor, - devices which assert link up prior to being able to actually - transmit and receive are particularly susecptible to loss of - the gratuitous ARP, and an appropriate updelay setting may be - required. - - A value of 0 disables fail over MAC, and is the default. A - value of 1 enables fail over MAC. 
This option is enabled - automatically if the first slave added cannot change its MAC - address. This option may be modified via sysfs only when no - slaves are present in the bond. - - This option was added in bonding version 3.2.0. - lacp_rate Option specifying the rate in which we'll ask our link partner diff --git a/trunk/Documentation/networking/proc_net_tcp.txt b/trunk/Documentation/networking/proc_net_tcp.txt index 4a79209e77a7..5e21f7cb6383 100644 --- a/trunk/Documentation/networking/proc_net_tcp.txt +++ b/trunk/Documentation/networking/proc_net_tcp.txt @@ -1,9 +1,8 @@ This document describes the interfaces /proc/net/tcp and /proc/net/tcp6. -Note that these interfaces are deprecated in favor of tcp_diag. These /proc interfaces provide information about currently active TCP -connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c -and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively. +connections, and are implemented by tcp_get_info() in net/ipv4/tcp_ipv4.c and +tcp6_get_info() in net/ipv6/tcp_ipv6.c, respectively. It will first list all listening TCP sockets, and next list all established TCP connections. 
A typical entry of /proc/net/tcp would look like this (split diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index c7355e7f09ff..12cee3da2625 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -2404,15 +2404,6 @@ M: khali@linux-fr.org L: lm-sensors@lm-sensors.org S: Maintained -LOCKDEP AND LOCKSTAT -P: Peter Zijlstra -M: peterz@infradead.org -P: Ingo Molnar -M: mingo@redhat.com -L: linux-kernel@vger.kernel.org -T: git://git.kernel.org/pub/scm/linux/kernel/git/peterz/linux-2.6-lockdep.git -S: Maintained - LOGICAL DISK MANAGER SUPPORT (LDM, Windows 2000/XP/Vista Dynamic Disks) P: Richard Russon (FlatCap) M: ldm@flatcap.org diff --git a/trunk/arch/blackfin/mach-bf548/boards/ezkit.c b/trunk/arch/blackfin/mach-bf548/boards/ezkit.c index 046e6d84bbfc..2c47db494f7d 100644 --- a/trunk/arch/blackfin/mach-bf548/boards/ezkit.c +++ b/trunk/arch/blackfin/mach-bf548/boards/ezkit.c @@ -88,7 +88,7 @@ static struct platform_device bf54x_lq043_device = { #endif #if defined(CONFIG_KEYBOARD_BFIN) || defined(CONFIG_KEYBOARD_BFIN_MODULE) -static const unsigned int bf548_keymap[] = { +static int bf548_keymap[] = { KEYVAL(0, 0, KEY_ENTER), KEYVAL(0, 1, KEY_HELP), KEYVAL(0, 2, KEY_0), @@ -110,8 +110,8 @@ static const unsigned int bf548_keymap[] = { static struct bfin_kpad_platform_data bf54x_kpad_data = { .rows = 4, .cols = 4, - .keymap = bf548_keymap, - .keymapsize = ARRAY_SIZE(bf548_keymap), + .keymap = bf548_keymap, + .keymapsize = ARRAY_SIZE(bf548_keymap), .repeat = 0, .debounce_time = 5000, /* ns (5ms) */ .coldrive_time = 1000, /* ns (1ms) */ diff --git a/trunk/arch/m68k/atari/atakeyb.c b/trunk/arch/m68k/atari/atakeyb.c index 880add120eb3..fbbccb5e7511 100644 --- a/trunk/arch/m68k/atari/atakeyb.c +++ b/trunk/arch/m68k/atari/atakeyb.c @@ -1,4 +1,6 @@ /* + * linux/arch/m68k/atari/atakeyb.c + * * Atari Keyboard driver for 680x0 Linux * * This file is subject to the terms and conditions of the GNU General Public diff --git a/trunk/arch/mips/au1000/common/prom.c 
b/trunk/arch/mips/au1000/common/prom.c index 90d70695aa60..a8637cdb5b4b 100644 --- a/trunk/arch/mips/au1000/common/prom.c +++ b/trunk/arch/mips/au1000/common/prom.c @@ -33,6 +33,7 @@ * with this program; if not, write to the Free Software Foundation, Inc., * 675 Mass Ave, Cambridge, MA 02139, USA. */ + #include #include #include @@ -40,16 +41,18 @@ #include -int prom_argc; -char **prom_argv; -char **prom_envp; +/* #define DEBUG_CMDLINE */ + +extern int prom_argc; +extern char **prom_argv, **prom_envp; + char * __init_or_module prom_getcmdline(void) { return &(arcs_cmdline[0]); } -void prom_init_cmdline(void) +void prom_init_cmdline(void) { char *cp; int actr; @@ -58,7 +61,7 @@ void prom_init_cmdline(void) cp = &(arcs_cmdline[0]); while(actr < prom_argc) { - strcpy(cp, prom_argv[actr]); + strcpy(cp, prom_argv[actr]); cp += strlen(prom_argv[actr]); *cp++ = ' '; actr++; @@ -67,8 +70,10 @@ void prom_init_cmdline(void) --cp; if (prom_argc > 1) *cp = '\0'; + } + char *prom_getenv(char *envname) { /* @@ -90,23 +95,21 @@ char *prom_getenv(char *envname) } env++; } - return NULL; } -static inline unsigned char str2hexnum(unsigned char c) +inline unsigned char str2hexnum(unsigned char c) { - if (c >= '0' && c <= '9') + if(c >= '0' && c <= '9') return c - '0'; - if (c >= 'a' && c <= 'f') + if(c >= 'a' && c <= 'f') return c - 'a' + 10; - if (c >= 'A' && c <= 'F') + if(c >= 'A' && c <= 'F') return c - 'A' + 10; - return 0; /* foo */ } -static inline void str2eaddr(unsigned char *ea, unsigned char *str) +inline void str2eaddr(unsigned char *ea, unsigned char *str) { int i; @@ -121,29 +124,35 @@ static inline void str2eaddr(unsigned char *ea, unsigned char *str) } } -int prom_get_ethernet_addr(char *ethernet_addr) +int get_ethernet_addr(char *ethernet_addr) { - char *ethaddr_str; - char *argptr; + char *ethaddr_str; - /* Check the environment variables first */ - ethaddr_str = prom_getenv("ethaddr"); + ethaddr_str = prom_getenv("ethaddr"); if (!ethaddr_str) { - /* Check command 
line */ - argptr = prom_getcmdline(); - ethaddr_str = strstr(argptr, "ethaddr="); - if (!ethaddr_str) - return -1; - - ethaddr_str += strlen("ethaddr="); + printk("ethaddr not set in boot prom\n"); + return -1; } - str2eaddr(ethernet_addr, ethaddr_str); +#if 0 + { + int i; + + printk("get_ethernet_addr: "); + for (i=0; i<5; i++) + printk("%02x:", (unsigned char)*(ethernet_addr+i)); + printk("%02x\n", *(ethernet_addr+i)); + } +#endif + return 0; } -EXPORT_SYMBOL(prom_get_ethernet_addr); void __init prom_free_prom_memory(void) { } + +EXPORT_SYMBOL(prom_getcmdline); +EXPORT_SYMBOL(get_ethernet_addr); +EXPORT_SYMBOL(str2eaddr); diff --git a/trunk/arch/mips/au1000/common/setup.c b/trunk/arch/mips/au1000/common/setup.c index a90d425d4651..b212c0726125 100644 --- a/trunk/arch/mips/au1000/common/setup.c +++ b/trunk/arch/mips/au1000/common/setup.c @@ -40,11 +40,10 @@ #include #include #include +#include #include -#include -#include - +extern char * prom_getcmdline(void); extern void __init board_setup(void); extern void au1000_restart(char *); extern void au1000_halt(void); diff --git a/trunk/arch/mips/au1000/db1x00/init.c b/trunk/arch/mips/au1000/db1x00/init.c index 43298fd9459c..4d7bcfc8cf73 100644 --- a/trunk/arch/mips/au1000/db1x00/init.c +++ b/trunk/arch/mips/au1000/db1x00/init.c @@ -31,13 +31,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/mtx-1/init.c b/trunk/arch/mips/au1000/mtx-1/init.c index cdeae3212a2d..2aa7b2ed6a8c 100644 --- a/trunk/arch/mips/au1000/mtx-1/init.c +++ b/trunk/arch/mips/au1000/mtx-1/init.c @@ -34,11 +34,13 @@ #include #include #include - #include #include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char 
*envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/pb1000/init.c b/trunk/arch/mips/au1000/pb1000/init.c index ddccaf6997d0..4535f7208e18 100644 --- a/trunk/arch/mips/au1000/pb1000/init.c +++ b/trunk/arch/mips/au1000/pb1000/init.c @@ -30,13 +30,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/pb1100/init.c b/trunk/arch/mips/au1000/pb1100/init.c index c93fd39b4aba..7ba6852de7cd 100644 --- a/trunk/arch/mips/au1000/pb1100/init.c +++ b/trunk/arch/mips/au1000/pb1100/init.c @@ -31,13 +31,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/pb1200/board_setup.c b/trunk/arch/mips/au1000/pb1200/board_setup.c index 5dbc9868f598..2122515f79d7 100644 --- a/trunk/arch/mips/au1000/pb1200/board_setup.c +++ b/trunk/arch/mips/au1000/pb1200/board_setup.c @@ -41,10 +41,8 @@ #include #include #include - -#include -#include -#include +#include +#include #ifdef CONFIG_MIPS_PB1200 #include diff --git a/trunk/arch/mips/au1000/pb1200/init.c b/trunk/arch/mips/au1000/pb1200/init.c index c251570749ee..5a70029d5388 100644 --- a/trunk/arch/mips/au1000/pb1200/init.c +++ b/trunk/arch/mips/au1000/pb1200/init.c @@ -31,13 +31,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/pb1500/init.c 
b/trunk/arch/mips/au1000/pb1500/init.c index 507d4b204161..e58a9d6c5021 100644 --- a/trunk/arch/mips/au1000/pb1500/init.c +++ b/trunk/arch/mips/au1000/pb1500/init.c @@ -31,13 +31,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/pb1550/init.c b/trunk/arch/mips/au1000/pb1550/init.c index b03eee601e36..fad53bf5aad1 100644 --- a/trunk/arch/mips/au1000/pb1550/init.c +++ b/trunk/arch/mips/au1000/pb1550/init.c @@ -31,13 +31,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/mips/au1000/xxs1500/init.c b/trunk/arch/mips/au1000/xxs1500/init.c index 6532939f377a..9f839c36f69e 100644 --- a/trunk/arch/mips/au1000/xxs1500/init.c +++ b/trunk/arch/mips/au1000/xxs1500/init.c @@ -30,13 +30,15 @@ #include #include #include -#include -#include - #include #include +#include +#include -#include +int prom_argc; +char **prom_argv, **prom_envp; +extern void __init prom_init_cmdline(void); +extern char *prom_getenv(char *envname); const char *get_system_type(void) { diff --git a/trunk/arch/powerpc/platforms/cell/axon_msi.c b/trunk/arch/powerpc/platforms/cell/axon_msi.c index 095988f13bf4..1245b2f517bb 100644 --- a/trunk/arch/powerpc/platforms/cell/axon_msi.c +++ b/trunk/arch/powerpc/platforms/cell/axon_msi.c @@ -77,7 +77,12 @@ static void msic_dcr_write(struct axon_msic *msic, unsigned int dcr_n, u32 val) { pr_debug("axon_msi: dcr_write(0x%x, 0x%x)\n", val, dcr_n); - dcr_write(msic->dcr_host, dcr_n, val); + dcr_write(msic->dcr_host, msic->dcr_host.base + dcr_n, val); +} + +static u32 
msic_dcr_read(struct axon_msic *msic, unsigned int dcr_n) +{ + return dcr_read(msic->dcr_host, msic->dcr_host.base + dcr_n); } static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) @@ -86,7 +91,7 @@ static void axon_msi_cascade(unsigned int irq, struct irq_desc *desc) u32 write_offset, msi; int idx; - write_offset = dcr_read(msic->dcr_host, MSIC_WRITE_OFFSET_REG); + write_offset = msic_dcr_read(msic, MSIC_WRITE_OFFSET_REG); pr_debug("axon_msi: original write_offset 0x%x\n", write_offset); /* write_offset doesn't wrap properly, so we have to mask it */ @@ -301,7 +306,7 @@ static int axon_msi_notify_reboot(struct notifier_block *nb, list_for_each_entry(msic, &axon_msic_list, list) { pr_debug("axon_msi: disabling %s\n", msic->irq_host->of_node->full_name); - tmp = dcr_read(msic->dcr_host, MSIC_CTRL_REG); + tmp = msic_dcr_read(msic, MSIC_CTRL_REG); tmp &= ~MSIC_CTRL_ENABLE & ~MSIC_CTRL_IRQ_ENABLE; msic_dcr_write(msic, MSIC_CTRL_REG, tmp); } diff --git a/trunk/arch/powerpc/sysdev/dcr.c b/trunk/arch/powerpc/sysdev/dcr.c index 427027c7ea0f..ab11c0b29024 100644 --- a/trunk/arch/powerpc/sysdev/dcr.c +++ b/trunk/arch/powerpc/sysdev/dcr.c @@ -126,13 +126,13 @@ dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, } EXPORT_SYMBOL_GPL(dcr_map); -void dcr_unmap(dcr_host_t host, unsigned int dcr_c) +void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c) { dcr_host_t h = host; if (h.token == NULL) return; - h.token += host.base * h.stride; + h.token += dcr_n * h.stride; iounmap(h.token); h.token = NULL; } diff --git a/trunk/arch/powerpc/sysdev/mpic.c b/trunk/arch/powerpc/sysdev/mpic.c index e47938899a92..893e65439e85 100644 --- a/trunk/arch/powerpc/sysdev/mpic.c +++ b/trunk/arch/powerpc/sysdev/mpic.c @@ -156,7 +156,7 @@ static inline u32 _mpic_read(enum mpic_reg_type type, switch(type) { #ifdef CONFIG_PPC_DCR case mpic_access_dcr: - return dcr_read(rb->dhost, reg); + return dcr_read(rb->dhost, rb->dhost.base + reg); #endif case 
mpic_access_mmio_be: return in_be32(rb->base + (reg >> 2)); @@ -173,7 +173,7 @@ static inline void _mpic_write(enum mpic_reg_type type, switch(type) { #ifdef CONFIG_PPC_DCR case mpic_access_dcr: - return dcr_write(rb->dhost, reg, value); + return dcr_write(rb->dhost, rb->dhost.base + reg, value); #endif case mpic_access_mmio_be: return out_be32(rb->base + (reg >> 2), value); diff --git a/trunk/arch/s390/kernel/entry.S b/trunk/arch/s390/kernel/entry.S index 139ca153d5cc..f3bceb165321 100644 --- a/trunk/arch/s390/kernel/entry.S +++ b/trunk/arch/s390/kernel/entry.S @@ -68,15 +68,9 @@ STACK_SIZE = 1 << STACK_SHIFT l %r1,BASED(.Ltrace_irq_off) basr %r14,%r1 .endm - - .macro LOCKDEP_SYS_EXIT - l %r1,BASED(.Llockdep_sys_exit) - basr %r14,%r1 - .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define LOCKDEP_SYS_EXIT #endif /* @@ -266,7 +260,6 @@ sysc_return: bno BASED(sysc_leave) tm __TI_flags+3(%r9),_TIF_WORK_SVC bnz BASED(sysc_work) # there is work to do (signals etc.) - LOCKDEP_SYS_EXIT sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 @@ -290,7 +283,6 @@ sysc_work: bo BASED(sysc_restart) tm __TI_flags+3(%r9),_TIF_SINGLE_STEP bo BASED(sysc_singlestep) - LOCKDEP_SYS_EXIT b BASED(sysc_leave) # @@ -580,7 +572,6 @@ io_return: #endif tm __TI_flags+3(%r9),_TIF_WORK_INT bnz BASED(io_work) # there is work to do (signals etc.) 
- LOCKDEP_SYS_EXIT io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: @@ -627,7 +618,6 @@ io_work_loop: bo BASED(io_reschedule) tm __TI_flags+3(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) bnz BASED(io_sigpending) - LOCKDEP_SYS_EXIT b BASED(io_leave) # @@ -1050,8 +1040,6 @@ cleanup_io_leave_insn: .Ltrace_irq_on: .long trace_hardirqs_on .Ltrace_irq_off: .long trace_hardirqs_off -.Llockdep_sys_exit: - .long lockdep_sys_exit #endif .Lcritical_start: .long __critical_start + 0x80000000 diff --git a/trunk/arch/s390/kernel/entry64.S b/trunk/arch/s390/kernel/entry64.S index 05e26d1fdf40..9c0d5cc8269d 100644 --- a/trunk/arch/s390/kernel/entry64.S +++ b/trunk/arch/s390/kernel/entry64.S @@ -66,14 +66,9 @@ _TIF_WORK_INT = (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK | _TIF_NEED_RESCHED | \ .macro TRACE_IRQS_OFF brasl %r14,trace_hardirqs_off .endm - - .macro LOCKDEP_SYS_EXIT - brasl %r14,lockdep_sys_exit - .endm #else #define TRACE_IRQS_ON #define TRACE_IRQS_OFF -#define LOCKDEP_SYS_EXIT #endif .macro STORE_TIMER lc_offset @@ -260,7 +255,6 @@ sysc_return: jno sysc_leave tm __TI_flags+7(%r9),_TIF_WORK_SVC jnz sysc_work # there is work to do (signals etc.) - LOCKDEP_SYS_EXIT sysc_leave: RESTORE_ALL __LC_RETURN_PSW,1 @@ -284,7 +278,6 @@ sysc_work: jo sysc_restart tm __TI_flags+7(%r9),_TIF_SINGLE_STEP jo sysc_singlestep - LOCKDEP_SYS_EXIT j sysc_leave # @@ -565,7 +558,6 @@ io_return: #endif tm __TI_flags+7(%r9),_TIF_WORK_INT jnz io_work # there is work to do (signals etc.) 
- LOCKDEP_SYS_EXIT io_leave: RESTORE_ALL __LC_RETURN_PSW,0 io_done: @@ -613,7 +605,6 @@ io_work_loop: jo io_reschedule tm __TI_flags+7(%r9),(_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK) jnz io_sigpending - LOCKDEP_SYS_EXIT j io_leave # diff --git a/trunk/arch/x86/kernel/entry_32.S b/trunk/arch/x86/kernel/entry_32.S index 8099fea0a72f..290b7bc82da3 100644 --- a/trunk/arch/x86/kernel/entry_32.S +++ b/trunk/arch/x86/kernel/entry_32.S @@ -251,7 +251,6 @@ check_userspace: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -339,7 +338,6 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx @@ -379,7 +377,6 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) # store the return value syscall_exit: - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -470,7 +467,6 @@ work_pending: jz work_notifysig work_resched: call schedule - LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret diff --git a/trunk/arch/x86/kernel/entry_64.S b/trunk/arch/x86/kernel/entry_64.S index f1cacd4897f7..1d232e5f5658 100644 --- a/trunk/arch/x86/kernel/entry_64.S +++ b/trunk/arch/x86/kernel/entry_64.S @@ -244,7 +244,6 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: - LOCKDEP_SYS_EXIT GET_THREAD_INFO(%rcx) cli TRACE_IRQS_OFF @@ -334,7 +333,6 @@ int_ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: - LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl threadinfo_flags(%rcx),%edx andl %edi,%edx @@ -546,13 
+544,11 @@ exit_intr: retint_with_reschedule: movl $_TIF_WORK_MASK,%edi retint_check: - LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful - -retint_swapgs: /* return to user-space */ +retint_swapgs: /* * The iretq could re-enable interrupts: */ @@ -561,7 +557,7 @@ retint_swapgs: /* return to user-space */ swapgs jmp restore_args -retint_restore_args: /* return to kernel space */ +retint_restore_args: cli /* * The iretq could re-enable interrupts: @@ -870,21 +866,26 @@ error_sti: movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) call *%rax - /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ -error_exit: - movl %ebx,%eax + /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ +error_exit: + movl %ebx,%eax RESTORE_REST cli TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax jne retint_kernel - LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful - jmp retint_swapgs + /* + * The iret might restore flags: + */ + TRACE_IRQS_IRETQ + swapgs + RESTORE_ARGS 0,8,0 + jmp iret_label CFI_ENDPROC error_kernelspace: diff --git a/trunk/arch/x86/kernel/kprobes_32.c b/trunk/arch/x86/kernel/kprobes_32.c index e7d0d3c2ef64..c2d03e96ae9f 100644 --- a/trunk/arch/x86/kernel/kprobes_32.c +++ b/trunk/arch/x86/kernel/kprobes_32.c @@ -557,12 +557,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_eflags; -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - if (raw_irqs_disabled_flags(regs->eflags)) - trace_hardirqs_off(); - else - trace_hardirqs_on(); -#endif /*Restore back the original saved kprobes variables and continue. 
*/ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -700,7 +694,6 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; - trace_hardirqs_off(); regs->eip = (unsigned long)(jp->entry); return 1; } diff --git a/trunk/arch/x86/kernel/kprobes_64.c b/trunk/arch/x86/kernel/kprobes_64.c index 62e28e52d784..1df17a0ec0c9 100644 --- a/trunk/arch/x86/kernel/kprobes_64.c +++ b/trunk/arch/x86/kernel/kprobes_64.c @@ -544,12 +544,6 @@ int __kprobes post_kprobe_handler(struct pt_regs *regs) resume_execution(cur, regs, kcb); regs->eflags |= kcb->kprobe_saved_rflags; -#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT - if (raw_irqs_disabled_flags(regs->eflags)) - trace_hardirqs_off(); - else - trace_hardirqs_on(); -#endif /* Restore the original saved kprobes variables and continue. */ if (kcb->kprobe_status == KPROBE_REENTER) { @@ -690,7 +684,6 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; - trace_hardirqs_off(); regs->rip = (unsigned long)(jp->entry); return 1; } diff --git a/trunk/arch/x86/lib/thunk_64.S b/trunk/arch/x86/lib/thunk_64.S index 6ea73f3de567..55e586d352d3 100644 --- a/trunk/arch/x86/lib/thunk_64.S +++ b/trunk/arch/x86/lib/thunk_64.S @@ -50,10 +50,6 @@ thunk trace_hardirqs_on_thunk,trace_hardirqs_on thunk trace_hardirqs_off_thunk,trace_hardirqs_off #endif - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - thunk lockdep_sys_exit_thunk,lockdep_sys_exit -#endif /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC diff --git a/trunk/drivers/ata/Kconfig b/trunk/drivers/ata/Kconfig index 33f5eb038773..4672066167e3 100644 --- a/trunk/drivers/ata/Kconfig +++ b/trunk/drivers/ata/Kconfig @@ -272,15 +272,6 @@ config PATA_CS5535 If unsure, say N. 
-config PATA_CS5536 - tristate "CS5536 PATA support (Experimental)" - depends on PCI && X86 && !X86_64 && EXPERIMENTAL - help - This option enables support for the AMD CS5536 - companion chip used with the Geode LX processor family. - - If unsure, say N. - config PATA_CYPRESS tristate "Cypress CY82C693 PATA support (Very Experimental)" depends on PCI && EXPERIMENTAL diff --git a/trunk/drivers/ata/Makefile b/trunk/drivers/ata/Makefile index 6bdc307649e6..2a63645003eb 100644 --- a/trunk/drivers/ata/Makefile +++ b/trunk/drivers/ata/Makefile @@ -28,7 +28,6 @@ obj-$(CONFIG_PATA_CMD64X) += pata_cmd64x.o obj-$(CONFIG_PATA_CS5520) += pata_cs5520.o obj-$(CONFIG_PATA_CS5530) += pata_cs5530.o obj-$(CONFIG_PATA_CS5535) += pata_cs5535.o -obj-$(CONFIG_PATA_CS5536) += pata_cs5536.o obj-$(CONFIG_PATA_CYPRESS) += pata_cypress.o obj-$(CONFIG_PATA_EFAR) += pata_efar.o obj-$(CONFIG_PATA_HPT366) += pata_hpt366.o diff --git a/trunk/drivers/ata/ata_piix.c b/trunk/drivers/ata/ata_piix.c index 3c6f43e381f4..9ce4aa9c2f25 100644 --- a/trunk/drivers/ata/ata_piix.c +++ b/trunk/drivers/ata/ata_piix.c @@ -130,7 +130,6 @@ enum { ich8_sata_ahci = 9, piix_pata_mwdma = 10, /* PIIX3 MWDMA only */ tolapai_sata_ahci = 11, - ich9_2port_sata = 12, /* constants for mapping table */ P0 = 0, /* port 0 */ @@ -239,19 +238,19 @@ static const struct pci_device_id piix_pci_tbl[] = { /* SATA Controller 1 IDE (ICH8) */ { 0x8086, 0x2820, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller 2 IDE (ICH8) */ - { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, + { 0x8086, 0x2825, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* Mobile SATA Controller IDE (ICH8M) */ { 0x8086, 0x2828, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9) */ { 0x8086, 0x2920, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, + { 0x8086, 0x2921, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
ich8_sata_ahci }, /* SATA Controller IDE (ICH9) */ - { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, + { 0x8086, 0x2926, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, + { 0x8086, 0x2928, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9M) */ - { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich9_2port_sata }, + { 0x8086, 0x292d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (ICH9M) */ { 0x8086, 0x292e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ich8_sata_ahci }, /* SATA Controller IDE (Tolapai) */ @@ -449,18 +448,6 @@ static const struct piix_map_db tolapai_map_db = { }, }; -static const struct piix_map_db ich9_2port_map_db = { - .mask = 0x3, - .port_enable = 0x3, - .map = { - /* PM PS SM SS MAP */ - { P0, NA, P1, NA }, /* 00b */ - { RV, RV, RV, RV }, /* 01b */ - { RV, RV, RV, RV }, /* 10b */ - { RV, RV, RV, RV }, - }, -}; - static const struct piix_map_db *piix_map_db_table[] = { [ich5_sata] = &ich5_map_db, [ich6_sata] = &ich6_map_db, @@ -468,7 +455,6 @@ static const struct piix_map_db *piix_map_db_table[] = { [ich6m_sata_ahci] = &ich6m_map_db, [ich8_sata_ahci] = &ich8_map_db, [tolapai_sata_ahci] = &tolapai_map_db, - [ich9_2port_sata] = &ich9_2port_map_db, }; static struct ata_port_info piix_port_info[] = { @@ -584,17 +570,6 @@ static struct ata_port_info piix_port_info[] = { .udma_mask = ATA_UDMA6, .port_ops = &piix_sata_ops, }, - - [ich9_2port_sata] = - { - .sht = &piix_sht, - .flags = PIIX_SATA_FLAGS | PIIX_FLAG_SCR | - PIIX_FLAG_AHCI, - .pio_mask = 0x1f, /* pio0-4 */ - .mwdma_mask = 0x07, /* mwdma0-2 */ - .udma_mask = ATA_UDMA6, - .port_ops = &piix_sata_ops, - }, }; static struct pci_bits piix_enable_bits[] = { diff --git a/trunk/drivers/ata/libata-core.c b/trunk/drivers/ata/libata-core.c index 68699b3e7998..b05384a8c326 100644 --- a/trunk/drivers/ata/libata-core.c +++ 
b/trunk/drivers/ata/libata-core.c @@ -3984,7 +3984,6 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = { { "ST9120822AS", "3.CLF", ATA_HORKAGE_NONCQ, }, { "ST9160821AS", "3.CLF", ATA_HORKAGE_NONCQ, }, { "ST9160821AS", "3.ALD", ATA_HORKAGE_NONCQ, }, - { "ST9160821AS", "3.CCD", ATA_HORKAGE_NONCQ, }, { "ST3160812AS", "3.ADJ", ATA_HORKAGE_NONCQ, }, { "ST980813AS", "3.ADB", ATA_HORKAGE_NONCQ, }, { "SAMSUNG HD401LJ", "ZZ100-15", ATA_HORKAGE_NONCQ, }, @@ -4014,14 +4013,8 @@ int strn_pattern_cmp(const char *patt, const char *name, int wildchar) p = strchr(patt, wildchar); if (p && ((*(p + 1)) == 0)) len = p - patt; - else { + else len = strlen(name); - if (!len) { - if (!*patt) - return 0; - return -1; - } - } return strncmp(patt, name, len); } diff --git a/trunk/drivers/ata/libata-scsi.c b/trunk/drivers/ata/libata-scsi.c index d63c81ed084f..ea53e6a570b4 100644 --- a/trunk/drivers/ata/libata-scsi.c +++ b/trunk/drivers/ata/libata-scsi.c @@ -1363,7 +1363,6 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc) static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; - struct ata_eh_info *ehi = &qc->dev->link->eh_info; struct scsi_cmnd *cmd = qc->scsicmd; u8 *cdb = cmd->cmnd; int need_sense = (qc->err_mask != 0); @@ -1377,14 +1376,14 @@ static void ata_scsi_qc_complete(struct ata_queued_cmd *qc) case ATA_CMD_SET_FEATURES: if ((qc->tf.feature == SETFEATURES_WC_ON) || (qc->tf.feature == SETFEATURES_WC_OFF)) { - ehi->action |= ATA_EH_REVALIDATE; + ap->link.eh_info.action |= ATA_EH_REVALIDATE; ata_port_schedule_eh(ap); } break; case ATA_CMD_INIT_DEV_PARAMS: /* CHS translation changed */ case ATA_CMD_SET_MULTI: /* multi_count changed */ - ehi->action |= ATA_EH_REVALIDATE; + ap->link.eh_info.action |= ATA_EH_REVALIDATE; ata_port_schedule_eh(ap); break; } diff --git a/trunk/drivers/ata/pata_cs5536.c b/trunk/drivers/ata/pata_cs5536.c deleted file mode 100644 index 53070f6b1fc4..000000000000 --- 
a/trunk/drivers/ata/pata_cs5536.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * pata_cs5536.c - CS5536 PATA for new ATA layer - * (C) 2007 Martin K. Petersen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Documentation: - * Available from AMD web site. - * - * The IDE timing registers for the CS5536 live in the Geode Machine - * Specific Register file and not PCI config space. Most BIOSes - * virtualize the PCI registers so the chip looks like a standard IDE - * controller. Unfortunately not all implementations get this right. - * In particular some have problems with unaligned accesses to the - * virtualized PCI registers. This driver always does full dword - * writes to work around the issue. Also, in case of a bad BIOS this - * driver can be loaded with the "msr=1" parameter which forces using - * the Machine Specific Registers to configure the device. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRV_NAME "pata_cs5536" -#define DRV_VERSION "0.0.5" - -enum { - CFG = 0, - DTC = 1, - CAST = 2, - ETC = 3, - - MSR_IDE_BASE = 0x51300000, - MSR_IDE_CFG = (MSR_IDE_BASE + 0x10), - MSR_IDE_DTC = (MSR_IDE_BASE + 0x12), - MSR_IDE_CAST = (MSR_IDE_BASE + 0x13), - MSR_IDE_ETC = (MSR_IDE_BASE + 0x14), - - PCI_IDE_CFG = 0x40, - PCI_IDE_DTC = 0x48, - PCI_IDE_CAST = 0x4c, - PCI_IDE_ETC = 0x50, - - IDE_CFG_CHANEN = 0x2, - IDE_CFG_CABLE = 0x10000, - - IDE_D0_SHIFT = 24, - IDE_D1_SHIFT = 16, - IDE_DRV_MASK = 0xff, - - IDE_CAST_D0_SHIFT = 6, - IDE_CAST_D1_SHIFT = 4, - IDE_CAST_DRV_MASK = 0x3, - IDE_CAST_CMD_MASK = 0xff, - IDE_CAST_CMD_SHIFT = 24, - - IDE_ETC_NODMA = 0x03, -}; - -static int use_msr; - -static const u32 msr_reg[4] = { - MSR_IDE_CFG, MSR_IDE_DTC, MSR_IDE_CAST, MSR_IDE_ETC, -}; - -static const u8 pci_reg[4] = { - PCI_IDE_CFG, PCI_IDE_DTC, PCI_IDE_CAST, PCI_IDE_ETC, -}; - -static inline int cs5536_read(struct pci_dev *pdev, int reg, int *val) -{ - if (unlikely(use_msr)) { - u32 dummy; - - rdmsr(msr_reg[reg], *val, dummy); - return 0; - } - - return pci_read_config_dword(pdev, pci_reg[reg], val); -} - -static inline int cs5536_write(struct pci_dev *pdev, int reg, int val) -{ - if (unlikely(use_msr)) { - wrmsr(msr_reg[reg], val, 0); - return 0; - } - - return pci_write_config_dword(pdev, pci_reg[reg], val); -} - -/** - * cs5536_cable_detect - detect cable type - * @ap: Port to detect on - * @deadline: deadline jiffies for the operation - * - * Perform cable detection for ATA66 capable cable. Return a libata - * cable type. 
- */ - -static int cs5536_cable_detect(struct ata_port *ap) -{ - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - u32 cfg; - - cs5536_read(pdev, CFG, &cfg); - - if (cfg & (IDE_CFG_CABLE << ap->port_no)) - return ATA_CBL_PATA80; - else - return ATA_CBL_PATA40; -} - -/** - * cs5536_set_piomode - PIO setup - * @ap: ATA interface - * @adev: device on the interface - */ - -static void cs5536_set_piomode(struct ata_port *ap, struct ata_device *adev) -{ - static const u8 drv_timings[5] = { - 0x98, 0x55, 0x32, 0x21, 0x20, - }; - - static const u8 addr_timings[5] = { - 0x2, 0x1, 0x0, 0x0, 0x0, - }; - - static const u8 cmd_timings[5] = { - 0x99, 0x92, 0x90, 0x22, 0x20, - }; - - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - struct ata_device *pair = ata_dev_pair(adev); - int mode = adev->pio_mode - XFER_PIO_0; - int cmdmode = mode; - int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT; - int cshift = ap->port_no ? IDE_CAST_D1_SHIFT : IDE_CAST_D0_SHIFT; - u32 dtc, cast, etc; - - if (pair) - cmdmode = min(mode, pair->pio_mode - XFER_PIO_0); - - cs5536_read(pdev, DTC, &dtc); - cs5536_read(pdev, CAST, &cast); - cs5536_read(pdev, ETC, &etc); - - dtc &= ~(IDE_DRV_MASK << dshift); - dtc |= drv_timings[mode] << dshift; - - cast &= ~(IDE_CAST_DRV_MASK << cshift); - cast |= addr_timings[mode] << cshift; - - cast &= ~(IDE_CAST_CMD_MASK << IDE_CAST_CMD_SHIFT); - cast |= cmd_timings[cmdmode] << IDE_CAST_CMD_SHIFT; - - etc &= ~(IDE_DRV_MASK << dshift); - etc |= IDE_ETC_NODMA << dshift; - - cs5536_write(pdev, DTC, dtc); - cs5536_write(pdev, CAST, cast); - cs5536_write(pdev, ETC, etc); -} - -/** - * cs5536_set_dmamode - DMA timing setup - * @ap: ATA interface - * @adev: Device being configured - * - */ - -static void cs5536_set_dmamode(struct ata_port *ap, struct ata_device *adev) -{ - static const u8 udma_timings[6] = { - 0xc2, 0xc1, 0xc0, 0xc4, 0xc5, 0xc6, - }; - - static const u8 mwdma_timings[3] = { - 0x67, 0x21, 0x20, - }; - - struct pci_dev *pdev = 
to_pci_dev(ap->host->dev); - u32 dtc, etc; - int mode = adev->dma_mode; - int dshift = ap->port_no ? IDE_D1_SHIFT : IDE_D0_SHIFT; - - if (mode >= XFER_UDMA_0) { - cs5536_read(pdev, ETC, &etc); - - etc &= ~(IDE_DRV_MASK << dshift); - etc |= udma_timings[mode - XFER_UDMA_0] << dshift; - - cs5536_write(pdev, ETC, etc); - } else { /* MWDMA */ - cs5536_read(pdev, DTC, &dtc); - - dtc &= ~(IDE_DRV_MASK << dshift); - dtc |= mwdma_timings[mode] << dshift; - - cs5536_write(pdev, DTC, dtc); - } -} - -static struct scsi_host_template cs5536_sht = { - .module = THIS_MODULE, - .name = DRV_NAME, - .ioctl = ata_scsi_ioctl, - .queuecommand = ata_scsi_queuecmd, - .can_queue = ATA_DEF_QUEUE, - .this_id = ATA_SHT_THIS_ID, - .sg_tablesize = LIBATA_MAX_PRD, - .cmd_per_lun = ATA_SHT_CMD_PER_LUN, - .emulated = ATA_SHT_EMULATED, - .use_clustering = ATA_SHT_USE_CLUSTERING, - .proc_name = DRV_NAME, - .dma_boundary = ATA_DMA_BOUNDARY, - .slave_configure = ata_scsi_slave_config, - .slave_destroy = ata_scsi_slave_destroy, - .bios_param = ata_std_bios_param, -}; - -static struct ata_port_operations cs5536_port_ops = { - .set_piomode = cs5536_set_piomode, - .set_dmamode = cs5536_set_dmamode, - .mode_filter = ata_pci_default_filter, - - .tf_load = ata_tf_load, - .tf_read = ata_tf_read, - .check_status = ata_check_status, - .exec_command = ata_exec_command, - .dev_select = ata_std_dev_select, - - .freeze = ata_bmdma_freeze, - .thaw = ata_bmdma_thaw, - .error_handler = ata_bmdma_error_handler, - .post_internal_cmd = ata_bmdma_post_internal_cmd, - .cable_detect = cs5536_cable_detect, - - .bmdma_setup = ata_bmdma_setup, - .bmdma_start = ata_bmdma_start, - .bmdma_stop = ata_bmdma_stop, - .bmdma_status = ata_bmdma_status, - - .qc_prep = ata_qc_prep, - .qc_issue = ata_qc_issue_prot, - - .data_xfer = ata_data_xfer, - - .irq_handler = ata_interrupt, - .irq_clear = ata_bmdma_irq_clear, - .irq_on = ata_irq_on, - - .port_start = ata_port_start, -}; - -/** - * cs5536_init_one - * @dev: PCI device - * @id: 
Entry in match table - * - */ - -static int cs5536_init_one(struct pci_dev *dev, const struct pci_device_id *id) -{ - static const struct ata_port_info info = { - .sht = &cs5536_sht, - .flags = ATA_FLAG_SLAVE_POSS, - .pio_mask = 0x1f, - .mwdma_mask = 0x07, - .udma_mask = ATA_UDMA5, - .port_ops = &cs5536_port_ops, - }; - - const struct ata_port_info *ppi[] = { &info, &ata_dummy_port_info }; - u32 cfg; - - if (use_msr) - printk(KERN_ERR DRV_NAME ": Using MSR regs instead of PCI\n"); - - cs5536_read(dev, CFG, &cfg); - - if ((cfg & IDE_CFG_CHANEN) == 0) { - printk(KERN_ERR DRV_NAME ": disabled by BIOS\n"); - return -ENODEV; - } - - return ata_pci_init_one(dev, ppi); -} - -static const struct pci_device_id cs5536[] = { - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_CS5536_IDE), }, - { }, -}; - -static struct pci_driver cs5536_pci_driver = { - .name = DRV_NAME, - .id_table = cs5536, - .probe = cs5536_init_one, - .remove = ata_pci_remove_one, -#ifdef CONFIG_PM - .suspend = ata_pci_device_suspend, - .resume = ata_pci_device_resume, -#endif -}; - -static int __init cs5536_init(void) -{ - return pci_register_driver(&cs5536_pci_driver); -} - -static void __exit cs5536_exit(void) -{ - pci_unregister_driver(&cs5536_pci_driver); -} - -MODULE_AUTHOR("Martin K. 
Petersen"); -MODULE_DESCRIPTION("low-level driver for the CS5536 IDE controller"); -MODULE_LICENSE("GPL"); -MODULE_DEVICE_TABLE(pci, cs5536); -MODULE_VERSION(DRV_VERSION); -module_param_named(msr, use_msr, int, 0644); -MODULE_PARM_DESC(msr, "Force using MSR to configure IDE function (Default: 0)"); - -module_init(cs5536_init); -module_exit(cs5536_exit); diff --git a/trunk/drivers/ata/pata_pcmcia.c b/trunk/drivers/ata/pata_pcmcia.c index 5db2013230b3..782ff4ada9d1 100644 --- a/trunk/drivers/ata/pata_pcmcia.c +++ b/trunk/drivers/ata/pata_pcmcia.c @@ -353,7 +353,6 @@ static void pcmcia_remove_one(struct pcmcia_device *pdev) static struct pcmcia_device_id pcmcia_devices[] = { PCMCIA_DEVICE_FUNC_ID(4), - PCMCIA_DEVICE_MANF_CARD(0x0000, 0x0000), /* Corsair */ PCMCIA_DEVICE_MANF_CARD(0x0007, 0x0000), /* Hitachi */ PCMCIA_DEVICE_MANF_CARD(0x000a, 0x0000), /* I-O Data CFA */ PCMCIA_DEVICE_MANF_CARD(0x001c, 0x0001), /* Mitsubishi CFA */ @@ -379,7 +378,6 @@ static struct pcmcia_device_id pcmcia_devices[] = { PCMCIA_DEVICE_PROD_ID12("EXP ", "CD-ROM", 0x0a5c52fd, 0x66536591), PCMCIA_DEVICE_PROD_ID12("EXP ", "PnPIDE", 0x0a5c52fd, 0x0c694728), PCMCIA_DEVICE_PROD_ID12("FREECOM", "PCCARD-IDE", 0x5714cbf7, 0x48e0ab8e), - PCMCIA_DEVICE_PROD_ID12("Hyperstone", "Model1", 0x3d5b9ef5, 0xca6ab420), PCMCIA_DEVICE_PROD_ID12("HITACHI", "FLASH", 0xf4f43949, 0x9eb86aae), PCMCIA_DEVICE_PROD_ID12("HITACHI", "microdrive", 0xf4f43949, 0xa6d76178), PCMCIA_DEVICE_PROD_ID12("IBM", "microdrive", 0xb569a6e5, 0xa6d76178), diff --git a/trunk/drivers/ata/pata_sil680.c b/trunk/drivers/ata/pata_sil680.c index 4dc2e73298fd..2eb75cd74a96 100644 --- a/trunk/drivers/ata/pata_sil680.c +++ b/trunk/drivers/ata/pata_sil680.c @@ -279,7 +279,7 @@ static struct ata_port_operations sil680_port_ops = { * Returns the final clock settings. 
*/ -static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio) +static u8 sil680_init_chip(struct pci_dev *pdev) { u32 class_rev = 0; u8 tmpbyte = 0; @@ -297,8 +297,6 @@ static u8 sil680_init_chip(struct pci_dev *pdev, int *try_mmio) dev_dbg(&pdev->dev, "sil680: BA5_EN = %d clock = %02X\n", tmpbyte & 1, tmpbyte & 0x30); - *try_mmio = (tmpbyte & 1) || pci_resource_start(pdev, 5); - switch(tmpbyte & 0x30) { case 0x00: /* 133 clock attempt to force it on */ @@ -363,76 +361,25 @@ static int __devinit sil680_init_one(struct pci_dev *pdev, }; const struct ata_port_info *ppi[] = { &info, NULL }; static int printed_version; - struct ata_host *host; - void __iomem *mmio_base; - int rc, try_mmio; if (!printed_version++) dev_printk(KERN_DEBUG, &pdev->dev, "version " DRV_VERSION "\n"); - switch (sil680_init_chip(pdev, &try_mmio)) { + switch(sil680_init_chip(pdev)) + { case 0: ppi[0] = &info_slow; break; case 0x30: return -ENODEV; } - - if (!try_mmio) - goto use_ioports; - - /* Try to acquire MMIO resources and fallback to PIO if - * that fails - */ - rc = pcim_enable_device(pdev); - if (rc) - return rc; - rc = pcim_iomap_regions(pdev, 1 << SIL680_MMIO_BAR, DRV_NAME); - if (rc) - goto use_ioports; - - /* Allocate host and set it up */ - host = ata_host_alloc_pinfo(&pdev->dev, ppi, 2); - if (!host) - return -ENOMEM; - host->iomap = pcim_iomap_table(pdev); - - /* Setup DMA masks */ - rc = pci_set_dma_mask(pdev, ATA_DMA_MASK); - if (rc) - return rc; - rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK); - if (rc) - return rc; - pci_set_master(pdev); - - /* Get MMIO base and initialize port addresses */ - mmio_base = host->iomap[SIL680_MMIO_BAR]; - host->ports[0]->ioaddr.bmdma_addr = mmio_base + 0x00; - host->ports[0]->ioaddr.cmd_addr = mmio_base + 0x80; - host->ports[0]->ioaddr.ctl_addr = mmio_base + 0x8a; - host->ports[0]->ioaddr.altstatus_addr = mmio_base + 0x8a; - ata_std_ports(&host->ports[0]->ioaddr); - host->ports[1]->ioaddr.bmdma_addr = mmio_base + 0x08; - 
host->ports[1]->ioaddr.cmd_addr = mmio_base + 0xc0; - host->ports[1]->ioaddr.ctl_addr = mmio_base + 0xca; - host->ports[1]->ioaddr.altstatus_addr = mmio_base + 0xca; - ata_std_ports(&host->ports[1]->ioaddr); - - /* Register & activate */ - return ata_host_activate(host, pdev->irq, ata_interrupt, IRQF_SHARED, - &sil680_sht); - -use_ioports: return ata_pci_init_one(pdev, ppi); } #ifdef CONFIG_PM static int sil680_reinit_one(struct pci_dev *pdev) { - int try_mmio; - - sil680_init_chip(pdev, &try_mmio); + sil680_init_chip(pdev); return ata_pci_device_resume(pdev); } #endif diff --git a/trunk/drivers/ata/sata_nv.c b/trunk/drivers/ata/sata_nv.c index 240a8920d0bd..40557fe2ffdf 100644 --- a/trunk/drivers/ata/sata_nv.c +++ b/trunk/drivers/ata/sata_nv.c @@ -169,35 +169,6 @@ enum { NV_ADMA_PORT_REGISTER_MODE = (1 << 0), NV_ADMA_ATAPI_SETUP_COMPLETE = (1 << 1), - /* MCP55 reg offset */ - NV_CTL_MCP55 = 0x400, - NV_INT_STATUS_MCP55 = 0x440, - NV_INT_ENABLE_MCP55 = 0x444, - NV_NCQ_REG_MCP55 = 0x448, - - /* MCP55 */ - NV_INT_ALL_MCP55 = 0xffff, - NV_INT_PORT_SHIFT_MCP55 = 16, /* each port occupies 16 bits */ - NV_INT_MASK_MCP55 = NV_INT_ALL_MCP55 & 0xfffd, - - /* SWNCQ ENABLE BITS*/ - NV_CTL_PRI_SWNCQ = 0x02, - NV_CTL_SEC_SWNCQ = 0x04, - - /* SW NCQ status bits*/ - NV_SWNCQ_IRQ_DEV = (1 << 0), - NV_SWNCQ_IRQ_PM = (1 << 1), - NV_SWNCQ_IRQ_ADDED = (1 << 2), - NV_SWNCQ_IRQ_REMOVED = (1 << 3), - - NV_SWNCQ_IRQ_BACKOUT = (1 << 4), - NV_SWNCQ_IRQ_SDBFIS = (1 << 5), - NV_SWNCQ_IRQ_DHREGFIS = (1 << 6), - NV_SWNCQ_IRQ_DMASETUP = (1 << 7), - - NV_SWNCQ_IRQ_HOTPLUG = NV_SWNCQ_IRQ_ADDED | - NV_SWNCQ_IRQ_REMOVED, - }; /* ADMA Physical Region Descriptor - one SG segment */ @@ -255,42 +226,6 @@ struct nv_host_priv { unsigned long type; }; -struct defer_queue { - u32 defer_bits; - unsigned int head; - unsigned int tail; - unsigned int tag[ATA_MAX_QUEUE]; -}; - -enum ncq_saw_flag_list { - ncq_saw_d2h = (1U << 0), - ncq_saw_dmas = (1U << 1), - ncq_saw_sdb = (1U << 2), - ncq_saw_backout = (1U << 
3), -}; - -struct nv_swncq_port_priv { - struct ata_prd *prd; /* our SG list */ - dma_addr_t prd_dma; /* and its DMA mapping */ - void __iomem *sactive_block; - void __iomem *irq_block; - void __iomem *tag_block; - u32 qc_active; - - unsigned int last_issue_tag; - - /* fifo circular queue to store deferral command */ - struct defer_queue defer_queue; - - /* for NCQ interrupt analysis */ - u32 dhfis_bits; - u32 dmafis_bits; - u32 sdbfis_bits; - - unsigned int ncq_flags; -}; - - #define NV_ADMA_CHECK_INTR(GCTL, PORT) ((GCTL) & ( 1 << (19 + (12 * (PORT))))) static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent); @@ -328,29 +263,13 @@ static void nv_adma_host_stop(struct ata_host *host); static void nv_adma_post_internal_cmd(struct ata_queued_cmd *qc); static void nv_adma_tf_read(struct ata_port *ap, struct ata_taskfile *tf); -static void nv_mcp55_thaw(struct ata_port *ap); -static void nv_mcp55_freeze(struct ata_port *ap); -static void nv_swncq_error_handler(struct ata_port *ap); -static int nv_swncq_slave_config(struct scsi_device *sdev); -static int nv_swncq_port_start(struct ata_port *ap); -static void nv_swncq_qc_prep(struct ata_queued_cmd *qc); -static void nv_swncq_fill_sg(struct ata_queued_cmd *qc); -static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc); -static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis); -static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance); -#ifdef CONFIG_PM -static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg); -static int nv_swncq_port_resume(struct ata_port *ap); -#endif - enum nv_host_type { GENERIC, NFORCE2, NFORCE3 = NFORCE2, /* NF2 == NF3 as far as sata_nv is concerned */ CK804, - ADMA, - SWNCQ, + ADMA }; static const struct pci_device_id nv_pci_tbl[] = { @@ -361,13 +280,13 @@ static const struct pci_device_id nv_pci_tbl[] = { { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_CK804_SATA2), CK804 }, { PCI_VDEVICE(NVIDIA, 
PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA), CK804 }, { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA2), CK804 }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), SWNCQ }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), SWNCQ }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), SWNCQ }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), SWNCQ }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), SWNCQ }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), SWNCQ }, - { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), SWNCQ }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA2), GENERIC }, + { PCI_VDEVICE(NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP61_SATA3), GENERIC }, { } /* terminate list */ }; @@ -420,25 +339,6 @@ static struct scsi_host_template nv_adma_sht = { .bios_param = ata_std_bios_param, }; -static struct scsi_host_template nv_swncq_sht = { - .module = THIS_MODULE, - .name = DRV_NAME, - .ioctl = ata_scsi_ioctl, - .queuecommand = ata_scsi_queuecmd, - .change_queue_depth = ata_scsi_change_queue_depth, - .can_queue = ATA_MAX_QUEUE, - .this_id = ATA_SHT_THIS_ID, - .sg_tablesize = LIBATA_MAX_PRD, - .cmd_per_lun = ATA_SHT_CMD_PER_LUN, - .emulated = ATA_SHT_EMULATED, - .use_clustering = ATA_SHT_USE_CLUSTERING, - .proc_name = DRV_NAME, - .dma_boundary = ATA_DMA_BOUNDARY, - .slave_configure = nv_swncq_slave_config, - .slave_destroy = ata_scsi_slave_destroy, - .bios_param = ata_std_bios_param, -}; - static const struct ata_port_operations nv_generic_ops = { .tf_load = 
ata_tf_load, .tf_read = ata_tf_read, @@ -544,35 +444,6 @@ static const struct ata_port_operations nv_adma_ops = { .host_stop = nv_adma_host_stop, }; -static const struct ata_port_operations nv_swncq_ops = { - .tf_load = ata_tf_load, - .tf_read = ata_tf_read, - .exec_command = ata_exec_command, - .check_status = ata_check_status, - .dev_select = ata_std_dev_select, - .bmdma_setup = ata_bmdma_setup, - .bmdma_start = ata_bmdma_start, - .bmdma_stop = ata_bmdma_stop, - .bmdma_status = ata_bmdma_status, - .qc_defer = ata_std_qc_defer, - .qc_prep = nv_swncq_qc_prep, - .qc_issue = nv_swncq_qc_issue, - .freeze = nv_mcp55_freeze, - .thaw = nv_mcp55_thaw, - .error_handler = nv_swncq_error_handler, - .post_internal_cmd = ata_bmdma_post_internal_cmd, - .data_xfer = ata_data_xfer, - .irq_clear = ata_bmdma_irq_clear, - .irq_on = ata_irq_on, - .scr_read = nv_scr_read, - .scr_write = nv_scr_write, -#ifdef CONFIG_PM - .port_suspend = nv_swncq_port_suspend, - .port_resume = nv_swncq_port_resume, -#endif - .port_start = nv_swncq_port_start, -}; - static const struct ata_port_info nv_port_info[] = { /* generic */ { @@ -619,18 +490,6 @@ static const struct ata_port_info nv_port_info[] = { .port_ops = &nv_adma_ops, .irq_handler = nv_adma_interrupt, }, - /* SWNCQ */ - { - .sht = &nv_swncq_sht, - .flags = ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY | - ATA_FLAG_NCQ, - .link_flags = ATA_LFLAG_HRST_TO_RESUME, - .pio_mask = NV_PIO_MASK, - .mwdma_mask = NV_MWDMA_MASK, - .udma_mask = NV_UDMA_MASK, - .port_ops = &nv_swncq_ops, - .irq_handler = nv_swncq_interrupt, - }, }; MODULE_AUTHOR("NVIDIA"); @@ -640,7 +499,6 @@ MODULE_DEVICE_TABLE(pci, nv_pci_tbl); MODULE_VERSION(DRV_VERSION); static int adma_enabled = 1; -static int swncq_enabled; static void nv_adma_register_mode(struct ata_port *ap) { @@ -1594,34 +1452,6 @@ static void nv_ck804_thaw(struct ata_port *ap) writeb(mask, mmio_base + NV_INT_ENABLE_CK804); } -static void nv_mcp55_freeze(struct ata_port *ap) -{ - void __iomem *mmio_base = 
ap->host->iomap[NV_MMIO_BAR]; - int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55; - u32 mask; - - writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55); - - mask = readl(mmio_base + NV_INT_ENABLE_MCP55); - mask &= ~(NV_INT_ALL_MCP55 << shift); - writel(mask, mmio_base + NV_INT_ENABLE_MCP55); - ata_bmdma_freeze(ap); -} - -static void nv_mcp55_thaw(struct ata_port *ap) -{ - void __iomem *mmio_base = ap->host->iomap[NV_MMIO_BAR]; - int shift = ap->port_no * NV_INT_PORT_SHIFT_MCP55; - u32 mask; - - writel(NV_INT_ALL_MCP55 << shift, mmio_base + NV_INT_STATUS_MCP55); - - mask = readl(mmio_base + NV_INT_ENABLE_MCP55); - mask |= (NV_INT_MASK_MCP55 << shift); - writel(mask, mmio_base + NV_INT_ENABLE_MCP55); - ata_bmdma_thaw(ap); -} - static int nv_hardreset(struct ata_link *link, unsigned int *class, unsigned long deadline) { @@ -1695,663 +1525,6 @@ static void nv_adma_error_handler(struct ata_port *ap) nv_hardreset, ata_std_postreset); } -static void nv_swncq_qc_to_dq(struct ata_port *ap, struct ata_queued_cmd *qc) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - struct defer_queue *dq = &pp->defer_queue; - - /* queue is full */ - WARN_ON(dq->tail - dq->head == ATA_MAX_QUEUE); - dq->defer_bits |= (1 << qc->tag); - dq->tag[dq->tail++ & (ATA_MAX_QUEUE - 1)] = qc->tag; -} - -static struct ata_queued_cmd *nv_swncq_qc_from_dq(struct ata_port *ap) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - struct defer_queue *dq = &pp->defer_queue; - unsigned int tag; - - if (dq->head == dq->tail) /* null queue */ - return NULL; - - tag = dq->tag[dq->head & (ATA_MAX_QUEUE - 1)]; - dq->tag[dq->head++ & (ATA_MAX_QUEUE - 1)] = ATA_TAG_POISON; - WARN_ON(!(dq->defer_bits & (1 << tag))); - dq->defer_bits &= ~(1 << tag); - - return ata_qc_from_tag(ap, tag); -} - -static void nv_swncq_fis_reinit(struct ata_port *ap) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - - pp->dhfis_bits = 0; - pp->dmafis_bits = 0; - pp->sdbfis_bits = 0; - pp->ncq_flags = 0; -} - 
-static void nv_swncq_pp_reinit(struct ata_port *ap) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - struct defer_queue *dq = &pp->defer_queue; - - dq->head = 0; - dq->tail = 0; - dq->defer_bits = 0; - pp->qc_active = 0; - pp->last_issue_tag = ATA_TAG_POISON; - nv_swncq_fis_reinit(ap); -} - -static void nv_swncq_irq_clear(struct ata_port *ap, u16 fis) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - - writew(fis, pp->irq_block); -} - -static void __ata_bmdma_stop(struct ata_port *ap) -{ - struct ata_queued_cmd qc; - - qc.ap = ap; - ata_bmdma_stop(&qc); -} - -static void nv_swncq_ncq_stop(struct ata_port *ap) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - unsigned int i; - u32 sactive; - u32 done_mask; - - ata_port_printk(ap, KERN_ERR, - "EH in SWNCQ mode,QC:qc_active 0x%X sactive 0x%X\n", - ap->qc_active, ap->link.sactive); - ata_port_printk(ap, KERN_ERR, - "SWNCQ:qc_active 0x%X defer_bits 0x%X last_issue_tag 0x%x\n " - "dhfis 0x%X dmafis 0x%X sdbfis 0x%X\n", - pp->qc_active, pp->defer_queue.defer_bits, pp->last_issue_tag, - pp->dhfis_bits, pp->dmafis_bits, pp->sdbfis_bits); - - ata_port_printk(ap, KERN_ERR, "ATA_REG 0x%X ERR_REG 0x%X\n", - ap->ops->check_status(ap), - ioread8(ap->ioaddr.error_addr)); - - sactive = readl(pp->sactive_block); - done_mask = pp->qc_active ^ sactive; - - ata_port_printk(ap, KERN_ERR, "tag : dhfis dmafis sdbfis sacitve\n"); - for (i = 0; i < ATA_MAX_QUEUE; i++) { - u8 err = 0; - if (pp->qc_active & (1 << i)) - err = 0; - else if (done_mask & (1 << i)) - err = 1; - else - continue; - - ata_port_printk(ap, KERN_ERR, - "tag 0x%x: %01x %01x %01x %01x %s\n", i, - (pp->dhfis_bits >> i) & 0x1, - (pp->dmafis_bits >> i) & 0x1, - (pp->sdbfis_bits >> i) & 0x1, - (sactive >> i) & 0x1, - (err ? "error! 
tag doesn't exit" : " ")); - } - - nv_swncq_pp_reinit(ap); - ap->ops->irq_clear(ap); - __ata_bmdma_stop(ap); - nv_swncq_irq_clear(ap, 0xffff); -} - -static void nv_swncq_error_handler(struct ata_port *ap) -{ - struct ata_eh_context *ehc = &ap->link.eh_context; - - if (ap->link.sactive) { - nv_swncq_ncq_stop(ap); - ehc->i.action |= ATA_EH_HARDRESET; - } - - ata_bmdma_drive_eh(ap, ata_std_prereset, ata_std_softreset, - nv_hardreset, ata_std_postreset); -} - -#ifdef CONFIG_PM -static int nv_swncq_port_suspend(struct ata_port *ap, pm_message_t mesg) -{ - void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; - u32 tmp; - - /* clear irq */ - writel(~0, mmio + NV_INT_STATUS_MCP55); - - /* disable irq */ - writel(0, mmio + NV_INT_ENABLE_MCP55); - - /* disable swncq */ - tmp = readl(mmio + NV_CTL_MCP55); - tmp &= ~(NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ); - writel(tmp, mmio + NV_CTL_MCP55); - - return 0; -} - -static int nv_swncq_port_resume(struct ata_port *ap) -{ - void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; - u32 tmp; - - /* clear irq */ - writel(~0, mmio + NV_INT_STATUS_MCP55); - - /* enable irq */ - writel(0x00fd00fd, mmio + NV_INT_ENABLE_MCP55); - - /* enable swncq */ - tmp = readl(mmio + NV_CTL_MCP55); - writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55); - - return 0; -} -#endif - -static void nv_swncq_host_init(struct ata_host *host) -{ - u32 tmp; - void __iomem *mmio = host->iomap[NV_MMIO_BAR]; - struct pci_dev *pdev = to_pci_dev(host->dev); - u8 regval; - - /* disable ECO 398 */ - pci_read_config_byte(pdev, 0x7f, ®val); - regval &= ~(1 << 7); - pci_write_config_byte(pdev, 0x7f, regval); - - /* enable swncq */ - tmp = readl(mmio + NV_CTL_MCP55); - VPRINTK("HOST_CTL:0x%X\n", tmp); - writel(tmp | NV_CTL_PRI_SWNCQ | NV_CTL_SEC_SWNCQ, mmio + NV_CTL_MCP55); - - /* enable irq intr */ - tmp = readl(mmio + NV_INT_ENABLE_MCP55); - VPRINTK("HOST_ENABLE:0x%X\n", tmp); - writel(tmp | 0x00fd00fd, mmio + NV_INT_ENABLE_MCP55); - - /* clear port irq */ - 
writel(~0x0, mmio + NV_INT_STATUS_MCP55); -} - -static int nv_swncq_slave_config(struct scsi_device *sdev) -{ - struct ata_port *ap = ata_shost_to_port(sdev->host); - struct pci_dev *pdev = to_pci_dev(ap->host->dev); - struct ata_device *dev; - int rc; - u8 rev; - u8 check_maxtor = 0; - unsigned char model_num[ATA_ID_PROD_LEN + 1]; - - rc = ata_scsi_slave_config(sdev); - if (sdev->id >= ATA_MAX_DEVICES || sdev->channel || sdev->lun) - /* Not a proper libata device, ignore */ - return rc; - - dev = &ap->link.device[sdev->id]; - if (!(ap->flags & ATA_FLAG_NCQ) || dev->class == ATA_DEV_ATAPI) - return rc; - - /* if MCP51 and Maxtor, then disable ncq */ - if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA || - pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP51_SATA2) - check_maxtor = 1; - - /* if MCP55 and rev <= a2 and Maxtor, then disable ncq */ - if (pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA || - pdev->device == PCI_DEVICE_ID_NVIDIA_NFORCE_MCP55_SATA2) { - pci_read_config_byte(pdev, 0x8, &rev); - if (rev <= 0xa2) - check_maxtor = 1; - } - - if (!check_maxtor) - return rc; - - ata_id_c_string(dev->id, model_num, ATA_ID_PROD, sizeof(model_num)); - - if (strncmp(model_num, "Maxtor", 6) == 0) { - ata_scsi_change_queue_depth(sdev, 1); - ata_dev_printk(dev, KERN_NOTICE, - "Disabling SWNCQ mode (depth %x)\n", sdev->queue_depth); - } - - return rc; -} - -static int nv_swncq_port_start(struct ata_port *ap) -{ - struct device *dev = ap->host->dev; - void __iomem *mmio = ap->host->iomap[NV_MMIO_BAR]; - struct nv_swncq_port_priv *pp; - int rc; - - rc = ata_port_start(ap); - if (rc) - return rc; - - pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL); - if (!pp) - return -ENOMEM; - - pp->prd = dmam_alloc_coherent(dev, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE, - &pp->prd_dma, GFP_KERNEL); - if (!pp->prd) - return -ENOMEM; - memset(pp->prd, 0, ATA_PRD_TBL_SZ * ATA_MAX_QUEUE); - - ap->private_data = pp; - pp->sactive_block = ap->ioaddr.scr_addr + 4 * SCR_ACTIVE; - 
pp->irq_block = mmio + NV_INT_STATUS_MCP55 + ap->port_no * 2; - pp->tag_block = mmio + NV_NCQ_REG_MCP55 + ap->port_no * 2; - - return 0; -} - -static void nv_swncq_qc_prep(struct ata_queued_cmd *qc) -{ - if (qc->tf.protocol != ATA_PROT_NCQ) { - ata_qc_prep(qc); - return; - } - - if (!(qc->flags & ATA_QCFLAG_DMAMAP)) - return; - - nv_swncq_fill_sg(qc); -} - -static void nv_swncq_fill_sg(struct ata_queued_cmd *qc) -{ - struct ata_port *ap = qc->ap; - struct scatterlist *sg; - unsigned int idx; - struct nv_swncq_port_priv *pp = ap->private_data; - struct ata_prd *prd; - - WARN_ON(qc->__sg == NULL); - WARN_ON(qc->n_elem == 0 && qc->pad_len == 0); - - prd = pp->prd + ATA_MAX_PRD * qc->tag; - - idx = 0; - ata_for_each_sg(sg, qc) { - u32 addr, offset; - u32 sg_len, len; - - addr = (u32)sg_dma_address(sg); - sg_len = sg_dma_len(sg); - - while (sg_len) { - offset = addr & 0xffff; - len = sg_len; - if ((offset + sg_len) > 0x10000) - len = 0x10000 - offset; - - prd[idx].addr = cpu_to_le32(addr); - prd[idx].flags_len = cpu_to_le32(len & 0xffff); - - idx++; - sg_len -= len; - addr += len; - } - } - - if (idx) - prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT); -} - -static unsigned int nv_swncq_issue_atacmd(struct ata_port *ap, - struct ata_queued_cmd *qc) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - - if (qc == NULL) - return 0; - - DPRINTK("Enter\n"); - - writel((1 << qc->tag), pp->sactive_block); - pp->last_issue_tag = qc->tag; - pp->dhfis_bits &= ~(1 << qc->tag); - pp->dmafis_bits &= ~(1 << qc->tag); - pp->qc_active |= (0x1 << qc->tag); - - ap->ops->tf_load(ap, &qc->tf); /* load tf registers */ - ap->ops->exec_command(ap, &qc->tf); - - DPRINTK("Issued tag %u\n", qc->tag); - - return 0; -} - -static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc) -{ - struct ata_port *ap = qc->ap; - struct nv_swncq_port_priv *pp = ap->private_data; - - if (qc->tf.protocol != ATA_PROT_NCQ) - return ata_qc_issue_prot(qc); - - DPRINTK("Enter\n"); - - if (!pp->qc_active) 
- nv_swncq_issue_atacmd(ap, qc); - else - nv_swncq_qc_to_dq(ap, qc); /* add qc to defer queue */ - - return 0; -} - -static void nv_swncq_hotplug(struct ata_port *ap, u32 fis) -{ - u32 serror; - struct ata_eh_info *ehi = &ap->link.eh_info; - - ata_ehi_clear_desc(ehi); - - /* AHCI needs SError cleared; otherwise, it might lock up */ - sata_scr_read(&ap->link, SCR_ERROR, &serror); - sata_scr_write(&ap->link, SCR_ERROR, serror); - - /* analyze @irq_stat */ - if (fis & NV_SWNCQ_IRQ_ADDED) - ata_ehi_push_desc(ehi, "hot plug"); - else if (fis & NV_SWNCQ_IRQ_REMOVED) - ata_ehi_push_desc(ehi, "hot unplug"); - - ata_ehi_hotplugged(ehi); - - /* okay, let's hand over to EH */ - ehi->serror |= serror; - - ata_port_freeze(ap); -} - -static int nv_swncq_sdbfis(struct ata_port *ap) -{ - struct ata_queued_cmd *qc; - struct nv_swncq_port_priv *pp = ap->private_data; - struct ata_eh_info *ehi = &ap->link.eh_info; - u32 sactive; - int nr_done = 0; - u32 done_mask; - int i; - u8 host_stat; - u8 lack_dhfis = 0; - - host_stat = ap->ops->bmdma_status(ap); - if (unlikely(host_stat & ATA_DMA_ERR)) { - /* error when transfering data to/from memory */ - ata_ehi_clear_desc(ehi); - ata_ehi_push_desc(ehi, "BMDMA stat 0x%x", host_stat); - ehi->err_mask |= AC_ERR_HOST_BUS; - ehi->action |= ATA_EH_SOFTRESET; - return -EINVAL; - } - - ap->ops->irq_clear(ap); - __ata_bmdma_stop(ap); - - sactive = readl(pp->sactive_block); - done_mask = pp->qc_active ^ sactive; - - if (unlikely(done_mask & sactive)) { - ata_ehi_clear_desc(ehi); - ata_ehi_push_desc(ehi, "illegal SWNCQ:qc_active transition" - "(%08x->%08x)", pp->qc_active, sactive); - ehi->err_mask |= AC_ERR_HSM; - ehi->action |= ATA_EH_HARDRESET; - return -EINVAL; - } - for (i = 0; i < ATA_MAX_QUEUE; i++) { - if (!(done_mask & (1 << i))) - continue; - - qc = ata_qc_from_tag(ap, i); - if (qc) { - ata_qc_complete(qc); - pp->qc_active &= ~(1 << i); - pp->dhfis_bits &= ~(1 << i); - pp->dmafis_bits &= ~(1 << i); - pp->sdbfis_bits |= (1 << i); - nr_done++; 
- } - } - - if (!ap->qc_active) { - DPRINTK("over\n"); - nv_swncq_pp_reinit(ap); - return nr_done; - } - - if (pp->qc_active & pp->dhfis_bits) - return nr_done; - - if ((pp->ncq_flags & ncq_saw_backout) || - (pp->qc_active ^ pp->dhfis_bits)) - /* if the controller cann't get a device to host register FIS, - * The driver needs to reissue the new command. - */ - lack_dhfis = 1; - - DPRINTK("id 0x%x QC: qc_active 0x%x," - "SWNCQ:qc_active 0x%X defer_bits %X " - "dhfis 0x%X dmafis 0x%X last_issue_tag %x\n", - ap->print_id, ap->qc_active, pp->qc_active, - pp->defer_queue.defer_bits, pp->dhfis_bits, - pp->dmafis_bits, pp->last_issue_tag); - - nv_swncq_fis_reinit(ap); - - if (lack_dhfis) { - qc = ata_qc_from_tag(ap, pp->last_issue_tag); - nv_swncq_issue_atacmd(ap, qc); - return nr_done; - } - - if (pp->defer_queue.defer_bits) { - /* send deferral queue command */ - qc = nv_swncq_qc_from_dq(ap); - WARN_ON(qc == NULL); - nv_swncq_issue_atacmd(ap, qc); - } - - return nr_done; -} - -static inline u32 nv_swncq_tag(struct ata_port *ap) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - u32 tag; - - tag = readb(pp->tag_block) >> 2; - return (tag & 0x1f); -} - -static int nv_swncq_dmafis(struct ata_port *ap) -{ - struct ata_queued_cmd *qc; - unsigned int rw; - u8 dmactl; - u32 tag; - struct nv_swncq_port_priv *pp = ap->private_data; - - __ata_bmdma_stop(ap); - tag = nv_swncq_tag(ap); - - DPRINTK("dma setup tag 0x%x\n", tag); - qc = ata_qc_from_tag(ap, tag); - - if (unlikely(!qc)) - return 0; - - rw = qc->tf.flags & ATA_TFLAG_WRITE; - - /* load PRD table addr. 
*/ - iowrite32(pp->prd_dma + ATA_PRD_TBL_SZ * qc->tag, - ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS); - - /* specify data direction, triple-check start bit is clear */ - dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD); - dmactl &= ~ATA_DMA_WR; - if (!rw) - dmactl |= ATA_DMA_WR; - - iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD); - - return 1; -} - -static void nv_swncq_host_interrupt(struct ata_port *ap, u16 fis) -{ - struct nv_swncq_port_priv *pp = ap->private_data; - struct ata_queued_cmd *qc; - struct ata_eh_info *ehi = &ap->link.eh_info; - u32 serror; - u8 ata_stat; - int rc = 0; - - ata_stat = ap->ops->check_status(ap); - nv_swncq_irq_clear(ap, fis); - if (!fis) - return; - - if (ap->pflags & ATA_PFLAG_FROZEN) - return; - - if (fis & NV_SWNCQ_IRQ_HOTPLUG) { - nv_swncq_hotplug(ap, fis); - return; - } - - if (!pp->qc_active) - return; - - if (ap->ops->scr_read(ap, SCR_ERROR, &serror)) - return; - ap->ops->scr_write(ap, SCR_ERROR, serror); - - if (ata_stat & ATA_ERR) { - ata_ehi_clear_desc(ehi); - ata_ehi_push_desc(ehi, "Ata error. fis:0x%X", fis); - ehi->err_mask |= AC_ERR_DEV; - ehi->serror |= serror; - ehi->action |= ATA_EH_SOFTRESET; - ata_port_freeze(ap); - return; - } - - if (fis & NV_SWNCQ_IRQ_BACKOUT) { - /* If the IRQ is backout, driver must issue - * the new command again some time later. - */ - pp->ncq_flags |= ncq_saw_backout; - } - - if (fis & NV_SWNCQ_IRQ_SDBFIS) { - pp->ncq_flags |= ncq_saw_sdb; - DPRINTK("id 0x%x SWNCQ: qc_active 0x%X " - "dhfis 0x%X dmafis 0x%X sactive 0x%X\n", - ap->print_id, pp->qc_active, pp->dhfis_bits, - pp->dmafis_bits, readl(pp->sactive_block)); - rc = nv_swncq_sdbfis(ap); - if (rc < 0) - goto irq_error; - } - - if (fis & NV_SWNCQ_IRQ_DHREGFIS) { - /* The interrupt indicates the new command - * was transmitted correctly to the drive. 
- */ - pp->dhfis_bits |= (0x1 << pp->last_issue_tag); - pp->ncq_flags |= ncq_saw_d2h; - if (pp->ncq_flags & (ncq_saw_sdb | ncq_saw_backout)) { - ata_ehi_push_desc(ehi, "illegal fis transaction"); - ehi->err_mask |= AC_ERR_HSM; - ehi->action |= ATA_EH_HARDRESET; - goto irq_error; - } - - if (!(fis & NV_SWNCQ_IRQ_DMASETUP) && - !(pp->ncq_flags & ncq_saw_dmas)) { - ata_stat = ap->ops->check_status(ap); - if (ata_stat & ATA_BUSY) - goto irq_exit; - - if (pp->defer_queue.defer_bits) { - DPRINTK("send next command\n"); - qc = nv_swncq_qc_from_dq(ap); - nv_swncq_issue_atacmd(ap, qc); - } - } - } - - if (fis & NV_SWNCQ_IRQ_DMASETUP) { - /* program the dma controller with appropriate PRD buffers - * and start the DMA transfer for requested command. - */ - pp->dmafis_bits |= (0x1 << nv_swncq_tag(ap)); - pp->ncq_flags |= ncq_saw_dmas; - rc = nv_swncq_dmafis(ap); - } - -irq_exit: - return; -irq_error: - ata_ehi_push_desc(ehi, "fis:0x%x", fis); - ata_port_freeze(ap); - return; -} - -static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance) -{ - struct ata_host *host = dev_instance; - unsigned int i; - unsigned int handled = 0; - unsigned long flags; - u32 irq_stat; - - spin_lock_irqsave(&host->lock, flags); - - irq_stat = readl(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_MCP55); - - for (i = 0; i < host->n_ports; i++) { - struct ata_port *ap = host->ports[i]; - - if (ap && !(ap->flags & ATA_FLAG_DISABLED)) { - if (ap->link.sactive) { - nv_swncq_host_interrupt(ap, (u16)irq_stat); - handled = 1; - } else { - if (irq_stat) /* reserve Hotplug */ - nv_swncq_irq_clear(ap, 0xfff0); - - handled += nv_host_intr(ap, (u8)irq_stat); - } - } - irq_stat >>= NV_INT_PORT_SHIFT_MCP55; - } - - spin_unlock_irqrestore(&host->lock, flags); - - return IRQ_RETVAL(handled); -} - static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) { static int printed_version = 0; @@ -2378,7 +1551,7 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) return 
rc; /* determine type and allocate host */ - if (type == CK804 && adma_enabled) { + if (type >= CK804 && adma_enabled) { dev_printk(KERN_NOTICE, &pdev->dev, "Using ADMA mode\n"); type = ADMA; } @@ -2424,9 +1597,6 @@ static int nv_init_one (struct pci_dev *pdev, const struct pci_device_id *ent) rc = nv_adma_host_init(host); if (rc) return rc; - } else if (type == SWNCQ && swncq_enabled) { - dev_printk(KERN_NOTICE, &pdev->dev, "Using SWNCQ mode\n"); - nv_swncq_host_init(host); } pci_set_master(pdev); @@ -2526,6 +1696,3 @@ module_init(nv_init); module_exit(nv_exit); module_param_named(adma, adma_enabled, bool, 0444); MODULE_PARM_DESC(adma, "Enable use of ADMA (Default: true)"); -module_param_named(swncq, swncq_enabled, bool, 0444); -MODULE_PARM_DESC(swncq, "Enable use of SWNCQ (Default: false)"); - diff --git a/trunk/drivers/char/ec3104_keyb.c b/trunk/drivers/char/ec3104_keyb.c new file mode 100644 index 000000000000..020011495d91 --- /dev/null +++ b/trunk/drivers/char/ec3104_keyb.c @@ -0,0 +1,457 @@ +/* + * linux/drivers/char/ec3104_keyb.c + * + * Copyright (C) 2000 Philipp Rumpf + * + * based on linux/drivers/char/pc_keyb.c, which had the following comments: + * + * Separation of the PC low-level part by Geert Uytterhoeven, May 1997 + * See keyboard.c for the whole history. + * + * Major cleanup by Martin Mares, May 1997 + * + * Combined the keyboard and PS/2 mouse handling into one file, + * because they share the same hardware. + * Johan Myreen 1998-10-08. + * + * Code fixes to handle mouse ACKs properly. + * C. Scott Ananian 1999-01-29. + */ +/* EC3104 note: + * This code was written without any documentation about the EC3104 chip. While + * I hope I got most of the basic functionality right, the register names I use + * are most likely completely different from those in the chip documentation. + * + * If you have any further information about the EC3104, please tell me + * (prumpf@tux.org). 
+ */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +/* Some configuration switches are present in the include file... */ + +#include + +#define MSR_CTS 0x10 +#define MCR_RTS 0x02 +#define LSR_DR 0x01 +#define LSR_BOTH_EMPTY 0x60 + +static struct e5_struct { + u8 packet[8]; + int pos; + int length; + + u8 cached_mcr; + u8 last_msr; +} ec3104_keyb; + +/* Simple translation table for the SysRq keys */ + + +#ifdef CONFIG_MAGIC_SYSRQ +unsigned char ec3104_kbd_sysrq_xlate[128] = + "\000\0331234567890-=\177\t" /* 0x00 - 0x0f */ + "qwertyuiop[]\r\000as" /* 0x10 - 0x1f */ + "dfghjkl;'`\000\\zxcv" /* 0x20 - 0x2f */ + "bnm,./\000*\000 \000\201\202\203\204\205" /* 0x30 - 0x3f */ + "\206\207\210\211\212\000\000789-456+1" /* 0x40 - 0x4f */ + "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */ + "\r\000/"; /* 0x60 - 0x6f */ +#endif + +static void kbd_write_command_w(int data); +static void kbd_write_output_w(int data); +#ifdef CONFIG_PSMOUSE +static void aux_write_ack(int val); +static void __aux_write_ack(int val); +#endif + +static DEFINE_SPINLOCK(kbd_controller_lock); +static unsigned char handle_kbd_event(void); + +/* used only by send_data - set by keyboard_interrupt */ +static volatile unsigned char reply_expected; +static volatile unsigned char acknowledge; +static volatile unsigned char resend; + + +int ec3104_kbd_setkeycode(unsigned int scancode, unsigned int keycode) +{ + return 0; +} + +int ec3104_kbd_getkeycode(unsigned int scancode) +{ + return 0; +} + + +/* yes, it probably would be faster to use an array. I don't care. */ + +static inline unsigned char ec3104_scan2key(unsigned char scancode) +{ + switch (scancode) { + case 1: /* '`' */ + return 41; + + case 2 ... 27: + return scancode; + + case 28: /* '\\' */ + return 43; + + case 29 ... 
39: + return scancode + 1; + + case 40: /* '\r' */ + return 28; + + case 41 ... 50: + return scancode + 3; + + case 51: /* ' ' */ + return 57; + + case 52: /* escape */ + return 1; + + case 54: /* insert/delete (labelled delete) */ + /* this should arguably be 110, but I'd like to have ctrl-alt-del + * working with a standard keymap */ + return 111; + + case 55: /* left */ + return 105; + case 56: /* home */ + return 102; + case 57: /* end */ + return 107; + case 58: /* up */ + return 103; + case 59: /* down */ + return 108; + case 60: /* pgup */ + return 104; + case 61: /* pgdown */ + return 109; + case 62: /* right */ + return 106; + + case 79 ... 88: /* f1 - f10 */ + return scancode - 20; + + case 89 ... 90: /* f11 - f12 */ + return scancode - 2; + + case 91: /* left shift */ + return 42; + + case 92: /* right shift */ + return 54; + + case 93: /* left alt */ + return 56; + case 94: /* right alt */ + return 100; + case 95: /* left ctrl */ + return 29; + case 96: /* right ctrl */ + return 97; + + case 97: /* caps lock */ + return 58; + case 102: /* left windows */ + return 125; + case 103: /* right windows */ + return 126; + + case 106: /* Fn */ + /* this is wrong. 
*/ + return 84; + + default: + return 0; + } +} + +int ec3104_kbd_translate(unsigned char scancode, unsigned char *keycode, + char raw_mode) +{ + scancode &= 0x7f; + + *keycode = ec3104_scan2key(scancode); + + return 1; +} + +char ec3104_kbd_unexpected_up(unsigned char keycode) +{ + return 0200; +} + +static inline void handle_keyboard_event(unsigned char scancode) +{ +#ifdef CONFIG_VT + handle_scancode(scancode, !(scancode & 0x80)); +#endif + tasklet_schedule(&keyboard_tasklet); +} + +void ec3104_kbd_leds(unsigned char leds) +{ +} + +static u8 e5_checksum(u8 *packet, int count) +{ + int i; + u8 sum = 0; + + for (i=0; i", byte); + + ctrl_outb(byte, EC3104_SER4_DATA); + + do { + status = ctrl_inb(EC3104_SER4_LSR); + } while ((status & LSR_BOTH_EMPTY) != LSR_BOTH_EMPTY); + +} + +static int e5_send_packet(u8 *packet, int count, struct e5_struct *k) +{ + int i; + + disable_irq(EC3104_IRQ_SER4); + + if (k->cached_mcr & MCR_RTS) { + printk("e5_send_packet: too slow\n"); + enable_irq(EC3104_IRQ_SER4); + return -EAGAIN; + } + + k->cached_mcr |= MCR_RTS; + ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); + + e5_wait_for_cts(k); + + printk("p: "); + + for(i=0; icached_mcr &= ~MCR_RTS; + ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); + + set_current_state(TASK_UNINTERRUPTIBLE); + + + + enable_irq(EC3104_IRQ_SER4); + + + + return 0; +} + +/* + * E5 packets we know about: + * E5->host 0x80 0x05 - resend packet + * host->E5 0x83 0x43 - set LCD contrast + * host->E5 0x85 0x41 0x02 0x02 - set LCD backlight + * E5->host 0x87 0x00 - external PS2 + * E5->host 0x88 - key press + */ + +static void e5_receive(struct e5_struct *k) +{ + k->packet[k->pos++] = ctrl_inb(EC3104_SER4_DATA); + + if (k->pos == 1) { + switch(k->packet[0]) { + case 0x80: + k->length = 3; + break; + + case 0x87: /* PS2 ext */ + k->length = 6; + break; + + case 0x88: /* keyboard */ + k->length = 3; + break; + + default: + k->length = 1; + printk(KERN_WARNING "unknown E5 packet %02x\n", + k->packet[0]); + } + } + + if (k->pos 
== k->length) { + int i; + + if (e5_checksum(k->packet, k->length) != 0) + printk(KERN_WARNING "E5: wrong checksum\n"); + +#if 0 + printk("E5 packet ["); + for(i=0; ilength; i++) { + printk("%02x ", k->packet[i]); + } + + printk("(%02x)]\n", e5_checksum(k->packet, k->length-1)); +#endif + + switch(k->packet[0]) { + case 0x80: + case 0x88: + handle_keyboard_event(k->packet[1]); + break; + } + + k->pos = k->length = 0; + } +} + +static void ec3104_keyb_interrupt(int irq, void *data) +{ + struct e5_struct *k = &ec3104_keyb; + u8 msr, lsr; + + msr = ctrl_inb(EC3104_SER4_MSR); + + if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) { + if (k->cached_mcr & MCR_RTS) + printk("confused: RTS already high\n"); + /* CTS went high. Send RTS. */ + k->cached_mcr |= MCR_RTS; + + ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); + } else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) { + /* CTS went low. */ + if (!(k->cached_mcr & MCR_RTS)) + printk("confused: RTS already low\n"); + + k->cached_mcr &= ~MCR_RTS; + + ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); + } + + k->last_msr = msr; + + lsr = ctrl_inb(EC3104_SER4_LSR); + + if (lsr & LSR_DR) + e5_receive(k); +} + +static void ec3104_keyb_clear_state(void) +{ + struct e5_struct *k = &ec3104_keyb; + u8 msr, lsr; + + /* we want CTS to be low */ + k->last_msr = 0; + + for (;;) { + msleep(100); + + msr = ctrl_inb(EC3104_SER4_MSR); + + lsr = ctrl_inb(EC3104_SER4_LSR); + + if (lsr & LSR_DR) { + e5_receive(k); + continue; + } + + if ((msr & MSR_CTS) && !(k->last_msr & MSR_CTS)) { + if (k->cached_mcr & MCR_RTS) + printk("confused: RTS already high\n"); + /* CTS went high. Send RTS. */ + k->cached_mcr |= MCR_RTS; + + ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); + } else if ((!(msr & MSR_CTS)) && (k->last_msr & MSR_CTS)) { + /* CTS went low. 
*/ + if (!(k->cached_mcr & MCR_RTS)) + printk("confused: RTS already low\n"); + + k->cached_mcr &= ~MCR_RTS; + + ctrl_outb(k->cached_mcr, EC3104_SER4_MCR); + } else + break; + + k->last_msr = msr; + + continue; + } +} + +void __init ec3104_kbd_init_hw(void) +{ + ec3104_keyb.last_msr = ctrl_inb(EC3104_SER4_MSR); + ec3104_keyb.cached_mcr = ctrl_inb(EC3104_SER4_MCR); + + ec3104_keyb_clear_state(); + + /* Ok, finally allocate the IRQ, and off we go.. */ + request_irq(EC3104_IRQ_SER4, ec3104_keyb_interrupt, 0, "keyboard", NULL); +} diff --git a/trunk/drivers/hwmon/Kconfig b/trunk/drivers/hwmon/Kconfig index 700a1657554f..e47f88170806 100644 --- a/trunk/drivers/hwmon/Kconfig +++ b/trunk/drivers/hwmon/Kconfig @@ -158,7 +158,6 @@ config SENSORS_K8TEMP config SENSORS_AMS tristate "Apple Motion Sensor driver" depends on PPC_PMAC && !PPC64 && INPUT && ((ADB_PMU && I2C = y) || (ADB_PMU && !I2C) || I2C) && EXPERIMENTAL - select INPUT_POLLDEV help Support for the motion sensor included in PowerBooks. Includes implementations for PMU and I2C. 
@@ -702,7 +701,6 @@ config SENSORS_W83627EHF config SENSORS_HDAPS tristate "IBM Hard Drive Active Protection System (hdaps)" depends on INPUT && X86 - select INPUT_POLLDEV default n help This driver provides support for the IBM Hard Drive Active Protection @@ -724,7 +722,6 @@ config SENSORS_APPLESMC depends on INPUT && X86 select NEW_LEDS select LEDS_CLASS - select INPUT_POLLDEV default n help This driver provides support for the Apple System Management diff --git a/trunk/drivers/hwmon/ams/ams-input.c b/trunk/drivers/hwmon/ams/ams-input.c index 7b81e0c2c2d9..ca7095d96ad0 100644 --- a/trunk/drivers/hwmon/ams/ams-input.c +++ b/trunk/drivers/hwmon/ams/ams-input.c @@ -27,32 +27,47 @@ static unsigned int invert; module_param(invert, bool, 0644); MODULE_PARM_DESC(invert, "Invert input data on X and Y axis"); -static void ams_idev_poll(struct input_polled_dev *dev) +static int ams_input_kthread(void *data) { - struct input_dev *idev = dev->input; s8 x, y, z; - mutex_lock(&ams_info.lock); + while (!kthread_should_stop()) { + mutex_lock(&ams_info.lock); - ams_sensors(&x, &y, &z); + ams_sensors(&x, &y, &z); - x -= ams_info.xcalib; - y -= ams_info.ycalib; - z -= ams_info.zcalib; + x -= ams_info.xcalib; + y -= ams_info.ycalib; + z -= ams_info.zcalib; - input_report_abs(idev, ABS_X, invert ? -x : x); - input_report_abs(idev, ABS_Y, invert ? -y : y); - input_report_abs(idev, ABS_Z, z); + input_report_abs(ams_info.idev, ABS_X, invert ? -x : x); + input_report_abs(ams_info.idev, ABS_Y, invert ? -y : y); + input_report_abs(ams_info.idev, ABS_Z, z); - input_sync(idev); + input_sync(ams_info.idev); - mutex_unlock(&ams_info.lock); + mutex_unlock(&ams_info.lock); + + msleep(25); + } + + return 0; +} + +static int ams_input_open(struct input_dev *dev) +{ + ams_info.kthread = kthread_run(ams_input_kthread, NULL, "kams"); + return IS_ERR(ams_info.kthread) ? 
PTR_ERR(ams_info.kthread) : 0; +} + +static void ams_input_close(struct input_dev *dev) +{ + kthread_stop(ams_info.kthread); } /* Call with ams_info.lock held! */ static void ams_input_enable(void) { - struct input_dev *input; s8 x, y, z; if (ams_info.idev) @@ -63,29 +78,27 @@ static void ams_input_enable(void) ams_info.ycalib = y; ams_info.zcalib = z; - ams_info.idev = input_allocate_polled_device(); + ams_info.idev = input_allocate_device(); if (!ams_info.idev) return; - ams_info.idev->poll = ams_idev_poll; - ams_info.idev->poll_interval = 25; - - input = ams_info.idev->input; - input->name = "Apple Motion Sensor"; - input->id.bustype = ams_info.bustype; - input->id.vendor = 0; - input->dev.parent = &ams_info.of_dev->dev; + ams_info.idev->name = "Apple Motion Sensor"; + ams_info.idev->id.bustype = ams_info.bustype; + ams_info.idev->id.vendor = 0; + ams_info.idev->open = ams_input_open; + ams_info.idev->close = ams_input_close; + ams_info.idev->dev.parent = &ams_info.of_dev->dev; - input_set_abs_params(input, ABS_X, -50, 50, 3, 0); - input_set_abs_params(input, ABS_Y, -50, 50, 3, 0); - input_set_abs_params(input, ABS_Z, -50, 50, 3, 0); + input_set_abs_params(ams_info.idev, ABS_X, -50, 50, 3, 0); + input_set_abs_params(ams_info.idev, ABS_Y, -50, 50, 3, 0); + input_set_abs_params(ams_info.idev, ABS_Z, -50, 50, 3, 0); - set_bit(EV_ABS, input->evbit); - set_bit(EV_KEY, input->evbit); - set_bit(BTN_TOUCH, input->keybit); + set_bit(EV_ABS, ams_info.idev->evbit); + set_bit(EV_KEY, ams_info.idev->evbit); + set_bit(BTN_TOUCH, ams_info.idev->keybit); - if (input_register_polled_device(ams_info.idev)) { - input_free_polled_device(ams_info.idev); + if (input_register_device(ams_info.idev)) { + input_free_device(ams_info.idev); ams_info.idev = NULL; return; } @@ -95,8 +108,7 @@ static void ams_input_enable(void) static void ams_input_disable(void) { if (ams_info.idev) { - input_unregister_polled_device(ams_info.idev); - input_free_polled_device(ams_info.idev); + 
input_unregister_device(ams_info.idev); ams_info.idev = NULL; } } diff --git a/trunk/drivers/hwmon/ams/ams.h b/trunk/drivers/hwmon/ams/ams.h index a6221e5dd984..240730e6bcde 100644 --- a/trunk/drivers/hwmon/ams/ams.h +++ b/trunk/drivers/hwmon/ams/ams.h @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include @@ -52,7 +52,8 @@ struct ams { #endif /* Joystick emulation */ - struct input_polled_dev *idev; + struct task_struct *kthread; + struct input_dev *idev; __u16 bustype; /* calibrated null values */ diff --git a/trunk/drivers/hwmon/applesmc.c b/trunk/drivers/hwmon/applesmc.c index 4879125b4cdc..f37fd7ebf65a 100644 --- a/trunk/drivers/hwmon/applesmc.c +++ b/trunk/drivers/hwmon/applesmc.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include @@ -59,9 +59,9 @@ #define LIGHT_SENSOR_LEFT_KEY "ALV0" /* r-o {alv (6 bytes) */ #define LIGHT_SENSOR_RIGHT_KEY "ALV1" /* r-o {alv (6 bytes) */ -#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */ +#define BACKLIGHT_KEY "LKSB" /* w-o {lkb (2 bytes) */ -#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */ +#define CLAMSHELL_KEY "MSLD" /* r-o ui8 (unused) */ #define MOTION_SENSOR_X_KEY "MO_X" /* r-o sp78 (2 bytes) */ #define MOTION_SENSOR_Y_KEY "MO_Y" /* r-o sp78 (2 bytes) */ @@ -103,7 +103,7 @@ static const char* fan_speed_keys[] = { #define INIT_TIMEOUT_MSECS 5000 /* wait up to 5s for device init ... */ #define INIT_WAIT_MSECS 50 /* ... 
in 50ms increments */ -#define APPLESMC_POLL_INTERVAL 50 /* msecs */ +#define APPLESMC_POLL_PERIOD (HZ/20) /* poll for input every 1/20s */ #define APPLESMC_INPUT_FUZZ 4 /* input event threshold */ #define APPLESMC_INPUT_FLAT 4 @@ -125,8 +125,9 @@ static const int debug; static struct platform_device *pdev; static s16 rest_x; static s16 rest_y; +static struct timer_list applesmc_timer; +static struct input_dev *applesmc_idev; static struct device *hwmon_dev; -static struct input_polled_dev *applesmc_idev; /* Indicates whether this computer has an accelerometer. */ static unsigned int applesmc_accelerometer; @@ -137,7 +138,7 @@ static unsigned int applesmc_light; /* Indicates which temperature sensors set to use. */ static unsigned int applesmc_temperature_set; -static DEFINE_MUTEX(applesmc_lock); +static struct mutex applesmc_lock; /* * Last index written to key_at_index sysfs file, and value to use for all other @@ -454,12 +455,27 @@ static void applesmc_calibrate(void) rest_x = -rest_x; } -static void applesmc_idev_poll(struct input_polled_dev *dev) +static int applesmc_idev_open(struct input_dev *dev) +{ + add_timer(&applesmc_timer); + + return 0; +} + +static void applesmc_idev_close(struct input_dev *dev) +{ + del_timer_sync(&applesmc_timer); +} + +static void applesmc_idev_poll(unsigned long unused) { - struct input_dev *idev = dev->input; s16 x, y; - mutex_lock(&applesmc_lock); + /* Cannot sleep. Try nonblockingly. If we fail, try again later. 
*/ + if (!mutex_trylock(&applesmc_lock)) { + mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD); + return; + } if (applesmc_read_motion_sensor(SENSOR_X, &x)) goto out; @@ -467,11 +483,13 @@ static void applesmc_idev_poll(struct input_polled_dev *dev) goto out; x = -x; - input_report_abs(idev, ABS_X, x - rest_x); - input_report_abs(idev, ABS_Y, y - rest_y); - input_sync(idev); + input_report_abs(applesmc_idev, ABS_X, x - rest_x); + input_report_abs(applesmc_idev, ABS_Y, y - rest_y); + input_sync(applesmc_idev); out: + mod_timer(&applesmc_timer, jiffies + APPLESMC_POLL_PERIOD); + mutex_unlock(&applesmc_lock); } @@ -803,7 +821,8 @@ static ssize_t applesmc_key_at_index_read_show(struct device *dev, if (!ret) { return info[0]; - } else { + } + else { return ret; } } @@ -1074,7 +1093,6 @@ static int applesmc_dmi_match(const struct dmi_system_id *id) /* Create accelerometer ressources */ static int applesmc_create_accelerometer(void) { - struct input_dev *idev; int ret; ret = sysfs_create_group(&pdev->dev.kobj, @@ -1082,37 +1100,40 @@ static int applesmc_create_accelerometer(void) if (ret) goto out; - applesmc_idev = input_allocate_polled_device(); + applesmc_idev = input_allocate_device(); if (!applesmc_idev) { ret = -ENOMEM; goto out_sysfs; } - applesmc_idev->poll = applesmc_idev_poll; - applesmc_idev->poll_interval = APPLESMC_POLL_INTERVAL; - /* initial calibrate for the input device */ applesmc_calibrate(); - /* initialize the input device */ - idev = applesmc_idev->input; - idev->name = "applesmc"; - idev->id.bustype = BUS_HOST; - idev->dev.parent = &pdev->dev; - idev->evbit[0] = BIT(EV_ABS); - input_set_abs_params(idev, ABS_X, + /* initialize the input class */ + applesmc_idev->name = "applesmc"; + applesmc_idev->id.bustype = BUS_HOST; + applesmc_idev->dev.parent = &pdev->dev; + applesmc_idev->evbit[0] = BIT(EV_ABS); + applesmc_idev->open = applesmc_idev_open; + applesmc_idev->close = applesmc_idev_close; + input_set_abs_params(applesmc_idev, ABS_X, -256, 
256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - input_set_abs_params(idev, ABS_Y, + input_set_abs_params(applesmc_idev, ABS_Y, -256, 256, APPLESMC_INPUT_FUZZ, APPLESMC_INPUT_FLAT); - ret = input_register_polled_device(applesmc_idev); + ret = input_register_device(applesmc_idev); if (ret) goto out_idev; + /* start up our timer for the input device */ + init_timer(&applesmc_timer); + applesmc_timer.function = applesmc_idev_poll; + applesmc_timer.expires = jiffies + APPLESMC_POLL_PERIOD; + return 0; out_idev: - input_free_polled_device(applesmc_idev); + input_free_device(applesmc_idev); out_sysfs: sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group); @@ -1125,8 +1146,8 @@ static int applesmc_create_accelerometer(void) /* Release all ressources used by the accelerometer */ static void applesmc_release_accelerometer(void) { - input_unregister_polled_device(applesmc_idev); - input_free_polled_device(applesmc_idev); + del_timer_sync(&applesmc_timer); + input_unregister_device(applesmc_idev); sysfs_remove_group(&pdev->dev.kobj, &accelerometer_attributes_group); } @@ -1163,6 +1184,8 @@ static int __init applesmc_init(void) int count; int i; + mutex_init(&applesmc_lock); + if (!dmi_check_system(applesmc_whitelist)) { printk(KERN_WARNING "applesmc: supported laptop not found!\n"); ret = -ENODEV; diff --git a/trunk/drivers/hwmon/hdaps.c b/trunk/drivers/hwmon/hdaps.c index 8a7ae03aeee4..a7c6d407572b 100644 --- a/trunk/drivers/hwmon/hdaps.c +++ b/trunk/drivers/hwmon/hdaps.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include #include #include @@ -61,12 +61,13 @@ #define INIT_TIMEOUT_MSECS 4000 /* wait up to 4s for device init ... */ #define INIT_WAIT_MSECS 200 /* ... 
in 200ms increments */ -#define HDAPS_POLL_INTERVAL 50 /* poll for input every 1/20s (50 ms)*/ +#define HDAPS_POLL_PERIOD (HZ/20) /* poll for input every 1/20s */ #define HDAPS_INPUT_FUZZ 4 /* input event threshold */ #define HDAPS_INPUT_FLAT 4 +static struct timer_list hdaps_timer; static struct platform_device *pdev; -static struct input_polled_dev *hdaps_idev; +static struct input_dev *hdaps_idev; static unsigned int hdaps_invert; static u8 km_activity; static int rest_x; @@ -322,19 +323,24 @@ static void hdaps_calibrate(void) __hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &rest_x, &rest_y); } -static void hdaps_mousedev_poll(struct input_polled_dev *dev) +static void hdaps_mousedev_poll(unsigned long unused) { - struct input_dev *input_dev = dev->input; int x, y; - mutex_lock(&hdaps_mtx); + /* Cannot sleep. Try nonblockingly: mutex_trylock() returns 1 when the lock was taken and 0 on contention. If we fail (0), try again later. */ + if (!mutex_trylock(&hdaps_mtx)) { + mod_timer(&hdaps_timer,jiffies + HDAPS_POLL_PERIOD); + return; + } if (__hdaps_read_pair(HDAPS_PORT_XPOS, HDAPS_PORT_YPOS, &x, &y)) goto out; - input_report_abs(input_dev, ABS_X, x - rest_x); - input_report_abs(input_dev, ABS_Y, y - rest_y); - input_sync(input_dev); + input_report_abs(hdaps_idev, ABS_X, x - rest_x); + input_report_abs(hdaps_idev, ABS_Y, y - rest_y); + input_sync(hdaps_idev); + + mod_timer(&hdaps_timer, jiffies + HDAPS_POLL_PERIOD); out: mutex_unlock(&hdaps_mtx); @@ -530,7 +536,6 @@ static struct dmi_system_id __initdata hdaps_whitelist[] = { static int __init hdaps_init(void) { - struct input_dev *idev; int ret; if (!dmi_check_system(hdaps_whitelist)) { @@ -558,37 +563,39 @@ static int __init hdaps_init(void) if (ret) goto out_device; - hdaps_idev = input_allocate_polled_device(); + hdaps_idev = input_allocate_device(); if (!hdaps_idev) { ret = -ENOMEM; goto out_group; } - hdaps_idev->poll = hdaps_mousedev_poll; - hdaps_idev->poll_interval = HDAPS_POLL_INTERVAL; - /* initial calibrate for the input device */ hdaps_calibrate(); /* initialize the input
class */ - idev = hdaps_idev->input; - idev->name = "hdaps"; - idev->dev.parent = &pdev->dev; - idev->evbit[0] = BIT(EV_ABS); - input_set_abs_params(idev, ABS_X, + hdaps_idev->name = "hdaps"; + hdaps_idev->dev.parent = &pdev->dev; + hdaps_idev->evbit[0] = BIT(EV_ABS); + input_set_abs_params(hdaps_idev, ABS_X, -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); - input_set_abs_params(idev, ABS_Y, + input_set_abs_params(hdaps_idev, ABS_Y, -256, 256, HDAPS_INPUT_FUZZ, HDAPS_INPUT_FLAT); - ret = input_register_polled_device(hdaps_idev); + ret = input_register_device(hdaps_idev); if (ret) goto out_idev; + /* start up our timer for the input device */ + init_timer(&hdaps_timer); + hdaps_timer.function = hdaps_mousedev_poll; + hdaps_timer.expires = jiffies + HDAPS_POLL_PERIOD; + add_timer(&hdaps_timer); + printk(KERN_INFO "hdaps: driver successfully loaded.\n"); return 0; out_idev: - input_free_polled_device(hdaps_idev); + input_free_device(hdaps_idev); out_group: sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group); out_device: @@ -604,8 +611,8 @@ static int __init hdaps_init(void) static void __exit hdaps_exit(void) { - input_unregister_polled_device(hdaps_idev); - input_free_polled_device(hdaps_idev); + del_timer_sync(&hdaps_timer); + input_unregister_device(hdaps_idev); sysfs_remove_group(&pdev->dev.kobj, &hdaps_attribute_group); platform_device_unregister(pdev); platform_driver_unregister(&hdaps_driver); diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib.h b/trunk/drivers/infiniband/ulp/ipoib/ipoib.h index 1b3327ad6bc4..6545fa798b12 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib.h @@ -349,7 +349,6 @@ struct ipoib_neigh { struct sk_buff_head queue; struct neighbour *neighbour; - struct net_device *dev; struct list_head list; }; @@ -366,8 +365,7 @@ static inline struct ipoib_neigh **to_ipoib_neigh(struct neighbour *neigh) INFINIBAND_ALEN, sizeof(void *)); } -struct ipoib_neigh *ipoib_neigh_alloc(struct 
neighbour *neigh, - struct net_device *dev); +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neigh); void ipoib_neigh_free(struct net_device *dev, struct ipoib_neigh *neigh); extern struct workqueue_struct *ipoib_workqueue; diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c b/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c index 362610d870e4..e072f3c32ce6 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -517,7 +517,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) struct ipoib_path *path; struct ipoib_neigh *neigh; - neigh = ipoib_neigh_alloc(skb->dst->neighbour, skb->dev); + neigh = ipoib_neigh_alloc(skb->dst->neighbour); if (!neigh) { ++dev->stats.tx_dropped; dev_kfree_skb_any(skb); @@ -692,10 +692,9 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) goto out; } } else if (neigh->ah) { - if (unlikely((memcmp(&neigh->dgid.raw, + if (unlikely(memcmp(&neigh->dgid.raw, skb->dst->neighbour->ha + 4, - sizeof(union ib_gid))) || - (neigh->dev != dev))) { + sizeof(union ib_gid)))) { spin_lock(&priv->lock); /* * It's safe to call ipoib_put_ah() inside @@ -818,13 +817,6 @@ static void ipoib_neigh_cleanup(struct neighbour *n) unsigned long flags; struct ipoib_ah *ah = NULL; - neigh = *to_ipoib_neigh(n); - if (neigh) { - priv = netdev_priv(neigh->dev); - ipoib_dbg(priv, "neigh_destructor for bonding device: %s\n", - n->dev->name); - } else - return; ipoib_dbg(priv, "neigh_cleanup for %06x " IPOIB_GID_FMT "\n", IPOIB_QPN(n->ha), @@ -832,10 +824,13 @@ static void ipoib_neigh_cleanup(struct neighbour *n) spin_lock_irqsave(&priv->lock, flags); - if (neigh->ah) - ah = neigh->ah; - list_del(&neigh->list); - ipoib_neigh_free(n->dev, neigh); + neigh = *to_ipoib_neigh(n); + if (neigh) { + if (neigh->ah) + ah = neigh->ah; + list_del(&neigh->list); + ipoib_neigh_free(n->dev, neigh); + } spin_unlock_irqrestore(&priv->lock, flags); @@ -843,8 +838,7 @@ 
static void ipoib_neigh_cleanup(struct neighbour *n) ipoib_put_ah(ah); } -struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, - struct net_device *dev) +struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour) { struct ipoib_neigh *neigh; @@ -853,7 +847,6 @@ struct ipoib_neigh *ipoib_neigh_alloc(struct neighbour *neighbour, return NULL; neigh->neighbour = neighbour; - neigh->dev = dev; *to_ipoib_neigh(neighbour) = neigh; skb_queue_head_init(&neigh->queue); ipoib_cm_set(neigh, NULL); diff --git a/trunk/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/trunk/drivers/infiniband/ulp/ipoib/ipoib_multicast.c index 9bcfc7ad6aa6..827820ec66d1 100644 --- a/trunk/drivers/infiniband/ulp/ipoib/ipoib_multicast.c +++ b/trunk/drivers/infiniband/ulp/ipoib/ipoib_multicast.c @@ -705,8 +705,7 @@ void ipoib_mcast_send(struct net_device *dev, void *mgid, struct sk_buff *skb) if (skb->dst && skb->dst->neighbour && !*to_ipoib_neigh(skb->dst->neighbour)) { - struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour, - skb->dev); + struct ipoib_neigh *neigh = ipoib_neigh_alloc(skb->dst->neighbour); if (neigh) { kref_get(&mcast->ah->ref); diff --git a/trunk/drivers/input/Kconfig b/trunk/drivers/input/Kconfig index 63512d906f02..2d87357e2b2b 100644 --- a/trunk/drivers/input/Kconfig +++ b/trunk/drivers/input/Kconfig @@ -114,6 +114,28 @@ config INPUT_JOYDEV To compile this driver as a module, choose M here: the module will be called joydev. +config INPUT_TSDEV + tristate "Touchscreen interface" + ---help--- + Say Y here if you have an application that only can understand the + Compaq touchscreen protocol for absolute pointer data. This is + useful namely for embedded configurations. + + If unsure, say N. + + To compile this driver as a module, choose M here: the + module will be called tsdev. 
+ +config INPUT_TSDEV_SCREEN_X + int "Horizontal screen resolution" + depends on INPUT_TSDEV + default "240" + +config INPUT_TSDEV_SCREEN_Y + int "Vertical screen resolution" + depends on INPUT_TSDEV + default "320" + config INPUT_EVDEV tristate "Event interface" help diff --git a/trunk/drivers/input/Makefile b/trunk/drivers/input/Makefile index 99af903bd3ce..15eb752697b3 100644 --- a/trunk/drivers/input/Makefile +++ b/trunk/drivers/input/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_INPUT_POLLDEV) += input-polldev.o obj-$(CONFIG_INPUT_MOUSEDEV) += mousedev.o obj-$(CONFIG_INPUT_JOYDEV) += joydev.o obj-$(CONFIG_INPUT_EVDEV) += evdev.o +obj-$(CONFIG_INPUT_TSDEV) += tsdev.o obj-$(CONFIG_INPUT_EVBUG) += evbug.o obj-$(CONFIG_INPUT_KEYBOARD) += keyboard/ diff --git a/trunk/drivers/input/evdev.c b/trunk/drivers/input/evdev.c index 1d62c8b88e12..f1c3d6cebd58 100644 --- a/trunk/drivers/input/evdev.c +++ b/trunk/drivers/input/evdev.c @@ -30,8 +30,6 @@ struct evdev { wait_queue_head_t wait; struct evdev_client *grab; struct list_head client_list; - spinlock_t client_lock; /* protects client_list */ - struct mutex mutex; struct device dev; }; @@ -39,54 +37,39 @@ struct evdev_client { struct input_event buffer[EVDEV_BUFFER_SIZE]; int head; int tail; - spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct evdev *evdev; struct list_head node; }; static struct evdev *evdev_table[EVDEV_MINORS]; -static DEFINE_MUTEX(evdev_table_mutex); -static void evdev_pass_event(struct evdev_client *client, - struct input_event *event) -{ - /* - * Interrupts are disabled, just acquire the lock - */ - spin_lock(&client->buffer_lock); - client->buffer[client->head++] = *event; - client->head &= EVDEV_BUFFER_SIZE - 1; - spin_unlock(&client->buffer_lock); - - kill_fasync(&client->fasync, SIGIO, POLL_IN); -} - -/* - * Pass incoming event to all connected clients. 
- */ -static void evdev_event(struct input_handle *handle, - unsigned int type, unsigned int code, int value) +static void evdev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { struct evdev *evdev = handle->private; struct evdev_client *client; - struct input_event event; - do_gettimeofday(&event.time); - event.type = type; - event.code = code; - event.value = value; + if (evdev->grab) { + client = evdev->grab; - rcu_read_lock(); + do_gettimeofday(&client->buffer[client->head].time); + client->buffer[client->head].type = type; + client->buffer[client->head].code = code; + client->buffer[client->head].value = value; + client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1); - client = rcu_dereference(evdev->grab); - if (client) - evdev_pass_event(client, &event); - else - list_for_each_entry_rcu(client, &evdev->client_list, node) - evdev_pass_event(client, &event); + kill_fasync(&client->fasync, SIGIO, POLL_IN); + } else + list_for_each_entry(client, &evdev->client_list, node) { - rcu_read_unlock(); + do_gettimeofday(&client->buffer[client->head].time); + client->buffer[client->head].type = type; + client->buffer[client->head].code = code; + client->buffer[client->head].value = value; + client->head = (client->head + 1) & (EVDEV_BUFFER_SIZE - 1); + + kill_fasync(&client->fasync, SIGIO, POLL_IN); + } wake_up_interruptible(&evdev->wait); } @@ -105,140 +88,38 @@ static int evdev_flush(struct file *file, fl_owner_t id) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - int retval; - - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; if (!evdev->exist) - retval = -ENODEV; - else - retval = input_flush_device(&evdev->handle, file); + return -ENODEV; - mutex_unlock(&evdev->mutex); - return retval; + return input_flush_device(&evdev->handle, file); } static void evdev_free(struct device *dev) { struct evdev *evdev = container_of(dev, struct evdev, dev); + 
evdev_table[evdev->minor] = NULL; kfree(evdev); } -/* - * Grabs an event device (along with underlying input device). - * This function is called with evdev->mutex taken. - */ -static int evdev_grab(struct evdev *evdev, struct evdev_client *client) -{ - int error; - - if (evdev->grab) - return -EBUSY; - - error = input_grab_device(&evdev->handle); - if (error) - return error; - - rcu_assign_pointer(evdev->grab, client); - synchronize_rcu(); - - return 0; -} - -static int evdev_ungrab(struct evdev *evdev, struct evdev_client *client) -{ - if (evdev->grab != client) - return -EINVAL; - - rcu_assign_pointer(evdev->grab, NULL); - synchronize_rcu(); - input_release_device(&evdev->handle); - - return 0; -} - -static void evdev_attach_client(struct evdev *evdev, - struct evdev_client *client) -{ - spin_lock(&evdev->client_lock); - list_add_tail_rcu(&client->node, &evdev->client_list); - spin_unlock(&evdev->client_lock); - synchronize_rcu(); -} - -static void evdev_detach_client(struct evdev *evdev, - struct evdev_client *client) -{ - spin_lock(&evdev->client_lock); - list_del_rcu(&client->node); - spin_unlock(&evdev->client_lock); - synchronize_rcu(); -} - -static int evdev_open_device(struct evdev *evdev) -{ - int retval; - - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; - - if (!evdev->exist) - retval = -ENODEV; - else if (!evdev->open++) { - retval = input_open_device(&evdev->handle); - if (retval) - evdev->open--; - } - - mutex_unlock(&evdev->mutex); - return retval; -} - -static void evdev_close_device(struct evdev *evdev) -{ - mutex_lock(&evdev->mutex); - - if (evdev->exist && !--evdev->open) - input_close_device(&evdev->handle); - - mutex_unlock(&evdev->mutex); -} - -/* - * Wake up users waiting for IO so they can disconnect from - * dead device. 
- */ -static void evdev_hangup(struct evdev *evdev) -{ - struct evdev_client *client; - - spin_lock(&evdev->client_lock); - list_for_each_entry(client, &evdev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - spin_unlock(&evdev->client_lock); - - wake_up_interruptible(&evdev->wait); -} - static int evdev_release(struct inode *inode, struct file *file) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - mutex_lock(&evdev->mutex); - if (evdev->grab == client) - evdev_ungrab(evdev, client); - mutex_unlock(&evdev->mutex); + if (evdev->grab == client) { + input_release_device(&evdev->handle); + evdev->grab = NULL; + } evdev_fasync(-1, file, 0); - evdev_detach_client(evdev, client); + list_del(&client->node); kfree(client); - evdev_close_device(evdev); + if (!--evdev->open && evdev->exist) + input_close_device(&evdev->handle); + put_device(&evdev->dev); return 0; @@ -246,44 +127,41 @@ static int evdev_release(struct inode *inode, struct file *file) static int evdev_open(struct inode *inode, struct file *file) { - struct evdev *evdev; struct evdev_client *client; + struct evdev *evdev; int i = iminor(inode) - EVDEV_MINOR_BASE; int error; if (i >= EVDEV_MINORS) return -ENODEV; - error = mutex_lock_interruptible(&evdev_table_mutex); - if (error) - return error; evdev = evdev_table[i]; - if (evdev) - get_device(&evdev->dev); - mutex_unlock(&evdev_table_mutex); - if (!evdev) + if (!evdev || !evdev->exist) return -ENODEV; + get_device(&evdev->dev); + client = kzalloc(sizeof(struct evdev_client), GFP_KERNEL); if (!client) { error = -ENOMEM; goto err_put_evdev; } - spin_lock_init(&client->buffer_lock); client->evdev = evdev; - evdev_attach_client(evdev, client); + list_add_tail(&client->node, &evdev->client_list); - error = evdev_open_device(evdev); - if (error) - goto err_free_client; + if (!evdev->open++ && evdev->exist) { + error = input_open_device(&evdev->handle); + if (error) + goto err_free_client; + } 
file->private_data = client; return 0; err_free_client: - evdev_detach_client(evdev, client); + list_del(&client->node); kfree(client); err_put_evdev: put_device(&evdev->dev); @@ -319,14 +197,12 @@ static inline size_t evdev_event_size(void) sizeof(struct input_event_compat) : sizeof(struct input_event); } -static int evdev_event_from_user(const char __user *buffer, - struct input_event *event) +static int evdev_event_from_user(const char __user *buffer, struct input_event *event) { if (COMPAT_TEST) { struct input_event_compat compat_event; - if (copy_from_user(&compat_event, buffer, - sizeof(struct input_event_compat))) + if (copy_from_user(&compat_event, buffer, sizeof(struct input_event_compat))) return -EFAULT; event->time.tv_sec = compat_event.time.tv_sec; @@ -343,8 +219,7 @@ static int evdev_event_from_user(const char __user *buffer, return 0; } -static int evdev_event_to_user(char __user *buffer, - const struct input_event *event) +static int evdev_event_to_user(char __user *buffer, const struct input_event *event) { if (COMPAT_TEST) { struct input_event_compat compat_event; @@ -355,8 +230,7 @@ static int evdev_event_to_user(char __user *buffer, compat_event.code = event->code; compat_event.value = event->value; - if (copy_to_user(buffer, &compat_event, - sizeof(struct input_event_compat))) + if (copy_to_user(buffer, &compat_event, sizeof(struct input_event_compat))) return -EFAULT; } else { @@ -374,8 +248,7 @@ static inline size_t evdev_event_size(void) return sizeof(struct input_event); } -static int evdev_event_from_user(const char __user *buffer, - struct input_event *event) +static int evdev_event_from_user(const char __user *buffer, struct input_event *event) { if (copy_from_user(event, buffer, sizeof(struct input_event))) return -EFAULT; @@ -383,8 +256,7 @@ static int evdev_event_from_user(const char __user *buffer, return 0; } -static int evdev_event_to_user(char __user *buffer, - const struct input_event *event) +static int evdev_event_to_user(char 
__user *buffer, const struct input_event *event) { if (copy_to_user(buffer, event, sizeof(struct input_event))) return -EFAULT; @@ -394,71 +266,37 @@ static int evdev_event_to_user(char __user *buffer, #endif /* CONFIG_COMPAT */ -static ssize_t evdev_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t evdev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; struct input_event event; - int retval; + int retval = 0; - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; - - if (!evdev->exist) { - retval = -ENODEV; - goto out; - } + if (!evdev->exist) + return -ENODEV; while (retval < count) { - if (evdev_event_from_user(buffer + retval, &event)) { - retval = -EFAULT; - goto out; - } - - input_inject_event(&evdev->handle, - event.type, event.code, event.value); + if (evdev_event_from_user(buffer + retval, &event)) + return -EFAULT; + input_inject_event(&evdev->handle, event.type, event.code, event.value); retval += evdev_event_size(); } - out: - mutex_unlock(&evdev->mutex); return retval; } -static int evdev_fetch_next_event(struct evdev_client *client, - struct input_event *event) -{ - int have_event; - - spin_lock_irq(&client->buffer_lock); - - have_event = client->head != client->tail; - if (have_event) { - *event = client->buffer[client->tail++]; - client->tail &= EVDEV_BUFFER_SIZE - 1; - } - - spin_unlock_irq(&client->buffer_lock); - - return have_event; -} - -static ssize_t evdev_read(struct file *file, char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t evdev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; - struct input_event event; int retval; if (count < evdev_event_size()) return -EINVAL; - if (client->head == client->tail && evdev->exist && 
- (file->f_flags & O_NONBLOCK)) + if (client->head == client->tail && evdev->exist && (file->f_flags & O_NONBLOCK)) return -EAGAIN; retval = wait_event_interruptible(evdev->wait, @@ -469,12 +307,14 @@ static ssize_t evdev_read(struct file *file, char __user *buffer, if (!evdev->exist) return -ENODEV; - while (retval + evdev_event_size() <= count && - evdev_fetch_next_event(client, &event)) { + while (client->head != client->tail && retval + evdev_event_size() <= count) { + + struct input_event *event = (struct input_event *) client->buffer + client->tail; - if (evdev_event_to_user(buffer + retval, &event)) + if (evdev_event_to_user(buffer + retval, event)) return -EFAULT; + client->tail = (client->tail + 1) & (EVDEV_BUFFER_SIZE - 1); retval += evdev_event_size(); } @@ -569,8 +409,8 @@ static int str_to_user(const char *str, unsigned int maxlen, void __user *p) return copy_to_user(p, str, len) ? -EFAULT : len; } -static long evdev_do_ioctl(struct file *file, unsigned int cmd, - void __user *p, int compat_mode) +static long evdev_ioctl_handler(struct file *file, unsigned int cmd, + void __user *p, int compat_mode) { struct evdev_client *client = file->private_data; struct evdev *evdev = client->evdev; @@ -581,289 +421,215 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd, int i, t, u, v; int error; - switch (cmd) { + if (!evdev->exist) + return -ENODEV; - case EVIOCGVERSION: - return put_user(EV_VERSION, ip); + switch (cmd) { - case EVIOCGID: - if (copy_to_user(p, &dev->id, sizeof(struct input_id))) - return -EFAULT; - return 0; + case EVIOCGVERSION: + return put_user(EV_VERSION, ip); - case EVIOCGREP: - if (!test_bit(EV_REP, dev->evbit)) - return -ENOSYS; - if (put_user(dev->rep[REP_DELAY], ip)) - return -EFAULT; - if (put_user(dev->rep[REP_PERIOD], ip + 1)) - return -EFAULT; - return 0; + case EVIOCGID: + if (copy_to_user(p, &dev->id, sizeof(struct input_id))) + return -EFAULT; + return 0; - case EVIOCSREP: - if (!test_bit(EV_REP, dev->evbit)) - 
return -ENOSYS; - if (get_user(u, ip)) - return -EFAULT; - if (get_user(v, ip + 1)) - return -EFAULT; + case EVIOCGREP: + if (!test_bit(EV_REP, dev->evbit)) + return -ENOSYS; + if (put_user(dev->rep[REP_DELAY], ip)) + return -EFAULT; + if (put_user(dev->rep[REP_PERIOD], ip + 1)) + return -EFAULT; + return 0; - input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u); - input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v); + case EVIOCSREP: + if (!test_bit(EV_REP, dev->evbit)) + return -ENOSYS; + if (get_user(u, ip)) + return -EFAULT; + if (get_user(v, ip + 1)) + return -EFAULT; - return 0; + input_inject_event(&evdev->handle, EV_REP, REP_DELAY, u); + input_inject_event(&evdev->handle, EV_REP, REP_PERIOD, v); - case EVIOCGKEYCODE: - if (get_user(t, ip)) - return -EFAULT; + return 0; - error = dev->getkeycode(dev, t, &v); - if (error) - return error; + case EVIOCGKEYCODE: + if (get_user(t, ip)) + return -EFAULT; - if (put_user(v, ip + 1)) - return -EFAULT; + error = dev->getkeycode(dev, t, &v); + if (error) + return error; - return 0; + if (put_user(v, ip + 1)) + return -EFAULT; - case EVIOCSKEYCODE: - if (get_user(t, ip) || get_user(v, ip + 1)) - return -EFAULT; + return 0; - return dev->setkeycode(dev, t, v); + case EVIOCSKEYCODE: + if (get_user(t, ip) || get_user(v, ip + 1)) + return -EFAULT; - case EVIOCSFF: - if (copy_from_user(&effect, p, sizeof(effect))) - return -EFAULT; + return dev->setkeycode(dev, t, v); - error = input_ff_upload(dev, &effect, file); + case EVIOCSFF: + if (copy_from_user(&effect, p, sizeof(effect))) + return -EFAULT; - if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) - return -EFAULT; + error = input_ff_upload(dev, &effect, file); - return error; + if (put_user(effect.id, &(((struct ff_effect __user *)p)->id))) + return -EFAULT; - case EVIOCRMFF: - return input_ff_erase(dev, (int)(unsigned long) p, file); + return error; - case EVIOCGEFFECTS: - i = test_bit(EV_FF, dev->evbit) ? 
- dev->ff->max_effects : 0; - if (put_user(i, ip)) - return -EFAULT; - return 0; + case EVIOCRMFF: + return input_ff_erase(dev, (int)(unsigned long) p, file); - case EVIOCGRAB: - if (p) - return evdev_grab(evdev, client); - else - return evdev_ungrab(evdev, client); + case EVIOCGEFFECTS: + i = test_bit(EV_FF, dev->evbit) ? dev->ff->max_effects : 0; + if (put_user(i, ip)) + return -EFAULT; + return 0; + + case EVIOCGRAB: + if (p) { + if (evdev->grab) + return -EBUSY; + if (input_grab_device(&evdev->handle)) + return -EBUSY; + evdev->grab = client; + return 0; + } else { + if (evdev->grab != client) + return -EINVAL; + input_release_device(&evdev->handle); + evdev->grab = NULL; + return 0; + } - default: + default: - if (_IOC_TYPE(cmd) != 'E') - return -EINVAL; + if (_IOC_TYPE(cmd) != 'E') + return -EINVAL; - if (_IOC_DIR(cmd) == _IOC_READ) { + if (_IOC_DIR(cmd) == _IOC_READ) { - if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0, 0))) { + if ((_IOC_NR(cmd) & ~EV_MAX) == _IOC_NR(EVIOCGBIT(0,0))) { - unsigned long *bits; - int len; + unsigned long *bits; + int len; - switch (_IOC_NR(cmd) & EV_MAX) { + switch (_IOC_NR(cmd) & EV_MAX) { + case 0: bits = dev->evbit; len = EV_MAX; break; + case EV_KEY: bits = dev->keybit; len = KEY_MAX; break; + case EV_REL: bits = dev->relbit; len = REL_MAX; break; + case EV_ABS: bits = dev->absbit; len = ABS_MAX; break; + case EV_MSC: bits = dev->mscbit; len = MSC_MAX; break; + case EV_LED: bits = dev->ledbit; len = LED_MAX; break; + case EV_SND: bits = dev->sndbit; len = SND_MAX; break; + case EV_FF: bits = dev->ffbit; len = FF_MAX; break; + case EV_SW: bits = dev->swbit; len = SW_MAX; break; + default: return -EINVAL; + } + return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode); + } - case 0: bits = dev->evbit; len = EV_MAX; break; - case EV_KEY: bits = dev->keybit; len = KEY_MAX; break; - case EV_REL: bits = dev->relbit; len = REL_MAX; break; - case EV_ABS: bits = dev->absbit; len = ABS_MAX; break; - case EV_MSC: bits = 
dev->mscbit; len = MSC_MAX; break; - case EV_LED: bits = dev->ledbit; len = LED_MAX; break; - case EV_SND: bits = dev->sndbit; len = SND_MAX; break; - case EV_FF: bits = dev->ffbit; len = FF_MAX; break; - case EV_SW: bits = dev->swbit; len = SW_MAX; break; - default: return -EINVAL; - } - return bits_to_user(bits, len, _IOC_SIZE(cmd), p, compat_mode); - } + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) + return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGKEY(0))) - return bits_to_user(dev->key, KEY_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) + return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGLED(0))) - return bits_to_user(dev->led, LED_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) + return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSND(0))) - return bits_to_user(dev->snd, SND_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0))) + return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd), + p, compat_mode); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGSW(0))) - return bits_to_user(dev->sw, SW_MAX, _IOC_SIZE(cmd), - p, compat_mode); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) + return str_to_user(dev->name, _IOC_SIZE(cmd), p); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGNAME(0))) - return str_to_user(dev->name, _IOC_SIZE(cmd), p); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) + return str_to_user(dev->phys, _IOC_SIZE(cmd), p); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGPHYS(0))) - return str_to_user(dev->phys, _IOC_SIZE(cmd), p); + if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) + return str_to_user(dev->uniq, _IOC_SIZE(cmd), p); - if (_IOC_NR(cmd) == _IOC_NR(EVIOCGUNIQ(0))) - return str_to_user(dev->uniq, _IOC_SIZE(cmd), p); + if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCGABS(0))) { - if ((_IOC_NR(cmd) & ~ABS_MAX) 
== _IOC_NR(EVIOCGABS(0))) { + t = _IOC_NR(cmd) & ABS_MAX; - t = _IOC_NR(cmd) & ABS_MAX; + abs.value = dev->abs[t]; + abs.minimum = dev->absmin[t]; + abs.maximum = dev->absmax[t]; + abs.fuzz = dev->absfuzz[t]; + abs.flat = dev->absflat[t]; - abs.value = dev->abs[t]; - abs.minimum = dev->absmin[t]; - abs.maximum = dev->absmax[t]; - abs.fuzz = dev->absfuzz[t]; - abs.flat = dev->absflat[t]; + if (copy_to_user(p, &abs, sizeof(struct input_absinfo))) + return -EFAULT; - if (copy_to_user(p, &abs, sizeof(struct input_absinfo))) - return -EFAULT; + return 0; + } - return 0; } - } - - if (_IOC_DIR(cmd) == _IOC_WRITE) { + if (_IOC_DIR(cmd) == _IOC_WRITE) { - if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { + if ((_IOC_NR(cmd) & ~ABS_MAX) == _IOC_NR(EVIOCSABS(0))) { - t = _IOC_NR(cmd) & ABS_MAX; + t = _IOC_NR(cmd) & ABS_MAX; - if (copy_from_user(&abs, p, - sizeof(struct input_absinfo))) - return -EFAULT; + if (copy_from_user(&abs, p, sizeof(struct input_absinfo))) + return -EFAULT; - /* - * Take event lock to ensure that we are not - * changing device parameters in the middle - * of event. 
- */ - spin_lock_irq(&dev->event_lock); + dev->abs[t] = abs.value; + dev->absmin[t] = abs.minimum; + dev->absmax[t] = abs.maximum; + dev->absfuzz[t] = abs.fuzz; + dev->absflat[t] = abs.flat; - dev->abs[t] = abs.value; - dev->absmin[t] = abs.minimum; - dev->absmax[t] = abs.maximum; - dev->absfuzz[t] = abs.fuzz; - dev->absflat[t] = abs.flat; - - spin_unlock_irq(&dev->event_lock); - - return 0; + return 0; + } } - } } return -EINVAL; } -static long evdev_ioctl_handler(struct file *file, unsigned int cmd, - void __user *p, int compat_mode) -{ - struct evdev_client *client = file->private_data; - struct evdev *evdev = client->evdev; - int retval; - - retval = mutex_lock_interruptible(&evdev->mutex); - if (retval) - return retval; - - if (!evdev->exist) { - retval = -ENODEV; - goto out; - } - - retval = evdev_do_ioctl(file, cmd, p, compat_mode); - - out: - mutex_unlock(&evdev->mutex); - return retval; -} - static long evdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return evdev_ioctl_handler(file, cmd, (void __user *)arg, 0); } #ifdef CONFIG_COMPAT -static long evdev_ioctl_compat(struct file *file, - unsigned int cmd, unsigned long arg) +static long evdev_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { return evdev_ioctl_handler(file, cmd, compat_ptr(arg), 1); } #endif static const struct file_operations evdev_fops = { - .owner = THIS_MODULE, - .read = evdev_read, - .write = evdev_write, - .poll = evdev_poll, - .open = evdev_open, - .release = evdev_release, - .unlocked_ioctl = evdev_ioctl, + .owner = THIS_MODULE, + .read = evdev_read, + .write = evdev_write, + .poll = evdev_poll, + .open = evdev_open, + .release = evdev_release, + .unlocked_ioctl = evdev_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = evdev_ioctl_compat, + .compat_ioctl = evdev_ioctl_compat, #endif - .fasync = evdev_fasync, - .flush = evdev_flush + .fasync = evdev_fasync, + .flush = evdev_flush }; -static int evdev_install_chrdev(struct evdev *evdev) -{ - /* - 
* No need to do any locking here as calls to connect and - * disconnect are serialized by the input core - */ - evdev_table[evdev->minor] = evdev; - return 0; -} - -static void evdev_remove_chrdev(struct evdev *evdev) -{ - /* - * Lock evdev table to prevent race with evdev_open() - */ - mutex_lock(&evdev_table_mutex); - evdev_table[evdev->minor] = NULL; - mutex_unlock(&evdev_table_mutex); -} - -/* - * Mark device non-existent. This disables writes, ioctls and - * prevents new users from opening the device. Already posted - * blocking reads will stay, however new ones will fail. - */ -static void evdev_mark_dead(struct evdev *evdev) -{ - mutex_lock(&evdev->mutex); - evdev->exist = 0; - mutex_unlock(&evdev->mutex); -} - -static void evdev_cleanup(struct evdev *evdev) -{ - struct input_handle *handle = &evdev->handle; - - evdev_mark_dead(evdev); - evdev_hangup(evdev); - evdev_remove_chrdev(evdev); - - /* evdev is marked dead so no one else accesses evdev->open */ - if (evdev->open) { - input_flush_device(handle, NULL); - input_close_device(handle); - } -} - -/* - * Create new evdev device. Note that input core serializes calls - * to connect and disconnect so we don't need to lock evdev_table here. 
- */ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { @@ -871,10 +637,7 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, int minor; int error; - for (minor = 0; minor < EVDEV_MINORS; minor++) - if (!evdev_table[minor]) - break; - + for (minor = 0; minor < EVDEV_MINORS && evdev_table[minor]; minor++); if (minor == EVDEV_MINORS) { printk(KERN_ERR "evdev: no more free evdev devices\n"); return -ENFILE; @@ -885,44 +648,38 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, return -ENOMEM; INIT_LIST_HEAD(&evdev->client_list); - spin_lock_init(&evdev->client_lock); - mutex_init(&evdev->mutex); init_waitqueue_head(&evdev->wait); - snprintf(evdev->name, sizeof(evdev->name), "event%d", minor); evdev->exist = 1; evdev->minor = minor; - evdev->handle.dev = dev; evdev->handle.name = evdev->name; evdev->handle.handler = handler; evdev->handle.private = evdev; + snprintf(evdev->name, sizeof(evdev->name), "event%d", minor); - strlcpy(evdev->dev.bus_id, evdev->name, sizeof(evdev->dev.bus_id)); - evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor); + snprintf(evdev->dev.bus_id, sizeof(evdev->dev.bus_id), + "event%d", minor); evdev->dev.class = &input_class; evdev->dev.parent = &dev->dev; + evdev->dev.devt = MKDEV(INPUT_MAJOR, EVDEV_MINOR_BASE + minor); evdev->dev.release = evdev_free; device_initialize(&evdev->dev); - error = input_register_handle(&evdev->handle); - if (error) - goto err_free_evdev; + evdev_table[minor] = evdev; - error = evdev_install_chrdev(evdev); + error = device_add(&evdev->dev); if (error) - goto err_unregister_handle; + goto err_free_evdev; - error = device_add(&evdev->dev); + error = input_register_handle(&evdev->handle); if (error) - goto err_cleanup_evdev; + goto err_delete_evdev; return 0; - err_cleanup_evdev: - evdev_cleanup(evdev); - err_unregister_handle: - input_unregister_handle(&evdev->handle); + 
err_delete_evdev: + device_del(&evdev->dev); err_free_evdev: put_device(&evdev->dev); return error; @@ -931,10 +688,21 @@ static int evdev_connect(struct input_handler *handler, struct input_dev *dev, static void evdev_disconnect(struct input_handle *handle) { struct evdev *evdev = handle->private; + struct evdev_client *client; - device_del(&evdev->dev); - evdev_cleanup(evdev); input_unregister_handle(handle); + device_del(&evdev->dev); + + evdev->exist = 0; + + if (evdev->open) { + input_flush_device(handle, NULL); + input_close_device(handle); + list_for_each_entry(client, &evdev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + wake_up_interruptible(&evdev->wait); + } + put_device(&evdev->dev); } @@ -946,13 +714,13 @@ static const struct input_device_id evdev_ids[] = { MODULE_DEVICE_TABLE(input, evdev_ids); static struct input_handler evdev_handler = { - .event = evdev_event, - .connect = evdev_connect, - .disconnect = evdev_disconnect, - .fops = &evdev_fops, - .minor = EVDEV_MINOR_BASE, - .name = "evdev", - .id_table = evdev_ids, + .event = evdev_event, + .connect = evdev_connect, + .disconnect = evdev_disconnect, + .fops = &evdev_fops, + .minor = EVDEV_MINOR_BASE, + .name = "evdev", + .id_table = evdev_ids, }; static int __init evdev_init(void) diff --git a/trunk/drivers/input/input-polldev.c b/trunk/drivers/input/input-polldev.c index 92b359894e81..b773d4c756a6 100644 --- a/trunk/drivers/input/input-polldev.c +++ b/trunk/drivers/input/input-polldev.c @@ -70,7 +70,6 @@ static int input_open_polled_device(struct input_dev *input) { struct input_polled_dev *dev = input->private; int error; - unsigned long ticks; error = input_polldev_start_workqueue(); if (error) @@ -79,10 +78,8 @@ static int input_open_polled_device(struct input_dev *input) if (dev->flush) dev->flush(dev); - ticks = msecs_to_jiffies(dev->poll_interval); - if (ticks >= HZ) - ticks = round_jiffies(ticks); - queue_delayed_work(polldev_wq, &dev->work, ticks); + 
queue_delayed_work(polldev_wq, &dev->work, + msecs_to_jiffies(dev->poll_interval)); return 0; } diff --git a/trunk/drivers/input/input.c b/trunk/drivers/input/input.c index 2f2b020cd629..5dc361c954e2 100644 --- a/trunk/drivers/input/input.c +++ b/trunk/drivers/input/input.c @@ -17,10 +17,10 @@ #include #include #include +#include #include #include #include -#include MODULE_AUTHOR("Vojtech Pavlik "); MODULE_DESCRIPTION("Input core"); @@ -31,245 +31,167 @@ MODULE_LICENSE("GPL"); static LIST_HEAD(input_dev_list); static LIST_HEAD(input_handler_list); -/* - * input_mutex protects access to both input_dev_list and input_handler_list. - * This also causes input_[un]register_device and input_[un]register_handler - * be mutually exclusive which simplifies locking in drivers implementing - * input handlers. - */ -static DEFINE_MUTEX(input_mutex); - static struct input_handler *input_table[8]; -static inline int is_event_supported(unsigned int code, - unsigned long *bm, unsigned int max) +/** + * input_event() - report new input event + * @dev: device that generated the event + * @type: type of the event + * @code: event code + * @value: value of the event + * + * This function should be used by drivers implementing various input devices + * See also input_inject_event() + */ +void input_event(struct input_dev *dev, unsigned int type, unsigned int code, int value) { - return code <= max && test_bit(code, bm); -} + struct input_handle *handle; -static int input_defuzz_abs_event(int value, int old_val, int fuzz) -{ - if (fuzz) { - if (value > old_val - fuzz / 2 && value < old_val + fuzz / 2) - return old_val; + if (type > EV_MAX || !test_bit(type, dev->evbit)) + return; - if (value > old_val - fuzz && value < old_val + fuzz) - return (old_val * 3 + value) / 4; + add_input_randomness(type, code, value); - if (value > old_val - fuzz * 2 && value < old_val + fuzz * 2) - return (old_val + value) / 2; - } + switch (type) { - return value; -} + case EV_SYN: + switch (code) { + case 
SYN_CONFIG: + if (dev->event) + dev->event(dev, type, code, value); + break; + + case SYN_REPORT: + if (dev->sync) + return; + dev->sync = 1; + break; + } + break; -/* - * Pass event through all open handles. This function is called with - * dev->event_lock held and interrupts disabled. - */ -static void input_pass_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) -{ - struct input_handle *handle; + case EV_KEY: - rcu_read_lock(); + if (code > KEY_MAX || !test_bit(code, dev->keybit) || !!test_bit(code, dev->key) == value) + return; - handle = rcu_dereference(dev->grab); - if (handle) - handle->handler->event(handle, type, code, value); - else - list_for_each_entry_rcu(handle, &dev->h_list, d_node) - if (handle->open) - handle->handler->event(handle, - type, code, value); - rcu_read_unlock(); -} + if (value == 2) + break; -/* - * Generate software autorepeat event. Note that we take - * dev->event_lock here to avoid racing with input_event - * which may cause keys get "stuck". - */ -static void input_repeat_key(unsigned long data) -{ - struct input_dev *dev = (void *) data; - unsigned long flags; - - spin_lock_irqsave(&dev->event_lock, flags); + change_bit(code, dev->key); - if (test_bit(dev->repeat_key, dev->key) && - is_event_supported(dev->repeat_key, dev->keybit, KEY_MAX)) { + if (test_bit(EV_REP, dev->evbit) && dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && dev->timer.data && value) { + dev->repeat_key = code; + mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); + } - input_pass_event(dev, EV_KEY, dev->repeat_key, 2); + break; - if (dev->sync) { - /* - * Only send SYN_REPORT if we are not in a middle - * of driver parsing a new hardware packet. - * Otherwise assume that the driver will send - * SYN_REPORT once it's done. 
- */ - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); - } + case EV_SW: - if (dev->rep[REP_PERIOD]) - mod_timer(&dev->timer, jiffies + - msecs_to_jiffies(dev->rep[REP_PERIOD])); - } + if (code > SW_MAX || !test_bit(code, dev->swbit) || !!test_bit(code, dev->sw) == value) + return; - spin_unlock_irqrestore(&dev->event_lock, flags); -} + change_bit(code, dev->sw); -static void input_start_autorepeat(struct input_dev *dev, int code) -{ - if (test_bit(EV_REP, dev->evbit) && - dev->rep[REP_PERIOD] && dev->rep[REP_DELAY] && - dev->timer.data) { - dev->repeat_key = code; - mod_timer(&dev->timer, - jiffies + msecs_to_jiffies(dev->rep[REP_DELAY])); - } -} + break; -#define INPUT_IGNORE_EVENT 0 -#define INPUT_PASS_TO_HANDLERS 1 -#define INPUT_PASS_TO_DEVICE 2 -#define INPUT_PASS_TO_ALL (INPUT_PASS_TO_HANDLERS | INPUT_PASS_TO_DEVICE) + case EV_ABS: -static void input_handle_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) -{ - int disposition = INPUT_IGNORE_EVENT; + if (code > ABS_MAX || !test_bit(code, dev->absbit)) + return; - switch (type) { + if (dev->absfuzz[code]) { + if ((value > dev->abs[code] - (dev->absfuzz[code] >> 1)) && + (value < dev->abs[code] + (dev->absfuzz[code] >> 1))) + return; - case EV_SYN: - switch (code) { - case SYN_CONFIG: - disposition = INPUT_PASS_TO_ALL; - break; + if ((value > dev->abs[code] - dev->absfuzz[code]) && + (value < dev->abs[code] + dev->absfuzz[code])) + value = (dev->abs[code] * 3 + value) >> 2; - case SYN_REPORT: - if (!dev->sync) { - dev->sync = 1; - disposition = INPUT_PASS_TO_HANDLERS; + if ((value > dev->abs[code] - (dev->absfuzz[code] << 1)) && + (value < dev->abs[code] + (dev->absfuzz[code] << 1))) + value = (dev->abs[code] + value) >> 1; } - break; - } - break; - case EV_KEY: - if (is_event_supported(code, dev->keybit, KEY_MAX) && - !!test_bit(code, dev->key) != value) { + if (dev->abs[code] == value) + return; - if (value != 2) { - __change_bit(code, dev->key); - if (value) - 
input_start_autorepeat(dev, code); - } + dev->abs[code] = value; + break; - disposition = INPUT_PASS_TO_HANDLERS; - } - break; + case EV_REL: - case EV_SW: - if (is_event_supported(code, dev->swbit, SW_MAX) && - !!test_bit(code, dev->sw) != value) { + if (code > REL_MAX || !test_bit(code, dev->relbit) || (value == 0)) + return; - __change_bit(code, dev->sw); - disposition = INPUT_PASS_TO_HANDLERS; - } - break; + break; - case EV_ABS: - if (is_event_supported(code, dev->absbit, ABS_MAX)) { + case EV_MSC: - value = input_defuzz_abs_event(value, - dev->abs[code], dev->absfuzz[code]); + if (code > MSC_MAX || !test_bit(code, dev->mscbit)) + return; - if (dev->abs[code] != value) { - dev->abs[code] = value; - disposition = INPUT_PASS_TO_HANDLERS; - } - } - break; + if (dev->event) + dev->event(dev, type, code, value); - case EV_REL: - if (is_event_supported(code, dev->relbit, REL_MAX) && value) - disposition = INPUT_PASS_TO_HANDLERS; + break; - break; + case EV_LED: - case EV_MSC: - if (is_event_supported(code, dev->mscbit, MSC_MAX)) - disposition = INPUT_PASS_TO_ALL; + if (code > LED_MAX || !test_bit(code, dev->ledbit) || !!test_bit(code, dev->led) == value) + return; - break; + change_bit(code, dev->led); - case EV_LED: - if (is_event_supported(code, dev->ledbit, LED_MAX) && - !!test_bit(code, dev->led) != value) { + if (dev->event) + dev->event(dev, type, code, value); - __change_bit(code, dev->led); - disposition = INPUT_PASS_TO_ALL; - } - break; + break; - case EV_SND: - if (is_event_supported(code, dev->sndbit, SND_MAX)) { + case EV_SND: + + if (code > SND_MAX || !test_bit(code, dev->sndbit)) + return; if (!!test_bit(code, dev->snd) != !!value) - __change_bit(code, dev->snd); - disposition = INPUT_PASS_TO_ALL; - } - break; + change_bit(code, dev->snd); - case EV_REP: - if (code <= REP_MAX && value >= 0 && dev->rep[code] != value) { - dev->rep[code] = value; - disposition = INPUT_PASS_TO_ALL; - } - break; + if (dev->event) + dev->event(dev, type, code, value); - 
case EV_FF: - if (value >= 0) - disposition = INPUT_PASS_TO_ALL; - break; - } + break; - if (type != EV_SYN) - dev->sync = 0; + case EV_REP: - if ((disposition & INPUT_PASS_TO_DEVICE) && dev->event) - dev->event(dev, type, code, value); + if (code > REP_MAX || value < 0 || dev->rep[code] == value) + return; - if (disposition & INPUT_PASS_TO_HANDLERS) - input_pass_event(dev, type, code, value); -} + dev->rep[code] = value; + if (dev->event) + dev->event(dev, type, code, value); -/** - * input_event() - report new input event - * @dev: device that generated the event - * @type: type of the event - * @code: event code - * @value: value of the event - * - * This function should be used by drivers implementing various input - * devices. See also input_inject_event(). - */ + break; -void input_event(struct input_dev *dev, - unsigned int type, unsigned int code, int value) -{ - unsigned long flags; + case EV_FF: - if (is_event_supported(type, dev->evbit, EV_MAX)) { + if (value < 0) + return; - spin_lock_irqsave(&dev->event_lock, flags); - add_input_randomness(type, code, value); - input_handle_event(dev, type, code, value); - spin_unlock_irqrestore(&dev->event_lock, flags); + if (dev->event) + dev->event(dev, type, code, value); + break; } + + if (type != EV_SYN) + dev->sync = 0; + + if (dev->grab) + dev->grab->handler->event(dev->grab, type, code, value); + else + list_for_each_entry(handle, &dev->h_list, d_node) + if (handle->open) + handle->handler->event(handle, type, code, value); } EXPORT_SYMBOL(input_event); @@ -280,228 +202,102 @@ EXPORT_SYMBOL(input_event); * @code: event code * @value: value of the event * - * Similar to input_event() but will ignore event if device is - * "grabbed" and handle injecting event is not the one that owns - * the device. + * Similar to input_event() but will ignore event if device is "grabbed" and handle + * injecting event is not the one that owns the device. 
*/ -void input_inject_event(struct input_handle *handle, - unsigned int type, unsigned int code, int value) +void input_inject_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { - struct input_dev *dev = handle->dev; - struct input_handle *grab; - unsigned long flags; - - if (is_event_supported(type, dev->evbit, EV_MAX)) { - spin_lock_irqsave(&dev->event_lock, flags); - - rcu_read_lock(); - grab = rcu_dereference(dev->grab); - if (!grab || grab == handle) - input_handle_event(dev, type, code, value); - rcu_read_unlock(); - - spin_unlock_irqrestore(&dev->event_lock, flags); - } + if (!handle->dev->grab || handle->dev->grab == handle) + input_event(handle->dev, type, code, value); } EXPORT_SYMBOL(input_inject_event); -/** - * input_grab_device - grabs device for exclusive use - * @handle: input handle that wants to own the device - * - * When a device is grabbed by an input handle all events generated by - * the device are delivered only to this handle. Also events injected - * by other input handles are ignored while device is grabbed. 
- */ -int input_grab_device(struct input_handle *handle) +static void input_repeat_key(unsigned long data) { - struct input_dev *dev = handle->dev; - int retval; + struct input_dev *dev = (void *) data; - retval = mutex_lock_interruptible(&dev->mutex); - if (retval) - return retval; + if (!test_bit(dev->repeat_key, dev->key)) + return; - if (dev->grab) { - retval = -EBUSY; - goto out; - } + input_event(dev, EV_KEY, dev->repeat_key, 2); + input_sync(dev); - rcu_assign_pointer(dev->grab, handle); - synchronize_rcu(); + if (dev->rep[REP_PERIOD]) + mod_timer(&dev->timer, jiffies + msecs_to_jiffies(dev->rep[REP_PERIOD])); +} - out: - mutex_unlock(&dev->mutex); - return retval; +int input_grab_device(struct input_handle *handle) +{ + if (handle->dev->grab) + return -EBUSY; + + handle->dev->grab = handle; + return 0; } EXPORT_SYMBOL(input_grab_device); -static void __input_release_device(struct input_handle *handle) +void input_release_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; if (dev->grab == handle) { - rcu_assign_pointer(dev->grab, NULL); - /* Make sure input_pass_event() notices that grab is gone */ - synchronize_rcu(); + dev->grab = NULL; list_for_each_entry(handle, &dev->h_list, d_node) - if (handle->open && handle->handler->start) + if (handle->handler->start) handle->handler->start(handle); } } - -/** - * input_release_device - release previously grabbed device - * @handle: input handle that owns the device - * - * Releases previously grabbed device so that other input handles can - * start receiving input events. Upon release all handlers attached - * to the device have their start() method called so they have a change - * to synchronize device state with the rest of the system. 
- */ -void input_release_device(struct input_handle *handle) -{ - struct input_dev *dev = handle->dev; - - mutex_lock(&dev->mutex); - __input_release_device(handle); - mutex_unlock(&dev->mutex); -} EXPORT_SYMBOL(input_release_device); -/** - * input_open_device - open input device - * @handle: handle through which device is being accessed - * - * This function should be called by input handlers when they - * want to start receive events from given input device. - */ int input_open_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; - int retval; - - retval = mutex_lock_interruptible(&dev->mutex); - if (retval) - return retval; + int err; - if (dev->going_away) { - retval = -ENODEV; - goto out; - } + err = mutex_lock_interruptible(&dev->mutex); + if (err) + return err; handle->open++; if (!dev->users++ && dev->open) - retval = dev->open(dev); - - if (retval) { - dev->users--; - if (!--handle->open) { - /* - * Make sure we are not delivering any more events - * through this handle - */ - synchronize_rcu(); - } - } + err = dev->open(dev); + + if (err) + handle->open--; - out: mutex_unlock(&dev->mutex); - return retval; + + return err; } EXPORT_SYMBOL(input_open_device); -int input_flush_device(struct input_handle *handle, struct file *file) +int input_flush_device(struct input_handle* handle, struct file* file) { - struct input_dev *dev = handle->dev; - int retval; + if (handle->dev->flush) + return handle->dev->flush(handle->dev, file); - retval = mutex_lock_interruptible(&dev->mutex); - if (retval) - return retval; - - if (dev->flush) - retval = dev->flush(dev, file); - - mutex_unlock(&dev->mutex); - return retval; + return 0; } EXPORT_SYMBOL(input_flush_device); -/** - * input_close_device - close input device - * @handle: handle through which device is being accessed - * - * This function should be called by input handlers when they - * want to stop receive events from given input device. 
- */ void input_close_device(struct input_handle *handle) { struct input_dev *dev = handle->dev; - mutex_lock(&dev->mutex); + input_release_device(handle); - __input_release_device(handle); + mutex_lock(&dev->mutex); if (!--dev->users && dev->close) dev->close(dev); - - if (!--handle->open) { - /* - * synchronize_rcu() makes sure that input_pass_event() - * completed and that no more input events are delivered - * through this handle - */ - synchronize_rcu(); - } + handle->open--; mutex_unlock(&dev->mutex); } EXPORT_SYMBOL(input_close_device); -/* - * Prepare device for unregistering - */ -static void input_disconnect_device(struct input_dev *dev) -{ - struct input_handle *handle; - int code; - - /* - * Mark device as going away. Note that we take dev->mutex here - * not to protect access to dev->going_away but rather to ensure - * that there are no threads in the middle of input_open_device() - */ - mutex_lock(&dev->mutex); - dev->going_away = 1; - mutex_unlock(&dev->mutex); - - spin_lock_irq(&dev->event_lock); - - /* - * Simulate keyup events for all pressed keys so that handlers - * are not left with "stuck" keys. The driver may continue - * generate events even after we done here but they will not - * reach any handlers. 
- */ - if (is_event_supported(EV_KEY, dev->evbit, EV_MAX)) { - for (code = 0; code <= KEY_MAX; code++) { - if (is_event_supported(code, dev->keybit, KEY_MAX) && - test_bit(code, dev->key)) { - input_pass_event(dev, EV_KEY, code, 0); - } - } - input_pass_event(dev, EV_SYN, SYN_REPORT, 1); - } - - list_for_each_entry(handle, &dev->h_list, d_node) - handle->open = 0; - - spin_unlock_irq(&dev->event_lock); -} - static int input_fetch_keycode(struct input_dev *dev, int scancode) { switch (dev->keycodesize) { @@ -677,8 +473,7 @@ static unsigned int input_proc_devices_poll(struct file *file, poll_table *wait) static void *input_devices_seq_start(struct seq_file *seq, loff_t *pos) { - if (mutex_lock_interruptible(&input_mutex)) - return NULL; + /* acquire lock here ... Yes, we do need locking, I knowi, I know... */ return seq_list_start(&input_dev_list, *pos); } @@ -690,7 +485,7 @@ static void *input_devices_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void input_devices_seq_stop(struct seq_file *seq, void *v) { - mutex_unlock(&input_mutex); + /* release lock here */ } static void input_seq_print_bitmap(struct seq_file *seq, const char *name, @@ -774,9 +569,7 @@ static const struct file_operations input_devices_fileops = { static void *input_handlers_seq_start(struct seq_file *seq, loff_t *pos) { - if (mutex_lock_interruptible(&input_mutex)) - return NULL; - + /* acquire lock here ... Yes, we do need locking, I knowi, I know... 
*/ seq->private = (void *)(unsigned long)*pos; return seq_list_start(&input_handler_list, *pos); } @@ -789,7 +582,7 @@ static void *input_handlers_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void input_handlers_seq_stop(struct seq_file *seq, void *v) { - mutex_unlock(&input_mutex); + /* release lock here */ } static int input_handlers_seq_show(struct seq_file *seq, void *v) @@ -1190,7 +983,6 @@ struct input_dev *input_allocate_device(void) dev->dev.class = &input_class; device_initialize(&dev->dev); mutex_init(&dev->mutex); - spin_lock_init(&dev->event_lock); INIT_LIST_HEAD(&dev->h_list); INIT_LIST_HEAD(&dev->node); @@ -1208,7 +1000,7 @@ EXPORT_SYMBOL(input_allocate_device); * This function should only be used if input_register_device() * was not called yet or if it failed. Once device was registered * use input_unregister_device() and memory will be freed once last - * reference to the device is dropped. + * refrence to the device is dropped. * * Device should be allocated by input_allocate_device(). * @@ -1278,18 +1070,6 @@ void input_set_capability(struct input_dev *dev, unsigned int type, unsigned int } EXPORT_SYMBOL(input_set_capability); -/** - * input_register_device - register device with input core - * @dev: device to be registered - * - * This function registers device with input core. The device must be - * allocated with input_allocate_device() and all it's capabilities - * set up before registering. - * If function fails the device must be freed with input_free_device(). - * Once device has been successfully registered it can be unregistered - * with input_unregister_device(); input_free_device() should not be - * called in this case. 
- */ int input_register_device(struct input_dev *dev) { static atomic_t input_no = ATOMIC_INIT(0); @@ -1297,7 +1077,7 @@ int input_register_device(struct input_dev *dev) const char *path; int error; - __set_bit(EV_SYN, dev->evbit); + set_bit(EV_SYN, dev->evbit); /* * If delay and period are pre-set by the driver, then autorepeating @@ -1318,6 +1098,8 @@ int input_register_device(struct input_dev *dev) if (!dev->setkeycode) dev->setkeycode = input_default_setkeycode; + list_add_tail(&dev->node, &input_dev_list); + snprintf(dev->dev.bus_id, sizeof(dev->dev.bus_id), "input%ld", (unsigned long) atomic_inc_return(&input_no) - 1); @@ -1333,79 +1115,49 @@ int input_register_device(struct input_dev *dev) dev->name ? dev->name : "Unspecified device", path ? path : "N/A"); kfree(path); - error = mutex_lock_interruptible(&input_mutex); - if (error) { - device_del(&dev->dev); - return error; - } - - list_add_tail(&dev->node, &input_dev_list); - list_for_each_entry(handler, &input_handler_list, node) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); - mutex_unlock(&input_mutex); - return 0; } EXPORT_SYMBOL(input_register_device); -/** - * input_unregister_device - unregister previously registered device - * @dev: device to be unregistered - * - * This function unregisters an input device. Once device is unregistered - * the caller should not try to access it as it may get freed at any moment. 
- */ void input_unregister_device(struct input_dev *dev) { struct input_handle *handle, *next; + int code; - input_disconnect_device(dev); + for (code = 0; code <= KEY_MAX; code++) + if (test_bit(code, dev->key)) + input_report_key(dev, code, 0); + input_sync(dev); - mutex_lock(&input_mutex); + del_timer_sync(&dev->timer); list_for_each_entry_safe(handle, next, &dev->h_list, d_node) handle->handler->disconnect(handle); WARN_ON(!list_empty(&dev->h_list)); - del_timer_sync(&dev->timer); list_del_init(&dev->node); - input_wakeup_procfs_readers(); - - mutex_unlock(&input_mutex); - device_unregister(&dev->dev); + + input_wakeup_procfs_readers(); } EXPORT_SYMBOL(input_unregister_device); -/** - * input_register_handler - register a new input handler - * @handler: handler to be registered - * - * This function registers a new input handler (interface) for input - * devices in the system and attaches it to all input devices that - * are compatible with the handler. - */ int input_register_handler(struct input_handler *handler) { struct input_dev *dev; - int retval; - - retval = mutex_lock_interruptible(&input_mutex); - if (retval) - return retval; INIT_LIST_HEAD(&handler->h_list); if (handler->fops != NULL) { - if (input_table[handler->minor >> 5]) { - retval = -EBUSY; - goto out; - } + if (input_table[handler->minor >> 5]) + return -EBUSY; + input_table[handler->minor >> 5] = handler; } @@ -1415,26 +1167,14 @@ int input_register_handler(struct input_handler *handler) input_attach_handler(dev, handler); input_wakeup_procfs_readers(); - - out: - mutex_unlock(&input_mutex); - return retval; + return 0; } EXPORT_SYMBOL(input_register_handler); -/** - * input_unregister_handler - unregisters an input handler - * @handler: handler to be unregistered - * - * This function disconnects a handler from its input devices and - * removes it from lists of known handlers. 
- */ void input_unregister_handler(struct input_handler *handler) { struct input_handle *handle, *next; - mutex_lock(&input_mutex); - list_for_each_entry_safe(handle, next, &handler->h_list, h_node) handler->disconnect(handle); WARN_ON(!list_empty(&handler->h_list)); @@ -1445,45 +1185,14 @@ void input_unregister_handler(struct input_handler *handler) input_table[handler->minor >> 5] = NULL; input_wakeup_procfs_readers(); - - mutex_unlock(&input_mutex); } EXPORT_SYMBOL(input_unregister_handler); -/** - * input_register_handle - register a new input handle - * @handle: handle to register - * - * This function puts a new input handle onto device's - * and handler's lists so that events can flow through - * it once it is opened using input_open_device(). - * - * This function is supposed to be called from handler's - * connect() method. - */ int input_register_handle(struct input_handle *handle) { struct input_handler *handler = handle->handler; - struct input_dev *dev = handle->dev; - int error; - - /* - * We take dev->mutex here to prevent race with - * input_release_device(). - */ - error = mutex_lock_interruptible(&dev->mutex); - if (error) - return error; - list_add_tail_rcu(&handle->d_node, &dev->h_list); - mutex_unlock(&dev->mutex); - synchronize_rcu(); - /* - * Since we are supposed to be called from ->connect() - * which is mutually exclusive with ->disconnect() - * we can't be racing with input_unregister_handle() - * and so separate lock is not needed here. - */ + list_add_tail(&handle->d_node, &handle->dev->h_list); list_add_tail(&handle->h_node, &handler->h_list); if (handler->start) @@ -1493,29 +1202,10 @@ int input_register_handle(struct input_handle *handle) } EXPORT_SYMBOL(input_register_handle); -/** - * input_unregister_handle - unregister an input handle - * @handle: handle to unregister - * - * This function removes input handle from device's - * and handler's lists. 
- * - * This function is supposed to be called from handler's - * disconnect() method. - */ void input_unregister_handle(struct input_handle *handle) { - struct input_dev *dev = handle->dev; - list_del_init(&handle->h_node); - - /* - * Take dev->mutex to prevent race with input_release_device(). - */ - mutex_lock(&dev->mutex); - list_del_rcu(&handle->d_node); - mutex_unlock(&dev->mutex); - synchronize_rcu(); + list_del_init(&handle->d_node); } EXPORT_SYMBOL(input_unregister_handle); diff --git a/trunk/drivers/input/joydev.c b/trunk/drivers/input/joydev.c index 2b201f9aa024..a9a0180bfd46 100644 --- a/trunk/drivers/input/joydev.c +++ b/trunk/drivers/input/joydev.c @@ -43,8 +43,6 @@ struct joydev { struct input_handle handle; wait_queue_head_t wait; struct list_head client_list; - spinlock_t client_lock; /* protects client_list */ - struct mutex mutex; struct device dev; struct js_corr corr[ABS_MAX + 1]; @@ -63,61 +61,31 @@ struct joydev_client { int head; int tail; int startup; - spinlock_t buffer_lock; /* protects access to buffer, head and tail */ struct fasync_struct *fasync; struct joydev *joydev; struct list_head node; }; static struct joydev *joydev_table[JOYDEV_MINORS]; -static DEFINE_MUTEX(joydev_table_mutex); static int joydev_correct(int value, struct js_corr *corr) { switch (corr->type) { - - case JS_CORR_NONE: - break; - - case JS_CORR_BROKEN: - value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 : - ((corr->coef[3] * (value - corr->coef[1])) >> 14)) : - ((corr->coef[2] * (value - corr->coef[0])) >> 14); - break; - - default: - return 0; + case JS_CORR_NONE: + break; + case JS_CORR_BROKEN: + value = value > corr->coef[0] ? (value < corr->coef[1] ? 0 : + ((corr->coef[3] * (value - corr->coef[1])) >> 14)) : + ((corr->coef[2] * (value - corr->coef[0])) >> 14); + break; + default: + return 0; } return value < -32767 ? -32767 : (value > 32767 ? 
32767 : value); } -static void joydev_pass_event(struct joydev_client *client, - struct js_event *event) -{ - struct joydev *joydev = client->joydev; - - /* - * IRQs already disabled, just acquire the lock - */ - spin_lock(&client->buffer_lock); - - client->buffer[client->head] = *event; - - if (client->startup == joydev->nabs + joydev->nkey) { - client->head++; - client->head &= JOYDEV_BUFFER_SIZE - 1; - if (client->tail == client->head) - client->startup = 0; - } - - spin_unlock(&client->buffer_lock); - - kill_fasync(&client->fasync, SIGIO, POLL_IN); -} - -static void joydev_event(struct input_handle *handle, - unsigned int type, unsigned int code, int value) +static void joydev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { struct joydev *joydev = handle->private; struct joydev_client *client; @@ -125,34 +93,39 @@ static void joydev_event(struct input_handle *handle, switch (type) { - case EV_KEY: - if (code < BTN_MISC || value == 2) - return; - event.type = JS_EVENT_BUTTON; - event.number = joydev->keymap[code - BTN_MISC]; - event.value = value; - break; + case EV_KEY: + if (code < BTN_MISC || value == 2) + return; + event.type = JS_EVENT_BUTTON; + event.number = joydev->keymap[code - BTN_MISC]; + event.value = value; + break; - case EV_ABS: - event.type = JS_EVENT_AXIS; - event.number = joydev->absmap[code]; - event.value = joydev_correct(value, - &joydev->corr[event.number]); - if (event.value == joydev->abs[event.number]) - return; - joydev->abs[event.number] = event.value; - break; + case EV_ABS: + event.type = JS_EVENT_AXIS; + event.number = joydev->absmap[code]; + event.value = joydev_correct(value, joydev->corr + event.number); + if (event.value == joydev->abs[event.number]) + return; + joydev->abs[event.number] = event.value; + break; - default: - return; + default: + return; } event.time = jiffies_to_msecs(jiffies); - rcu_read_lock(); - list_for_each_entry_rcu(client, &joydev->client_list, node) - 
joydev_pass_event(client, &event); - rcu_read_unlock(); + list_for_each_entry(client, &joydev->client_list, node) { + + memcpy(client->buffer + client->head, &event, sizeof(struct js_event)); + + if (client->startup == joydev->nabs + joydev->nkey) + if (client->tail == (client->head = (client->head + 1) & (JOYDEV_BUFFER_SIZE - 1))) + client->startup = 0; + + kill_fasync(&client->fasync, SIGIO, POLL_IN); + } wake_up_interruptible(&joydev->wait); } @@ -171,83 +144,23 @@ static void joydev_free(struct device *dev) { struct joydev *joydev = container_of(dev, struct joydev, dev); + joydev_table[joydev->minor] = NULL; kfree(joydev); } -static void joydev_attach_client(struct joydev *joydev, - struct joydev_client *client) -{ - spin_lock(&joydev->client_lock); - list_add_tail_rcu(&client->node, &joydev->client_list); - spin_unlock(&joydev->client_lock); - synchronize_rcu(); -} - -static void joydev_detach_client(struct joydev *joydev, - struct joydev_client *client) -{ - spin_lock(&joydev->client_lock); - list_del_rcu(&client->node); - spin_unlock(&joydev->client_lock); - synchronize_rcu(); -} - -static int joydev_open_device(struct joydev *joydev) -{ - int retval; - - retval = mutex_lock_interruptible(&joydev->mutex); - if (retval) - return retval; - - if (!joydev->exist) - retval = -ENODEV; - else if (!joydev->open++) { - retval = input_open_device(&joydev->handle); - if (retval) - joydev->open--; - } - - mutex_unlock(&joydev->mutex); - return retval; -} - -static void joydev_close_device(struct joydev *joydev) -{ - mutex_lock(&joydev->mutex); - - if (joydev->exist && !--joydev->open) - input_close_device(&joydev->handle); - - mutex_unlock(&joydev->mutex); -} - -/* - * Wake up users waiting for IO so they can disconnect from - * dead device. 
- */ -static void joydev_hangup(struct joydev *joydev) -{ - struct joydev_client *client; - - spin_lock(&joydev->client_lock); - list_for_each_entry(client, &joydev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - spin_unlock(&joydev->client_lock); - - wake_up_interruptible(&joydev->wait); -} - static int joydev_release(struct inode *inode, struct file *file) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; joydev_fasync(-1, file, 0); - joydev_detach_client(joydev, client); + + list_del(&client->node); kfree(client); - joydev_close_device(joydev); + if (!--joydev->open && joydev->exist) + input_close_device(&joydev->handle); + put_device(&joydev->dev); return 0; @@ -263,146 +176,49 @@ static int joydev_open(struct inode *inode, struct file *file) if (i >= JOYDEV_MINORS) return -ENODEV; - error = mutex_lock_interruptible(&joydev_table_mutex); - if (error) - return error; joydev = joydev_table[i]; - if (joydev) - get_device(&joydev->dev); - mutex_unlock(&joydev_table_mutex); - - if (!joydev) + if (!joydev || !joydev->exist) return -ENODEV; + get_device(&joydev->dev); + client = kzalloc(sizeof(struct joydev_client), GFP_KERNEL); if (!client) { error = -ENOMEM; goto err_put_joydev; } - spin_lock_init(&client->buffer_lock); client->joydev = joydev; - joydev_attach_client(joydev, client); + list_add_tail(&client->node, &joydev->client_list); - error = joydev_open_device(joydev); - if (error) - goto err_free_client; + if (!joydev->open++ && joydev->exist) { + error = input_open_device(&joydev->handle); + if (error) + goto err_free_client; + } file->private_data = client; return 0; err_free_client: - joydev_detach_client(joydev, client); + list_del(&client->node); kfree(client); err_put_joydev: put_device(&joydev->dev); return error; } -static int joydev_generate_startup_event(struct joydev_client *client, - struct input_dev *input, - struct js_event *event) -{ - struct joydev *joydev = client->joydev; - int 
have_event; - - spin_lock_irq(&client->buffer_lock); - - have_event = client->startup < joydev->nabs + joydev->nkey; - - if (have_event) { - - event->time = jiffies_to_msecs(jiffies); - if (client->startup < joydev->nkey) { - event->type = JS_EVENT_BUTTON | JS_EVENT_INIT; - event->number = client->startup; - event->value = !!test_bit(joydev->keypam[event->number], - input->key); - } else { - event->type = JS_EVENT_AXIS | JS_EVENT_INIT; - event->number = client->startup - joydev->nkey; - event->value = joydev->abs[event->number]; - } - client->startup++; - } - - spin_unlock_irq(&client->buffer_lock); - - return have_event; -} - -static int joydev_fetch_next_event(struct joydev_client *client, - struct js_event *event) -{ - int have_event; - - spin_lock_irq(&client->buffer_lock); - - have_event = client->head != client->tail; - if (have_event) { - *event = client->buffer[client->tail++]; - client->tail &= JOYDEV_BUFFER_SIZE - 1; - } - - spin_unlock_irq(&client->buffer_lock); - - return have_event; -} - -/* - * Old joystick interface - */ -static ssize_t joydev_0x_read(struct joydev_client *client, - struct input_dev *input, - char __user *buf) -{ - struct joydev *joydev = client->joydev; - struct JS_DATA_TYPE data; - int i; - - spin_lock_irq(&input->event_lock); - - /* - * Get device state - */ - for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++) - data.buttons |= - test_bit(joydev->keypam[i], input->key) ? 
(1 << i) : 0; - data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x; - data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y; - - /* - * Reset reader's event queue - */ - spin_lock(&client->buffer_lock); - client->startup = 0; - client->tail = client->head; - spin_unlock(&client->buffer_lock); - - spin_unlock_irq(&input->event_lock); - - if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE))) - return -EFAULT; - - return sizeof(struct JS_DATA_TYPE); -} - -static inline int joydev_data_pending(struct joydev_client *client) +static ssize_t joydev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { - struct joydev *joydev = client->joydev; - - return client->startup < joydev->nabs + joydev->nkey || - client->head != client->tail; + return -EINVAL; } -static ssize_t joydev_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t joydev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; struct input_dev *input = joydev->handle.dev; - struct js_event event; - int retval; + int retval = 0; if (!joydev->exist) return -ENODEV; @@ -410,35 +226,68 @@ static ssize_t joydev_read(struct file *file, char __user *buf, if (count < sizeof(struct js_event)) return -EINVAL; - if (count == sizeof(struct JS_DATA_TYPE)) - return joydev_0x_read(client, input, buf); + if (count == sizeof(struct JS_DATA_TYPE)) { + + struct JS_DATA_TYPE data; + int i; + + for (data.buttons = i = 0; i < 32 && i < joydev->nkey; i++) + data.buttons |= test_bit(joydev->keypam[i], input->key) ? 
(1 << i) : 0; + data.x = (joydev->abs[0] / 256 + 128) >> joydev->glue.JS_CORR.x; + data.y = (joydev->abs[1] / 256 + 128) >> joydev->glue.JS_CORR.y; + + if (copy_to_user(buf, &data, sizeof(struct JS_DATA_TYPE))) + return -EFAULT; + + client->startup = 0; + client->tail = client->head; - if (!joydev_data_pending(client) && (file->f_flags & O_NONBLOCK)) + return sizeof(struct JS_DATA_TYPE); + } + + if (client->startup == joydev->nabs + joydev->nkey && + client->head == client->tail && (file->f_flags & O_NONBLOCK)) return -EAGAIN; retval = wait_event_interruptible(joydev->wait, - !joydev->exist || joydev_data_pending(client)); + !joydev->exist || + client->startup < joydev->nabs + joydev->nkey || + client->head != client->tail); if (retval) return retval; if (!joydev->exist) return -ENODEV; - while (retval + sizeof(struct js_event) <= count && - joydev_generate_startup_event(client, input, &event)) { + while (client->startup < joydev->nabs + joydev->nkey && retval + sizeof(struct js_event) <= count) { + + struct js_event event; + + event.time = jiffies_to_msecs(jiffies); + + if (client->startup < joydev->nkey) { + event.type = JS_EVENT_BUTTON | JS_EVENT_INIT; + event.number = client->startup; + event.value = !!test_bit(joydev->keypam[event.number], input->key); + } else { + event.type = JS_EVENT_AXIS | JS_EVENT_INIT; + event.number = client->startup - joydev->nkey; + event.value = joydev->abs[event.number]; + } if (copy_to_user(buf + retval, &event, sizeof(struct js_event))) return -EFAULT; + client->startup++; retval += sizeof(struct js_event); } - while (retval + sizeof(struct js_event) <= count && - joydev_fetch_next_event(client, &event)) { + while (client->head != client->tail && retval + sizeof(struct js_event) <= count) { - if (copy_to_user(buf + retval, &event, sizeof(struct js_event))) + if (copy_to_user(buf + retval, client->buffer + client->tail, sizeof(struct js_event))) return -EFAULT; + client->tail = (client->tail + 1) & (JOYDEV_BUFFER_SIZE - 1); retval 
+= sizeof(struct js_event); } @@ -452,144 +301,126 @@ static unsigned int joydev_poll(struct file *file, poll_table *wait) struct joydev *joydev = client->joydev; poll_wait(file, &joydev->wait, wait); - return (joydev_data_pending(client) ? (POLLIN | POLLRDNORM) : 0) | - (joydev->exist ? 0 : (POLLHUP | POLLERR)); + return ((client->head != client->tail || client->startup < joydev->nabs + joydev->nkey) ? + (POLLIN | POLLRDNORM) : 0) | (joydev->exist ? 0 : (POLLHUP | POLLERR)); } -static int joydev_ioctl_common(struct joydev *joydev, - unsigned int cmd, void __user *argp) +static int joydev_ioctl_common(struct joydev *joydev, unsigned int cmd, void __user *argp) { struct input_dev *dev = joydev->handle.dev; int i, j; switch (cmd) { - case JS_SET_CAL: - return copy_from_user(&joydev->glue.JS_CORR, argp, + case JS_SET_CAL: + return copy_from_user(&joydev->glue.JS_CORR, argp, sizeof(joydev->glue.JS_CORR)) ? -EFAULT : 0; - case JS_GET_CAL: - return copy_to_user(argp, &joydev->glue.JS_CORR, + case JS_GET_CAL: + return copy_to_user(argp, &joydev->glue.JS_CORR, sizeof(joydev->glue.JS_CORR)) ? 
-EFAULT : 0; - case JS_SET_TIMEOUT: - return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); - - case JS_GET_TIMEOUT: - return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); - - case JSIOCGVERSION: - return put_user(JS_VERSION, (__u32 __user *) argp); - - case JSIOCGAXES: - return put_user(joydev->nabs, (__u8 __user *) argp); - - case JSIOCGBUTTONS: - return put_user(joydev->nkey, (__u8 __user *) argp); + case JS_SET_TIMEOUT: + return get_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); - case JSIOCSCORR: - if (copy_from_user(joydev->corr, argp, - sizeof(joydev->corr[0]) * joydev->nabs)) - return -EFAULT; + case JS_GET_TIMEOUT: + return put_user(joydev->glue.JS_TIMEOUT, (s32 __user *) argp); - for (i = 0; i < joydev->nabs; i++) { - j = joydev->abspam[i]; - joydev->abs[i] = joydev_correct(dev->abs[j], - &joydev->corr[i]); - } - return 0; - - case JSIOCGCORR: - return copy_to_user(argp, joydev->corr, - sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0; - - case JSIOCSAXMAP: - if (copy_from_user(joydev->abspam, argp, - sizeof(__u8) * (ABS_MAX + 1))) - return -EFAULT; - - for (i = 0; i < joydev->nabs; i++) { - if (joydev->abspam[i] > ABS_MAX) - return -EINVAL; - joydev->absmap[joydev->abspam[i]] = i; - } - return 0; - - case JSIOCGAXMAP: - return copy_to_user(argp, joydev->abspam, - sizeof(__u8) * (ABS_MAX + 1)) ? 
-EFAULT : 0; + case JSIOCGVERSION: + return put_user(JS_VERSION, (__u32 __user *) argp); - case JSIOCSBTNMAP: - if (copy_from_user(joydev->keypam, argp, - sizeof(__u16) * (KEY_MAX - BTN_MISC + 1))) - return -EFAULT; + case JSIOCGAXES: + return put_user(joydev->nabs, (__u8 __user *) argp); - for (i = 0; i < joydev->nkey; i++) { - if (joydev->keypam[i] > KEY_MAX || - joydev->keypam[i] < BTN_MISC) - return -EINVAL; - joydev->keymap[joydev->keypam[i] - BTN_MISC] = i; - } + case JSIOCGBUTTONS: + return put_user(joydev->nkey, (__u8 __user *) argp); - return 0; + case JSIOCSCORR: + if (copy_from_user(joydev->corr, argp, + sizeof(joydev->corr[0]) * joydev->nabs)) + return -EFAULT; + for (i = 0; i < joydev->nabs; i++) { + j = joydev->abspam[i]; + joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i); + } + return 0; - case JSIOCGBTNMAP: - return copy_to_user(argp, joydev->keypam, - sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0; + case JSIOCGCORR: + return copy_to_user(argp, joydev->corr, + sizeof(joydev->corr[0]) * joydev->nabs) ? -EFAULT : 0; - default: - if ((cmd & ~IOCSIZE_MASK) == JSIOCGNAME(0)) { - int len; - if (!dev->name) - return 0; - len = strlen(dev->name) + 1; - if (len > _IOC_SIZE(cmd)) - len = _IOC_SIZE(cmd); - if (copy_to_user(argp, dev->name, len)) + case JSIOCSAXMAP: + if (copy_from_user(joydev->abspam, argp, sizeof(__u8) * (ABS_MAX + 1))) return -EFAULT; - return len; - } + for (i = 0; i < joydev->nabs; i++) { + if (joydev->abspam[i] > ABS_MAX) + return -EINVAL; + joydev->absmap[joydev->abspam[i]] = i; + } + return 0; + + case JSIOCGAXMAP: + return copy_to_user(argp, joydev->abspam, + sizeof(__u8) * (ABS_MAX + 1)) ? 
-EFAULT : 0; + + case JSIOCSBTNMAP: + if (copy_from_user(joydev->keypam, argp, sizeof(__u16) * (KEY_MAX - BTN_MISC + 1))) + return -EFAULT; + for (i = 0; i < joydev->nkey; i++) { + if (joydev->keypam[i] > KEY_MAX || joydev->keypam[i] < BTN_MISC) + return -EINVAL; + joydev->keymap[joydev->keypam[i] - BTN_MISC] = i; + } + return 0; + + case JSIOCGBTNMAP: + return copy_to_user(argp, joydev->keypam, + sizeof(__u16) * (KEY_MAX - BTN_MISC + 1)) ? -EFAULT : 0; + + default: + if ((cmd & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT)) == JSIOCGNAME(0)) { + int len; + if (!dev->name) + return 0; + len = strlen(dev->name) + 1; + if (len > _IOC_SIZE(cmd)) + len = _IOC_SIZE(cmd); + if (copy_to_user(argp, dev->name, len)) + return -EFAULT; + return len; + } } return -EINVAL; } #ifdef CONFIG_COMPAT -static long joydev_compat_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static long joydev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; void __user *argp = (void __user *)arg; s32 tmp32; struct JS_DATA_SAVE_TYPE_32 ds32; - int retval; - - retval = mutex_lock_interruptible(&joydev->mutex); - if (retval) - return retval; - - if (!joydev->exist) { - retval = -ENODEV; - goto out; - } + int err; - switch (cmd) { + if (!joydev->exist) + return -ENODEV; + switch(cmd) { case JS_SET_TIMELIMIT: - retval = get_user(tmp32, (s32 __user *) arg); - if (retval == 0) + err = get_user(tmp32, (s32 __user *) arg); + if (err == 0) joydev->glue.JS_TIMELIMIT = tmp32; break; - case JS_GET_TIMELIMIT: tmp32 = joydev->glue.JS_TIMELIMIT; - retval = put_user(tmp32, (s32 __user *) arg); + err = put_user(tmp32, (s32 __user *) arg); break; case JS_SET_ALL: - retval = copy_from_user(&ds32, argp, - sizeof(ds32)) ? -EFAULT : 0; - if (retval == 0) { + err = copy_from_user(&ds32, argp, + sizeof(ds32)) ? 
-EFAULT : 0; + if (err == 0) { joydev->glue.JS_TIMEOUT = ds32.JS_TIMEOUT; joydev->glue.BUSY = ds32.BUSY; joydev->glue.JS_EXPIRETIME = ds32.JS_EXPIRETIME; @@ -607,119 +438,55 @@ static long joydev_compat_ioctl(struct file *file, ds32.JS_SAVE = joydev->glue.JS_SAVE; ds32.JS_CORR = joydev->glue.JS_CORR; - retval = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0; + err = copy_to_user(argp, &ds32, sizeof(ds32)) ? -EFAULT : 0; break; default: - retval = joydev_ioctl_common(joydev, cmd, argp); - break; + err = joydev_ioctl_common(joydev, cmd, argp); } - - out: - mutex_unlock(&joydev->mutex); - return retval; + return err; } #endif /* CONFIG_COMPAT */ -static long joydev_ioctl(struct file *file, - unsigned int cmd, unsigned long arg) +static int joydev_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) { struct joydev_client *client = file->private_data; struct joydev *joydev = client->joydev; void __user *argp = (void __user *)arg; - int retval; - - retval = mutex_lock_interruptible(&joydev->mutex); - if (retval) - return retval; - - if (!joydev->exist) { - retval = -ENODEV; - goto out; - } - - switch (cmd) { - - case JS_SET_TIMELIMIT: - retval = get_user(joydev->glue.JS_TIMELIMIT, - (long __user *) arg); - break; - - case JS_GET_TIMELIMIT: - retval = put_user(joydev->glue.JS_TIMELIMIT, - (long __user *) arg); - break; - - case JS_SET_ALL: - retval = copy_from_user(&joydev->glue, argp, - sizeof(joydev->glue)) ? -EFAULT: 0; - break; - case JS_GET_ALL: - retval = copy_to_user(argp, &joydev->glue, - sizeof(joydev->glue)) ? -EFAULT : 0; - break; + if (!joydev->exist) + return -ENODEV; - default: - retval = joydev_ioctl_common(joydev, cmd, argp); - break; + switch(cmd) { + case JS_SET_TIMELIMIT: + return get_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg); + case JS_GET_TIMELIMIT: + return put_user(joydev->glue.JS_TIMELIMIT, (long __user *) arg); + case JS_SET_ALL: + return copy_from_user(&joydev->glue, argp, + sizeof(joydev->glue)) ? 
-EFAULT : 0; + case JS_GET_ALL: + return copy_to_user(argp, &joydev->glue, + sizeof(joydev->glue)) ? -EFAULT : 0; + default: + return joydev_ioctl_common(joydev, cmd, argp); } - out: - mutex_unlock(&joydev->mutex); - return retval; } static const struct file_operations joydev_fops = { - .owner = THIS_MODULE, - .read = joydev_read, - .poll = joydev_poll, - .open = joydev_open, - .release = joydev_release, - .unlocked_ioctl = joydev_ioctl, + .owner = THIS_MODULE, + .read = joydev_read, + .write = joydev_write, + .poll = joydev_poll, + .open = joydev_open, + .release = joydev_release, + .ioctl = joydev_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = joydev_compat_ioctl, + .compat_ioctl = joydev_compat_ioctl, #endif - .fasync = joydev_fasync, + .fasync = joydev_fasync, }; -static int joydev_install_chrdev(struct joydev *joydev) -{ - joydev_table[joydev->minor] = joydev; - return 0; -} - -static void joydev_remove_chrdev(struct joydev *joydev) -{ - mutex_lock(&joydev_table_mutex); - joydev_table[joydev->minor] = NULL; - mutex_unlock(&joydev_table_mutex); -} - -/* - * Mark device non-existant. This disables writes, ioctls and - * prevents new users from opening the device. Already posted - * blocking reads will stay, however new ones will fail. 
- */ -static void joydev_mark_dead(struct joydev *joydev) -{ - mutex_lock(&joydev->mutex); - joydev->exist = 0; - mutex_unlock(&joydev->mutex); -} - -static void joydev_cleanup(struct joydev *joydev) -{ - struct input_handle *handle = &joydev->handle; - - joydev_mark_dead(joydev); - joydev_hangup(joydev); - joydev_remove_chrdev(joydev); - - /* joydev is marked dead so noone else accesses joydev->open */ - if (joydev->open) - input_close_device(handle); -} - static int joydev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { @@ -727,10 +494,7 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, int i, j, t, minor; int error; - for (minor = 0; minor < JOYDEV_MINORS; minor++) - if (!joydev_table[minor]) - break; - + for (minor = 0; minor < JOYDEV_MINORS && joydev_table[minor]; minor++); if (minor == JOYDEV_MINORS) { printk(KERN_ERR "joydev: no more free joydev devices\n"); return -ENFILE; @@ -741,19 +505,15 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, return -ENOMEM; INIT_LIST_HEAD(&joydev->client_list); - spin_lock_init(&joydev->client_lock); - mutex_init(&joydev->mutex); init_waitqueue_head(&joydev->wait); - snprintf(joydev->name, sizeof(joydev->name), "js%d", minor); - joydev->exist = 1; joydev->minor = minor; - joydev->exist = 1; joydev->handle.dev = dev; joydev->handle.name = joydev->name; joydev->handle.handler = handler; joydev->handle.private = joydev; + snprintf(joydev->name, sizeof(joydev->name), "js%d", minor); for (i = 0; i < ABS_MAX + 1; i++) if (test_bit(i, dev->absbit)) { @@ -785,65 +545,67 @@ static int joydev_connect(struct input_handler *handler, struct input_dev *dev, } joydev->corr[i].type = JS_CORR_BROKEN; joydev->corr[i].prec = dev->absfuzz[j]; - joydev->corr[i].coef[0] = - (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j]; - joydev->corr[i].coef[1] = - (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j]; - - t = 
(dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j]; - if (t) { - joydev->corr[i].coef[2] = (1 << 29) / t; - joydev->corr[i].coef[3] = (1 << 29) / t; - - joydev->abs[i] = joydev_correct(dev->abs[j], - joydev->corr + i); - } + joydev->corr[i].coef[0] = (dev->absmax[j] + dev->absmin[j]) / 2 - dev->absflat[j]; + joydev->corr[i].coef[1] = (dev->absmax[j] + dev->absmin[j]) / 2 + dev->absflat[j]; + if (!(t = ((dev->absmax[j] - dev->absmin[j]) / 2 - 2 * dev->absflat[j]))) + continue; + joydev->corr[i].coef[2] = (1 << 29) / t; + joydev->corr[i].coef[3] = (1 << 29) / t; + + joydev->abs[i] = joydev_correct(dev->abs[j], joydev->corr + i); } - strlcpy(joydev->dev.bus_id, joydev->name, sizeof(joydev->dev.bus_id)); - joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor); + snprintf(joydev->dev.bus_id, sizeof(joydev->dev.bus_id), + "js%d", minor); joydev->dev.class = &input_class; joydev->dev.parent = &dev->dev; + joydev->dev.devt = MKDEV(INPUT_MAJOR, JOYDEV_MINOR_BASE + minor); joydev->dev.release = joydev_free; device_initialize(&joydev->dev); - error = input_register_handle(&joydev->handle); - if (error) - goto err_free_joydev; + joydev_table[minor] = joydev; - error = joydev_install_chrdev(joydev); + error = device_add(&joydev->dev); if (error) - goto err_unregister_handle; + goto err_free_joydev; - error = device_add(&joydev->dev); + error = input_register_handle(&joydev->handle); if (error) - goto err_cleanup_joydev; + goto err_delete_joydev; return 0; - err_cleanup_joydev: - joydev_cleanup(joydev); - err_unregister_handle: - input_unregister_handle(&joydev->handle); + err_delete_joydev: + device_del(&joydev->dev); err_free_joydev: put_device(&joydev->dev); return error; } + static void joydev_disconnect(struct input_handle *handle) { struct joydev *joydev = handle->private; + struct joydev_client *client; - device_del(&joydev->dev); - joydev_cleanup(joydev); input_unregister_handle(handle); + device_del(&joydev->dev); + + joydev->exist = 0; + + if 
(joydev->open) { + input_close_device(handle); + list_for_each_entry(client, &joydev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + wake_up_interruptible(&joydev->wait); + } + put_device(&joydev->dev); } static const struct input_device_id joydev_blacklist[] = { { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_KEYBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT, .evbit = { BIT(EV_KEY) }, .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, }, /* Avoid itouchpads, touchscreens and tablets */ @@ -852,20 +614,17 @@ static const struct input_device_id joydev_blacklist[] = { static const struct input_device_id joydev_ids[] = { { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_ABS) }, .absbit = { BIT(ABS_X) }, }, { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_ABS) }, .absbit = { BIT(ABS_WHEEL) }, }, { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_ABS) }, .absbit = { BIT(ABS_THROTTLE) }, }, @@ -875,14 +634,14 @@ static const struct input_device_id joydev_ids[] = { MODULE_DEVICE_TABLE(input, joydev_ids); static struct input_handler joydev_handler = { - .event = joydev_event, - .connect = joydev_connect, - .disconnect = joydev_disconnect, - .fops = &joydev_fops, - .minor = JOYDEV_MINOR_BASE, - .name = "joydev", - .id_table = joydev_ids, - .blacklist = joydev_blacklist, + .event = joydev_event, + .connect = joydev_connect, + .disconnect = joydev_disconnect, + .fops = &joydev_fops, + .minor = JOYDEV_MINOR_BASE, + .name = "joydev", + .id_table = joydev_ids, + .blacklist = joydev_blacklist, }; static int __init joydev_init(void) diff --git 
a/trunk/drivers/input/joystick/xpad.c b/trunk/drivers/input/joystick/xpad.c index 623629a69b03..28080395899c 100644 --- a/trunk/drivers/input/joystick/xpad.c +++ b/trunk/drivers/input/joystick/xpad.c @@ -223,16 +223,12 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d struct input_dev *dev = xpad->dev; /* left stick */ - input_report_abs(dev, ABS_X, - (__s16) le16_to_cpup((__le16 *)(data + 12))); - input_report_abs(dev, ABS_Y, - (__s16) le16_to_cpup((__le16 *)(data + 14))); + input_report_abs(dev, ABS_X, (__s16) (((__s16)data[13] << 8) | data[12])); + input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[15] << 8) | data[14])); /* right stick */ - input_report_abs(dev, ABS_RX, - (__s16) le16_to_cpup((__le16 *)(data + 16))); - input_report_abs(dev, ABS_RY, - (__s16) le16_to_cpup((__le16 *)(data + 18))); + input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[17] << 8) | data[16])); + input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[19] << 8) | data[18])); /* triggers left/right */ input_report_abs(dev, ABS_Z, data[10]); @@ -240,10 +236,8 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d /* digital pad */ if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) { - input_report_abs(dev, ABS_HAT0X, - !!(data[2] & 0x08) - !!(data[2] & 0x04)); - input_report_abs(dev, ABS_HAT0Y, - !!(data[2] & 0x02) - !!(data[2] & 0x01)); + input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); + input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } else /* xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS */ { input_report_key(dev, BTN_LEFT, data[2] & 0x04); input_report_key(dev, BTN_RIGHT, data[2] & 0x08); @@ -280,17 +274,14 @@ static void xpad_process_packet(struct usb_xpad *xpad, u16 cmd, unsigned char *d * http://www.free60.org/wiki/Gamepad */ -static void xpad360_process_packet(struct usb_xpad *xpad, - u16 cmd, unsigned char *data) +static void xpad360_process_packet(struct usb_xpad *xpad, u16 
cmd, unsigned char *data) { struct input_dev *dev = xpad->dev; /* digital pad */ if (xpad->dpad_mapping == MAP_DPAD_TO_AXES) { - input_report_abs(dev, ABS_HAT0X, - !!(data[2] & 0x08) - !!(data[2] & 0x04)); - input_report_abs(dev, ABS_HAT0Y, - !!(data[2] & 0x02) - !!(data[2] & 0x01)); + input_report_abs(dev, ABS_HAT0X, !!(data[2] & 0x08) - !!(data[2] & 0x04)); + input_report_abs(dev, ABS_HAT0Y, !!(data[2] & 0x02) - !!(data[2] & 0x01)); } else if (xpad->dpad_mapping == MAP_DPAD_TO_BUTTONS) { /* dpad as buttons (right, left, down, up) */ input_report_key(dev, BTN_LEFT, data[2] & 0x04); @@ -317,16 +308,12 @@ static void xpad360_process_packet(struct usb_xpad *xpad, input_report_key(dev, BTN_MODE, data[3] & 0x04); /* left stick */ - input_report_abs(dev, ABS_X, - (__s16) le16_to_cpup((__le16 *)(data + 6))); - input_report_abs(dev, ABS_Y, - (__s16) le16_to_cpup((__le16 *)(data + 8))); + input_report_abs(dev, ABS_X, (__s16) (((__s16)data[7] << 8) | (__s16)data[6])); + input_report_abs(dev, ABS_Y, (__s16) (((__s16)data[9] << 8) | (__s16)data[8])); /* right stick */ - input_report_abs(dev, ABS_RX, - (__s16) le16_to_cpup((__le16 *)(data + 10))); - input_report_abs(dev, ABS_RY, - (__s16) le16_to_cpup((__le16 *)(data + 12))); + input_report_abs(dev, ABS_RX, (__s16) (((__s16)data[11] << 8) | (__s16)data[10])); + input_report_abs(dev, ABS_RY, (__s16) (((__s16)data[13] << 8) | (__s16)data[12])); /* triggers left/right */ input_report_abs(dev, ABS_Z, data[4]); @@ -348,12 +335,10 @@ static void xpad_irq_in(struct urb *urb) case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - dbg("%s - urb shutting down with status: %d", - __FUNCTION__, urb->status); + dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status); return; default: - dbg("%s - nonzero urb status received: %d", - __FUNCTION__, urb->status); + dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status); goto exit; } @@ -382,12 +367,10 @@ static void xpad_irq_out(struct urb *urb) 
case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ - dbg("%s - urb shutting down with status: %d", - __FUNCTION__, urb->status); + dbg("%s - urb shutting down with status: %d", __FUNCTION__, urb->status); return; default: - dbg("%s - nonzero urb status received: %d", - __FUNCTION__, urb->status); + dbg("%s - nonzero urb status received: %d", __FUNCTION__, urb->status); goto exit; } @@ -395,7 +378,7 @@ static void xpad_irq_out(struct urb *urb) retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) err("%s - usb_submit_urb failed with result %d", - __FUNCTION__, retval); + __FUNCTION__, retval); } static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad) @@ -612,7 +595,7 @@ static void xpad_set_up_abs(struct input_dev *input_dev, signed short abs) static int xpad_probe(struct usb_interface *intf, const struct usb_device_id *id) { - struct usb_device *udev = interface_to_usbdev(intf); + struct usb_device *udev = interface_to_usbdev (intf); struct usb_xpad *xpad; struct input_dev *input_dev; struct usb_endpoint_descriptor *ep_irq_in; diff --git a/trunk/drivers/input/keyboard/Kconfig b/trunk/drivers/input/keyboard/Kconfig index 2316a018fae6..c97d5eb0075d 100644 --- a/trunk/drivers/input/keyboard/Kconfig +++ b/trunk/drivers/input/keyboard/Kconfig @@ -208,27 +208,6 @@ config KEYBOARD_HIL This driver implements support for HIL-keyboards attached to your machine, so normally you should say Y here. -config KEYBOARD_HP6XX - tristate "HP Jornada 6XX Keyboard support" - depends on SH_HP6XX - select INPUT_POLLDEV - help - This adds support for the onboard keyboard found on - HP Jornada 620/660/680/690. - - To compile this driver as a module, choose M here: the - module will be called jornada680_kbd. - -config KEYBOARD_HP7XX - tristate "HP Jornada 7XX Keyboard Driver" - depends on SA1100_JORNADA720_SSP && SA1100_SSP - help - Say Y here to add support for the HP Jornada 7xx (710/720/728) - onboard keyboard. 
- - To compile this driver as a module, choose M here: the - module will be called jornada720_kbd. - config KEYBOARD_OMAP tristate "TI OMAP keypad support" depends on (ARCH_OMAP1 || ARCH_OMAP2) @@ -274,23 +253,4 @@ config KEYBOARD_GPIO To compile this driver as a module, choose M here: the module will be called gpio-keys. -config KEYBOARD_MAPLE - tristate "Maple bus keyboard" - depends on SH_DREAMCAST && MAPLE - help - Say Y here if you have a Dreamcast console running Linux and have - a keyboard attached to its Maple bus. - - To compile this driver as a module, choose M here: the - module will be called maple_keyb. - -config KEYBOARD_BFIN - tristate "Blackfin BF54x keypad support" - depends on BF54x - help - Say Y here if you want to use the BF54x keypad. - - To compile this driver as a module, choose M here: the - module will be called bf54x-keys. - endif diff --git a/trunk/drivers/input/keyboard/Makefile b/trunk/drivers/input/keyboard/Makefile index e97455fdcc83..28d211b87b14 100644 --- a/trunk/drivers/input/keyboard/Makefile +++ b/trunk/drivers/input/keyboard/Makefile @@ -21,7 +21,4 @@ obj-$(CONFIG_KEYBOARD_OMAP) += omap-keypad.o obj-$(CONFIG_KEYBOARD_PXA27x) += pxa27x_keyboard.o obj-$(CONFIG_KEYBOARD_AAED2000) += aaed2000_kbd.o obj-$(CONFIG_KEYBOARD_GPIO) += gpio_keys.o -obj-$(CONFIG_KEYBOARD_HP6XX) += jornada680_kbd.o -obj-$(CONFIG_KEYBOARD_HP7XX) += jornada720_kbd.o -obj-$(CONFIG_KEYBOARD_MAPLE) += maple_keyb.o -obj-$(CONFIG_KEYBOARD_BFIN) += bf54x-keys.o + diff --git a/trunk/drivers/input/keyboard/atakbd.c b/trunk/drivers/input/keyboard/atakbd.c index a1800151b6ce..f948d3a14a93 100644 --- a/trunk/drivers/input/keyboard/atakbd.c +++ b/trunk/drivers/input/keyboard/atakbd.c @@ -217,7 +217,7 @@ static void atakbd_interrupt(unsigned char scancode, char down) static int __init atakbd_init(void) { - int i, error; + int i; if (!MACH_IS_ATARI || !ATARIHW_PRESENT(ST_MFP)) return -EIO; @@ -247,10 +247,9 @@ static int __init atakbd_init(void) } /* error check */ - 
error = input_register_device(atakbd_dev); - if (error) { + if (input_register_device(atakbd_dev)) { input_free_device(atakbd_dev); - return error; + return -ENOMEM; } atari_input_keyboard_interrupt_hook = atakbd_interrupt; diff --git a/trunk/drivers/input/keyboard/bf54x-keys.c b/trunk/drivers/input/keyboard/bf54x-keys.c deleted file mode 100644 index a67b29b089ef..000000000000 --- a/trunk/drivers/input/keyboard/bf54x-keys.c +++ /dev/null @@ -1,382 +0,0 @@ -/* - * File: drivers/input/keyboard/bf54x-keys.c - * Based on: - * Author: Michael Hennerich - * - * Created: - * Description: keypad driver for Analog Devices Blackfin BF54x Processors - * - * - * Modified: - * Copyright 2007 Analog Devices Inc. - * - * Bugs: Enter bugs at http://blackfin.uclinux.org/ - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc., - * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define DRV_NAME "bf54x-keys" -#define TIME_SCALE 100 /* 100 ns */ -#define MAX_MULT (0xFF * TIME_SCALE) -#define MAX_RC 8 /* Max Row/Col */ - -static const u16 per_rows[] = { - P_KEY_ROW7, - P_KEY_ROW6, - P_KEY_ROW5, - P_KEY_ROW4, - P_KEY_ROW3, - P_KEY_ROW2, - P_KEY_ROW1, - P_KEY_ROW0, - 0 -}; - -static const u16 per_cols[] = { - P_KEY_COL7, - P_KEY_COL6, - P_KEY_COL5, - P_KEY_COL4, - P_KEY_COL3, - P_KEY_COL2, - P_KEY_COL1, - P_KEY_COL0, - 0 -}; - -struct bf54x_kpad { - struct input_dev *input; - int irq; - unsigned short lastkey; - unsigned short *keycode; - struct timer_list timer; - unsigned int keyup_test_jiffies; -}; - -static inline int bfin_kpad_find_key(struct bf54x_kpad *bf54x_kpad, - struct input_dev *input, u16 keyident) -{ - u16 i; - - for (i = 0; i < input->keycodemax; i++) - if (bf54x_kpad->keycode[i + input->keycodemax] == keyident) - return bf54x_kpad->keycode[i]; - return -1; -} - -static inline void bfin_keycodecpy(unsigned short *keycode, - const unsigned int *pdata_kc, - unsigned short keymapsize) -{ - unsigned int i; - - for (i = 0; i < keymapsize; i++) { - keycode[i] = pdata_kc[i] & 0xffff; - keycode[i + keymapsize] = pdata_kc[i] >> 16; - } -} - -static inline u16 bfin_kpad_get_prescale(u32 timescale) -{ - u32 sclk = get_sclk(); - - return ((((sclk / 1000) * timescale) / 1024) - 1); -} - -static inline u16 bfin_kpad_get_keypressed(struct bf54x_kpad *bf54x_kpad) -{ - return (bfin_read_KPAD_STAT() & KPAD_PRESSED); -} - -static inline void bfin_kpad_clear_irq(void) -{ - bfin_write_KPAD_STAT(0xFFFF); - bfin_write_KPAD_ROWCOL(0xFFFF); -} 
- -static void bfin_kpad_timer(unsigned long data) -{ - struct platform_device *pdev = (struct platform_device *) data; - struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev); - - if (bfin_kpad_get_keypressed(bf54x_kpad)) { - /* Try again later */ - mod_timer(&bf54x_kpad->timer, - jiffies + bf54x_kpad->keyup_test_jiffies); - return; - } - - input_report_key(bf54x_kpad->input, bf54x_kpad->lastkey, 0); - input_sync(bf54x_kpad->input); - - /* Clear IRQ Status */ - - bfin_kpad_clear_irq(); - enable_irq(bf54x_kpad->irq); -} - -static irqreturn_t bfin_kpad_isr(int irq, void *dev_id) -{ - struct platform_device *pdev = dev_id; - struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev); - struct input_dev *input = bf54x_kpad->input; - int key; - u16 rowcol = bfin_read_KPAD_ROWCOL(); - - key = bfin_kpad_find_key(bf54x_kpad, input, rowcol); - - input_report_key(input, key, 1); - input_sync(input); - - if (bfin_kpad_get_keypressed(bf54x_kpad)) { - disable_irq(bf54x_kpad->irq); - bf54x_kpad->lastkey = key; - mod_timer(&bf54x_kpad->timer, - jiffies + bf54x_kpad->keyup_test_jiffies); - } else { - input_report_key(input, key, 0); - input_sync(input); - - bfin_kpad_clear_irq(); - } - - return IRQ_HANDLED; -} - -static int __devinit bfin_kpad_probe(struct platform_device *pdev) -{ - struct bf54x_kpad *bf54x_kpad; - struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data; - struct input_dev *input; - int i, error; - - if (!pdata->rows || !pdata->cols || !pdata->keymap) { - printk(KERN_ERR DRV_NAME - ": No rows, cols or keymap from pdata\n"); - return -EINVAL; - } - - if (!pdata->keymapsize || - pdata->keymapsize > (pdata->rows * pdata->cols)) { - printk(KERN_ERR DRV_NAME ": Invalid keymapsize\n"); - return -EINVAL; - } - - bf54x_kpad = kzalloc(sizeof(struct bf54x_kpad), GFP_KERNEL); - if (!bf54x_kpad) - return -ENOMEM; - - platform_set_drvdata(pdev, bf54x_kpad); - - /* Allocate memory for keymap followed by private LUT */ - bf54x_kpad->keycode = 
kmalloc(pdata->keymapsize * - sizeof(unsigned short) * 2, GFP_KERNEL); - if (!bf54x_kpad->keycode) { - error = -ENOMEM; - goto out; - } - - if (!pdata->debounce_time || !pdata->debounce_time > MAX_MULT || - !pdata->coldrive_time || !pdata->coldrive_time > MAX_MULT) { - printk(KERN_ERR DRV_NAME - ": Invalid Debounce/Columdrive Time from pdata\n"); - bfin_write_KPAD_MSEL(0xFF0); /* Default MSEL */ - } else { - bfin_write_KPAD_MSEL( - ((pdata->debounce_time / TIME_SCALE) - & DBON_SCALE) | - (((pdata->coldrive_time / TIME_SCALE) << 8) - & COLDRV_SCALE)); - - } - - if (!pdata->keyup_test_interval) - bf54x_kpad->keyup_test_jiffies = msecs_to_jiffies(50); - else - bf54x_kpad->keyup_test_jiffies = - msecs_to_jiffies(pdata->keyup_test_interval); - - if (peripheral_request_list((u16 *)&per_rows[MAX_RC - pdata->rows], - DRV_NAME)) { - printk(KERN_ERR DRV_NAME - ": Requesting Peripherals failed\n"); - error = -EFAULT; - goto out0; - } - - if (peripheral_request_list((u16 *)&per_cols[MAX_RC - pdata->cols], - DRV_NAME)) { - printk(KERN_ERR DRV_NAME - ": Requesting Peripherals failed\n"); - error = -EFAULT; - goto out1; - } - - bf54x_kpad->irq = platform_get_irq(pdev, 0); - if (bf54x_kpad->irq < 0) { - error = -ENODEV; - goto out2; - } - - error = request_irq(bf54x_kpad->irq, bfin_kpad_isr, - IRQF_SAMPLE_RANDOM, DRV_NAME, pdev); - if (error) { - printk(KERN_ERR DRV_NAME - ": unable to claim irq %d; error %d\n", - bf54x_kpad->irq, error); - error = -EBUSY; - goto out2; - } - - input = input_allocate_device(); - if (!input) { - error = -ENOMEM; - goto out3; - } - - bf54x_kpad->input = input; - - input->name = pdev->name; - input->phys = "bf54x-keys/input0"; - input->dev.parent = &pdev->dev; - - input_set_drvdata(input, bf54x_kpad); - - input->id.bustype = BUS_HOST; - input->id.vendor = 0x0001; - input->id.product = 0x0001; - input->id.version = 0x0100; - - input->keycodesize = sizeof(unsigned short); - input->keycodemax = pdata->keymapsize; - input->keycode = bf54x_kpad->keycode; - 
- bfin_keycodecpy(bf54x_kpad->keycode, pdata->keymap, pdata->keymapsize); - - /* setup input device */ - __set_bit(EV_KEY, input->evbit); - - if (pdata->repeat) - __set_bit(EV_REP, input->evbit); - - for (i = 0; i < input->keycodemax; i++) - __set_bit(bf54x_kpad->keycode[i] & KEY_MAX, input->keybit); - __clear_bit(KEY_RESERVED, input->keybit); - - error = input_register_device(input); - if (error) { - printk(KERN_ERR DRV_NAME - ": Unable to register input device (%d)\n", error); - goto out4; - } - - /* Init Keypad Key Up/Release test timer */ - - setup_timer(&bf54x_kpad->timer, bfin_kpad_timer, (unsigned long) pdev); - - bfin_write_KPAD_PRESCALE(bfin_kpad_get_prescale(TIME_SCALE)); - - bfin_write_KPAD_CTL((((pdata->cols - 1) << 13) & KPAD_COLEN) | - (((pdata->rows - 1) << 10) & KPAD_ROWEN) | - (2 & KPAD_IRQMODE)); - - bfin_write_KPAD_CTL(bfin_read_KPAD_CTL() | KPAD_EN); - - printk(KERN_ERR DRV_NAME - ": Blackfin BF54x Keypad registered IRQ %d\n", bf54x_kpad->irq); - - return 0; - -out4: - input_free_device(input); -out3: - free_irq(bf54x_kpad->irq, pdev); -out2: - peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]); -out1: - peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]); -out0: - kfree(bf54x_kpad->keycode); -out: - kfree(bf54x_kpad); - platform_set_drvdata(pdev, NULL); - - return error; -} - -static int __devexit bfin_kpad_remove(struct platform_device *pdev) -{ - struct bfin_kpad_platform_data *pdata = pdev->dev.platform_data; - struct bf54x_kpad *bf54x_kpad = platform_get_drvdata(pdev); - - del_timer_sync(&bf54x_kpad->timer); - free_irq(bf54x_kpad->irq, pdev); - - input_unregister_device(bf54x_kpad->input); - - peripheral_free_list((u16 *)&per_rows[MAX_RC - pdata->rows]); - peripheral_free_list((u16 *)&per_cols[MAX_RC - pdata->cols]); - - kfree(bf54x_kpad->keycode); - kfree(bf54x_kpad); - platform_set_drvdata(pdev, NULL); - - return 0; -} - -struct platform_driver bfin_kpad_device_driver = { - .probe = bfin_kpad_probe, - .remove = 
__devexit_p(bfin_kpad_remove), - .driver = { - .name = DRV_NAME, - } -}; - -static int __init bfin_kpad_init(void) -{ - return platform_driver_register(&bfin_kpad_device_driver); -} - -static void __exit bfin_kpad_exit(void) -{ - platform_driver_unregister(&bfin_kpad_device_driver); -} - -module_init(bfin_kpad_init); -module_exit(bfin_kpad_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Michael Hennerich "); -MODULE_DESCRIPTION("Keypad driver for BF54x Processors"); diff --git a/trunk/drivers/input/keyboard/gpio_keys.c b/trunk/drivers/input/keyboard/gpio_keys.c index e2a3293bc67e..f0b22b8b2769 100644 --- a/trunk/drivers/input/keyboard/gpio_keys.c +++ b/trunk/drivers/input/keyboard/gpio_keys.c @@ -54,7 +54,6 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; struct input_dev *input; int i, error; - int wakeup = 0; input = input_allocate_device(); if (!input) @@ -78,51 +77,31 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) int irq = gpio_to_irq(button->gpio); unsigned int type = button->type ?: EV_KEY; - if (irq < 0) { - error = irq; - printk(KERN_ERR - "gpio-keys: " - "Unable to get irq number for GPIO %d," - "error %d\n", - button->gpio, error); - goto fail; - } - - error = request_irq(irq, gpio_keys_isr, - IRQF_SAMPLE_RANDOM | IRQF_TRIGGER_RISING | - IRQF_TRIGGER_FALLING, - button->desc ? button->desc : "gpio_keys", - pdev); + set_irq_type(irq, IRQ_TYPE_EDGE_BOTH); + error = request_irq(irq, gpio_keys_isr, IRQF_SAMPLE_RANDOM, + button->desc ? 
button->desc : "gpio_keys", + pdev); if (error) { - printk(KERN_ERR - "gpio-keys: Unable to claim irq %d; error %d\n", + printk(KERN_ERR "gpio-keys: unable to claim irq %d; error %d\n", irq, error); goto fail; } - if (button->wakeup) - wakeup = 1; - input_set_capability(input, type, button->code); } error = input_register_device(input); if (error) { - printk(KERN_ERR - "gpio-keys: Unable to register input device, " - "error: %d\n", error); + printk(KERN_ERR "Unable to register gpio-keys input device\n"); goto fail; } - device_init_wakeup(&pdev->dev, wakeup); - return 0; fail: - while (--i >= 0) + for (i = i - 1; i >= 0; i--) free_irq(gpio_to_irq(pdata->buttons[i].gpio), pdev); - platform_set_drvdata(pdev, NULL); input_free_device(input); return error; @@ -134,8 +113,6 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) struct input_dev *input = platform_get_drvdata(pdev); int i; - device_init_wakeup(&pdev->dev, 0); - for (i = 0; i < pdata->nbuttons; i++) { int irq = gpio_to_irq(pdata->buttons[i].gpio); free_irq(irq, pdev); @@ -146,53 +123,9 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) return 0; } - -#ifdef CONFIG_PM -static int gpio_keys_suspend(struct platform_device *pdev, pm_message_t state) -{ - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; - int i; - - if (device_may_wakeup(&pdev->dev)) { - for (i = 0; i < pdata->nbuttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; - if (button->wakeup) { - int irq = gpio_to_irq(button->gpio); - enable_irq_wake(irq); - } - } - } - - return 0; -} - -static int gpio_keys_resume(struct platform_device *pdev) -{ - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; - int i; - - if (device_may_wakeup(&pdev->dev)) { - for (i = 0; i < pdata->nbuttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; - if (button->wakeup) { - int irq = gpio_to_irq(button->gpio); - disable_irq_wake(irq); - } - } - } - - return 0; -} -#else 
-#define gpio_keys_suspend NULL -#define gpio_keys_resume NULL -#endif - struct platform_driver gpio_keys_device_driver = { .probe = gpio_keys_probe, .remove = __devexit_p(gpio_keys_remove), - .suspend = gpio_keys_suspend, - .resume = gpio_keys_resume, .driver = { .name = "gpio-keys", } diff --git a/trunk/drivers/input/keyboard/jornada680_kbd.c b/trunk/drivers/input/keyboard/jornada680_kbd.c deleted file mode 100644 index bec1cf483723..000000000000 --- a/trunk/drivers/input/keyboard/jornada680_kbd.c +++ /dev/null @@ -1,277 +0,0 @@ -/* - * drivers/input/keyboard/jornada680_kbd.c - * - * HP Jornada 620/660/680/690 scan keyboard platform driver - * Copyright (C) 2007 Kristoffer Ericson - * - * Based on hp680_keyb.c - * Copyright (C) 2006 Paul Mundt - * Copyright (C) 2005 Andriy Skulysh - * Split from drivers/input/keyboard/hp600_keyb.c - * Copyright (C) 2000 Yaegashi Takeshi (hp6xx kbd scan routine and translation table) - * Copyright (C) 2000 Niibe Yutaka (HP620 Keyb translation table) - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#define PCCR 0xa4000104 -#define PDCR 0xa4000106 -#define PECR 0xa4000108 -#define PFCR 0xa400010a -#define PCDR 0xa4000124 -#define PDDR 0xa4000126 -#define PEDR 0xa4000128 -#define PFDR 0xa400012a -#define PGDR 0xa400012c -#define PHDR 0xa400012e -#define PJDR 0xa4000130 -#define PKDR 0xa4000132 -#define PLDR 0xa4000134 - -static const unsigned short jornada_scancodes[] = { -/* PTD1 */ KEY_CAPSLOCK, KEY_MACRO, KEY_LEFTCTRL, 0, KEY_ESC, 0, 0, 0, /* 1 -> 8 */ - KEY_F1, KEY_F2, KEY_F3, KEY_F8, KEY_F7, KEY_F2, KEY_F4, KEY_F5, /* 9 -> 16 */ -/* PTD5 */ KEY_SLASH, KEY_APOSTROPHE, KEY_ENTER, 0, KEY_Z, 0, 0, 0, /* 17 -> 24 */ - KEY_X, KEY_C, KEY_V, KEY_DOT, KEY_COMMA, KEY_M, KEY_B, KEY_N, /* 25 -> 32 */ -/* PTD7 */ KEY_KP2, KEY_KP6, 0, 0, 0, 0, 0, 0, /* 33 -> 40 */ - 0, 0, 0, KEY_KP4, 0, 0, KEY_LEFTALT, KEY_HANJA, /* 41 -> 48 */ -/* PTE0 */ 0, 0, 0, 0, KEY_FINANCE, 0, 0, 0, /* 49 -> 56 */ - KEY_LEFTCTRL, 0, KEY_SPACE, KEY_KPDOT, KEY_VOLUMEUP, 249, 0, 0, /* 57 -> 64 */ -/* PTE1 */ KEY_SEMICOLON, KEY_RIGHTBRACE, KEY_BACKSLASH, 0, KEY_A, 0, 0, 0,/* 65 -> 72 */ - KEY_S, KEY_D, KEY_F, KEY_L, KEY_K, KEY_J, KEY_G, KEY_H, /* 73 -> 80 */ -/* PTE3 */ KEY_KP8, KEY_LEFTMETA, KEY_RIGHTSHIFT, 0, KEY_TAB, 0, 0,0, /* 81 -> 88 */ - 0, KEY_LEFTSHIFT, 0, 0, 0, 0, 0, 0, /* 89 -> 96 */ -/* PTE6 */ KEY_P, KEY_LEFTBRACE, KEY_BACKSPACE, 0, KEY_Q, 0, 0, 0, /* 97 -> 104 */ - KEY_W, KEY_E, KEY_R, KEY_O, KEY_I, KEY_U, KEY_T, KEY_R, /* 105 -> 112 */ -/* PTE7 */ KEY_0, KEY_MINUS, KEY_EQUAL, 0, KEY_1, 0, 0, 0, /* 113 -> 120 */ - KEY_2, KEY_3, KEY_4, KEY_9, KEY_8, KEY_7, KEY_5, KEY_6, /* 121 -> 128 */ -/* **** */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0 -}; - -#define JORNADA_SCAN_SIZE 18 - -struct jornadakbd { - struct input_polled_dev *poll_dev; - unsigned short keymap[ARRAY_SIZE(jornada_scancodes)]; - unsigned char length; - unsigned char old_scan[JORNADA_SCAN_SIZE]; - unsigned char 
new_scan[JORNADA_SCAN_SIZE]; -}; - -static void jornada_parse_kbd(struct jornadakbd *jornadakbd) -{ - struct input_dev *input_dev = jornadakbd->poll_dev->input; - unsigned short *keymap = jornadakbd->keymap; - unsigned int sync_me = 0; - unsigned int i, j; - - for (i = 0; i < JORNADA_SCAN_SIZE; i++) { - unsigned char new = jornadakbd->new_scan[i]; - unsigned char old = jornadakbd->old_scan[i]; - unsigned int xor = new ^ old; - - if (xor == 0) - continue; - - for (j = 0; j < 8; j++) { - unsigned int bit = 1 << j; - if (xor & bit) { - unsigned int scancode = (i << 3) + j; - input_event(input_dev, - EV_MSC, MSC_SCAN, scancode); - input_report_key(input_dev, - keymap[scancode], - !(new & bit)); - sync_me = 1; - } - } - } - - if (sync_me) - input_sync(input_dev); -} - -static void jornada_scan_keyb(unsigned char *s) -{ - int i; - unsigned short ec_static, dc_static; /* = UINT16_t */ - unsigned char matrix_switch[] = { - 0xfd, 0xff, /* PTD1 PD(1) */ - 0xdf, 0xff, /* PTD5 PD(5) */ - 0x7f, 0xff, /* PTD7 PD(7) */ - 0xff, 0xfe, /* PTE0 PE(0) */ - 0xff, 0xfd, /* PTE1 PE(1) */ - 0xff, 0xf7, /* PTE3 PE(3) */ - 0xff, 0xbf, /* PTE6 PE(6) */ - 0xff, 0x7f, /* PTE7 PE(7) */ - }, *t = matrix_switch; - /* PD(x) : - 1. 0xcc0c & (1~(1 << (2*(x)+1))))) - 2. (0xf0cf & 0xfffff) */ - /* PE(x) : - 1. 0xcc0c & 0xffff - 2. 
0xf0cf & (1~(1 << (2*(x)+1))))) */ - unsigned short matrix_PDE[] = { - 0xcc04, 0xf0cf, /* PD(1) */ - 0xc40c, 0xf0cf, /* PD(5) */ - 0x4c0c, 0xf0cf, /* PD(7) */ - 0xcc0c, 0xf0cd, /* PE(0) */ - 0xcc0c, 0xf0c7, /* PE(1) */ - 0xcc0c, 0xf04f, /* PE(3) */ - 0xcc0c, 0xd0cf, /* PE(6) */ - 0xcc0c, 0x70cf, /* PE(7) */ - }, *y = matrix_PDE; - - /* Save these control reg bits */ - dc_static = (ctrl_inw(PDCR) & (~0xcc0c)); - ec_static = (ctrl_inw(PECR) & (~0xf0cf)); - - for (i = 0; i < 8; i++) { - /* disable output for all but the one we want to scan */ - ctrl_outw((dc_static | *y++), PDCR); - ctrl_outw((ec_static | *y++), PECR); - udelay(5); - - /* Get scanline row */ - ctrl_outb(*t++, PDDR); - ctrl_outb(*t++, PEDR); - udelay(50); - - /* Read data */ - *s++ = ctrl_inb(PCDR); - *s++ = ctrl_inb(PFDR); - } - /* Scan no lines */ - ctrl_outb(0xff, PDDR); - ctrl_outb(0xff, PEDR); - - /* Enable all scanlines */ - ctrl_outw((dc_static | (0x5555 & 0xcc0c)),PDCR); - ctrl_outw((ec_static | (0x5555 & 0xf0cf)),PECR); - - /* Ignore extra keys and events */ - *s++ = ctrl_inb(PGDR); - *s++ = ctrl_inb(PHDR); -} - -static void jornadakbd680_poll(struct input_polled_dev *dev) -{ - struct jornadakbd *jornadakbd = dev->private; - - jornada_scan_keyb(jornadakbd->new_scan); - jornada_parse_kbd(jornadakbd); - memcpy(jornadakbd->old_scan, jornadakbd->new_scan, JORNADA_SCAN_SIZE); -} - -static int __devinit jornada680kbd_probe(struct platform_device *pdev) -{ - struct jornadakbd *jornadakbd; - struct input_polled_dev *poll_dev; - struct input_dev *input_dev; - int i, error; - - jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL); - if (!jornadakbd) - return -ENOMEM; - - poll_dev = input_allocate_polled_device(); - if (!poll_dev) { - error = -ENOMEM; - goto failed; - } - - platform_set_drvdata(pdev, jornadakbd); - - jornadakbd->poll_dev = poll_dev; - - memcpy(jornadakbd->keymap, jornada_scancodes, - sizeof(jornadakbd->keymap)); - - poll_dev->private = jornadakbd; - poll_dev->poll = 
jornadakbd680_poll; - poll_dev->poll_interval = 50; /* msec */ - - input_dev = poll_dev->input; - input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); - input_dev->name = "HP Jornada 680 keyboard"; - input_dev->phys = "jornadakbd/input0"; - input_dev->keycode = jornadakbd->keymap; - input_dev->keycodesize = sizeof(unsigned short); - input_dev->keycodemax = ARRAY_SIZE(jornada_scancodes); - input_dev->dev.parent = &pdev->dev; - input_dev->id.bustype = BUS_HOST; - - for (i = 0; i < 128; i++) - if (jornadakbd->keymap[i]) - __set_bit(jornadakbd->keymap[i], input_dev->keybit); - __clear_bit(KEY_RESERVED, input_dev->keybit); - - input_set_capability(input_dev, EV_MSC, MSC_SCAN); - - error = input_register_polled_device(jornadakbd->poll_dev); - if (error) - goto failed; - - return 0; - - failed: - printk(KERN_ERR "Jornadakbd: failed to register driver, error: %d\n", - error); - platform_set_drvdata(pdev, NULL); - input_free_polled_device(poll_dev); - kfree(jornadakbd); - return error; - -} - -static int __devexit jornada680kbd_remove(struct platform_device *pdev) -{ - struct jornadakbd *jornadakbd = platform_get_drvdata(pdev); - - platform_set_drvdata(pdev, NULL); - input_unregister_polled_device(jornadakbd->poll_dev); - input_free_polled_device(jornadakbd->poll_dev); - kfree(jornadakbd); - - return 0; -} - -static struct platform_driver jornada680kbd_driver = { - .driver = { - .name = "jornada680_kbd", - }, - .probe = jornada680kbd_probe, - .remove = __devexit_p(jornada680kbd_remove), -}; - -static int __init jornada680kbd_init(void) -{ - return platform_driver_register(&jornada680kbd_driver); -} - -static void __exit jornada680kbd_exit(void) -{ - platform_driver_unregister(&jornada680kbd_driver); -} - -module_init(jornada680kbd_init); -module_exit(jornada680kbd_exit); - -MODULE_AUTHOR("Kristoffer Ericson "); -MODULE_DESCRIPTION("HP Jornada 620/660/680/690 Keyboard Driver"); -MODULE_LICENSE("GPLv2"); diff --git a/trunk/drivers/input/keyboard/jornada720_kbd.c 
b/trunk/drivers/input/keyboard/jornada720_kbd.c deleted file mode 100644 index e6696b3c9416..000000000000 --- a/trunk/drivers/input/keyboard/jornada720_kbd.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * drivers/input/keyboard/jornada720_kbd.c - * - * HP Jornada 720 keyboard platform driver - * - * Copyright (C) 2006/2007 Kristoffer Ericson - * - * Copyright (C) 2006 jornada 720 kbd driver by - Filip Zyzniewsk - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Kristoffer Ericson "); -MODULE_DESCRIPTION("HP Jornada 710/720/728 keyboard driver"); -MODULE_LICENSE("GPLv2"); - -static unsigned short jornada_std_keymap[128] = { /* ROW */ - 0, KEY_ESC, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, KEY_F7, /* #1 */ - KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE, /* -> */ - 0, KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, /* #2 */ - KEY_0, KEY_MINUS, KEY_EQUAL,0, 0, 0, /* -> */ - 0, KEY_Q, KEY_W, KEY_E, KEY_R, KEY_T, KEY_Y, KEY_U, KEY_I, KEY_O, /* #3 */ - KEY_P, KEY_BACKSLASH, KEY_BACKSPACE, 0, 0, 0, /* -> */ - 0, KEY_A, KEY_S, KEY_D, KEY_F, KEY_G, KEY_H, KEY_J, KEY_K, KEY_L, /* #4 */ - KEY_SEMICOLON, KEY_LEFTBRACE, KEY_RIGHTBRACE, 0, 0, 0, /* -> */ - 0, KEY_Z, KEY_X, KEY_C, KEY_V, KEY_B, KEY_N, KEY_M, KEY_COMMA, /* #5 */ - KEY_DOT, KEY_KPMINUS, KEY_APOSTROPHE, KEY_ENTER, 0, 0,0, /* -> */ - 0, KEY_TAB, 0, KEY_LEFTSHIFT, 0, KEY_APOSTROPHE, 0, 0, 0, 0, /* #6 */ - KEY_UP, 0, KEY_RIGHTSHIFT, 0, 0, 0,0, 0, 0, 0, 0, KEY_LEFTALT, KEY_GRAVE, /* -> */ - 0, 0, KEY_LEFT, KEY_DOWN, KEY_RIGHT, 0, 0, 0, 0,0, KEY_KPASTERISK, /* -> */ - KEY_LEFTCTRL, 0, KEY_SPACE, 0, 0, 0, KEY_SLASH, KEY_DELETE, 0, 0, /* -> */ - 0, 0, 0, KEY_POWER, /* -> */ -}; - -struct jornadakbd { - 
unsigned short keymap[ARRAY_SIZE(jornada_std_keymap)]; - struct input_dev *input; -}; - -static irqreturn_t jornada720_kbd_interrupt(int irq, void *dev_id) -{ - struct platform_device *pdev = dev_id; - struct jornadakbd *jornadakbd = platform_get_drvdata(pdev); - struct input_dev *input = jornadakbd->input; - u8 count, kbd_data, scan_code; - - /* startup ssp with spinlock */ - jornada_ssp_start(); - - if (jornada_ssp_inout(GETSCANKEYCODE) != TXDUMMY) { - printk(KERN_DEBUG - "jornada720_kbd: " - "GetKeycode command failed with ETIMEDOUT, " - "flushed bus\n"); - } else { - /* How many keycodes are waiting for us? */ - count = jornada_ssp_byte(TXDUMMY); - - /* Lets drag them out one at a time */ - while (count--) { - /* Exchange TxDummy for location (keymap[kbddata]) */ - kbd_data = jornada_ssp_byte(TXDUMMY); - scan_code = kbd_data & 0x7f; - - input_event(input, EV_MSC, MSC_SCAN, scan_code); - input_report_key(input, jornadakbd->keymap[scan_code], - !(kbd_data & 0x80)); - input_sync(input); - } - } - - /* release spinlock and turn off ssp */ - jornada_ssp_end(); - - return IRQ_HANDLED; -}; - -static int __devinit jornada720_kbd_probe(struct platform_device *pdev) -{ - struct jornadakbd *jornadakbd; - struct input_dev *input_dev; - int i, err; - - jornadakbd = kzalloc(sizeof(struct jornadakbd), GFP_KERNEL); - input_dev = input_allocate_device(); - if (!jornadakbd || !input_dev) { - err = -ENOMEM; - goto fail1; - } - - platform_set_drvdata(pdev, jornadakbd); - - memcpy(jornadakbd->keymap, jornada_std_keymap, - sizeof(jornada_std_keymap)); - jornadakbd->input = input_dev; - - input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); - input_dev->name = "HP Jornada 720 keyboard"; - input_dev->phys = "jornadakbd/input0"; - input_dev->keycode = jornadakbd->keymap; - input_dev->keycodesize = sizeof(unsigned short); - input_dev->keycodemax = ARRAY_SIZE(jornada_std_keymap); - input_dev->id.bustype = BUS_HOST; - input_dev->dev.parent = &pdev->dev; - - for (i = 0; i < 
ARRAY_SIZE(jornadakbd->keymap); i++) - __set_bit(jornadakbd->keymap[i], input_dev->keybit); - __clear_bit(KEY_RESERVED, input_dev->keybit); - - input_set_capability(input_dev, EV_MSC, MSC_SCAN); - - err = request_irq(IRQ_GPIO0, - jornada720_kbd_interrupt, - IRQF_DISABLED | IRQF_TRIGGER_FALLING, - "jornadakbd", pdev); - if (err) { - printk(KERN_INFO "jornadakbd720_kbd: Unable to grab IRQ\n"); - goto fail1; - } - - err = input_register_device(jornadakbd->input); - if (err) - goto fail2; - - return 0; - - fail2: /* IRQ, DEVICE, MEMORY */ - free_irq(IRQ_GPIO0, pdev); - fail1: /* DEVICE, MEMORY */ - platform_set_drvdata(pdev, NULL); - input_free_device(input_dev); - kfree(jornadakbd); - return err; -}; - -static int __devexit jornada720_kbd_remove(struct platform_device *pdev) -{ - struct jornadakbd *jornadakbd = platform_get_drvdata(pdev); - - free_irq(IRQ_GPIO0, pdev); - platform_set_drvdata(pdev, NULL); - input_unregister_device(jornadakbd->input); - kfree(jornadakbd); - - return 0; -} - -static struct platform_driver jornada720_kbd_driver = { - .driver = { - .name = "jornada720_kbd", - }, - .probe = jornada720_kbd_probe, - .remove = __devexit_p(jornada720_kbd_remove), -}; - -static int __init jornada720_kbd_init(void) -{ - return platform_driver_register(&jornada720_kbd_driver); -} - -static void __exit jornada720_kbd_exit(void) -{ - platform_driver_unregister(&jornada720_kbd_driver); -} - -module_init(jornada720_kbd_init); -module_exit(jornada720_kbd_exit); diff --git a/trunk/drivers/input/keyboard/maple_keyb.c b/trunk/drivers/input/keyboard/maple_keyb.c deleted file mode 100644 index 2b404284c28a..000000000000 --- a/trunk/drivers/input/keyboard/maple_keyb.c +++ /dev/null @@ -1,252 +0,0 @@ -/* - * SEGA Dreamcast keyboard driver - * Based on drivers/usb/usbkbd.c - * Copyright YAEGASHI Takeshi, 2001 - * Porting to 2.6 Copyright Adrian McMenamin, 2007 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General 
Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see the file COPYING, or write - * to the Free Software Foundation, Inc., - * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* Very simple mutex to ensure proper cleanup */ -static DEFINE_MUTEX(maple_keyb_mutex); - -#define NR_SCANCODES 256 - -MODULE_AUTHOR("YAEGASHI Takeshi, Adrian McMenamin"); -MODULE_DESCRIPTION("SEGA Dreamcast keyboard driver"); -MODULE_LICENSE("GPL"); - -struct dc_kbd { - struct input_dev *dev; - unsigned short keycode[NR_SCANCODES]; - unsigned char new[8]; - unsigned char old[8]; -}; - -static const unsigned short dc_kbd_keycode[NR_SCANCODES] = { - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_A, KEY_B, KEY_C, KEY_D, - KEY_E, KEY_F, KEY_G, KEY_H, KEY_I, KEY_J, KEY_K, KEY_L, - KEY_M, KEY_N, KEY_O, KEY_P, KEY_Q, KEY_R, KEY_S, KEY_T, - KEY_U, KEY_V, KEY_W, KEY_X, KEY_Y, KEY_Z, KEY_1, KEY_2, - KEY_3, KEY_4, KEY_5, KEY_6, KEY_7, KEY_8, KEY_9, KEY_0, - KEY_ENTER, KEY_ESC, KEY_BACKSPACE, KEY_TAB, KEY_SPACE, KEY_MINUS, KEY_EQUAL, KEY_LEFTBRACE, - KEY_RIGHTBRACE, KEY_BACKSLASH, KEY_BACKSLASH, KEY_SEMICOLON, KEY_APOSTROPHE, KEY_GRAVE, KEY_COMMA, - KEY_DOT, KEY_SLASH, KEY_CAPSLOCK, KEY_F1, KEY_F2, KEY_F3, KEY_F4, KEY_F5, KEY_F6, - KEY_F7, KEY_F8, KEY_F9, KEY_F10, KEY_F11, KEY_F12, KEY_SYSRQ, - KEY_SCROLLLOCK, KEY_PAUSE, KEY_INSERT, KEY_HOME, KEY_PAGEUP, KEY_DELETE, - KEY_END, KEY_PAGEDOWN, KEY_RIGHT, KEY_LEFT, KEY_DOWN, KEY_UP, - KEY_NUMLOCK, 
KEY_KPSLASH, KEY_KPASTERISK, KEY_KPMINUS, KEY_KPPLUS, KEY_KPENTER, KEY_KP1, KEY_KP2, - KEY_KP3, KEY_KP4, KEY_KP5, KEY_KP6, KEY_KP7, KEY_KP8, KEY_KP9, KEY_KP0, KEY_KPDOT, - KEY_102ND, KEY_COMPOSE, KEY_POWER, KEY_KPEQUAL, KEY_F13, KEY_F14, KEY_F15, - KEY_F16, KEY_F17, KEY_F18, KEY_F19, KEY_F20, - KEY_F21, KEY_F22, KEY_F23, KEY_F24, KEY_OPEN, KEY_HELP, KEY_PROPS, KEY_FRONT, - KEY_STOP, KEY_AGAIN, KEY_UNDO, KEY_CUT, KEY_COPY, KEY_PASTE, KEY_FIND, KEY_MUTE, - KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_KPCOMMA, KEY_RESERVED, KEY_RO, KEY_KATAKANAHIRAGANA , KEY_YEN, - KEY_HENKAN, KEY_MUHENKAN, KEY_KPJPCOMMA, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_HANGEUL, KEY_HANJA, KEY_KATAKANA, KEY_HIRAGANA, KEY_ZENKAKUHANKAKU, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, - KEY_LEFTCTRL, KEY_LEFTSHIFT, KEY_LEFTALT, KEY_LEFTMETA, KEY_RIGHTCTRL, KEY_RIGHTSHIFT, KEY_RIGHTALT, KEY_RIGHTMETA, - KEY_PLAYPAUSE, KEY_STOPCD, KEY_PREVIOUSSONG, 
KEY_NEXTSONG, KEY_EJECTCD, KEY_VOLUMEUP, KEY_VOLUMEDOWN, KEY_MUTE, - KEY_WWW, KEY_BACK, KEY_FORWARD, KEY_STOP, KEY_FIND, KEY_SCROLLUP, KEY_SCROLLDOWN, KEY_EDIT, KEY_SLEEP, - KEY_SCREENLOCK, KEY_REFRESH, KEY_CALC, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED, KEY_RESERVED -}; - -static void dc_scan_kbd(struct dc_kbd *kbd) -{ - struct input_dev *dev = kbd->dev; - void *ptr; - int code, keycode; - int i; - - for (i = 0; i < 8; i++) { - code = i + 224; - keycode = kbd->keycode[code]; - input_event(dev, EV_MSC, MSC_SCAN, code); - input_report_key(dev, keycode, (kbd->new[0] >> i) & 1); - } - - for (i = 2; i < 8; i++) { - ptr = memchr(kbd->new + 2, kbd->old[i], 6); - code = kbd->old[i]; - if (code > 3 && ptr == NULL) { - keycode = kbd->keycode[code]; - if (keycode) { - input_event(dev, EV_MSC, MSC_SCAN, code); - input_report_key(dev, keycode, 0); - } else - printk(KERN_DEBUG "maple_keyb: " - "Unknown key (scancode %#x) released.", - code); - } - ptr = memchr(kbd->old + 2, kbd->new[i], 6); - code = kbd->new[i]; - if (code > 3 && ptr) { - keycode = kbd->keycode[code]; - if (keycode) { - input_event(dev, EV_MSC, MSC_SCAN, code); - input_report_key(dev, keycode, 1); - } else - printk(KERN_DEBUG "maple_keyb: " - "Unknown key (scancode %#x) pressed.", - code); - } - } - input_sync(dev); - memcpy(kbd->old, kbd->new, 8); -} - -static void dc_kbd_callback(struct mapleq *mq) -{ - struct maple_device *mapledev = mq->dev; - struct dc_kbd *kbd = mapledev->private_data; - unsigned long *buf = mq->recvbuf; - - /* - * We should always be getting the lock because the only - * time it may be locked if driver is in cleanup phase. 
- */ - if (likely(mutex_trylock(&maple_keyb_mutex))) { - - if (buf[1] == mapledev->function) { - memcpy(kbd->new, buf + 2, 8); - dc_scan_kbd(kbd); - } - - mutex_unlock(&maple_keyb_mutex); - } -} - -static int dc_kbd_connect(struct maple_device *mdev) -{ - int i, error; - struct dc_kbd *kbd; - struct input_dev *dev; - - if (!(mdev->function & MAPLE_FUNC_KEYBOARD)) - return -EINVAL; - - kbd = kzalloc(sizeof(struct dc_kbd), GFP_KERNEL); - dev = input_allocate_device(); - if (!kbd || !dev) { - error = -ENOMEM; - goto fail; - } - - mdev->private_data = kbd; - - kbd->dev = dev; - memcpy(kbd->keycode, dc_kbd_keycode, sizeof(kbd->keycode)); - - dev->name = mdev->product_name; - dev->evbit[0] = BIT(EV_KEY) | BIT(EV_REP); - dev->keycode = kbd->keycode; - dev->keycodesize = sizeof (unsigned short); - dev->keycodemax = ARRAY_SIZE(kbd->keycode); - dev->id.bustype = BUS_HOST; - dev->dev.parent = &mdev->dev; - - for (i = 0; i < NR_SCANCODES; i++) - __set_bit(dc_kbd_keycode[i], dev->keybit); - __clear_bit(KEY_RESERVED, dev->keybit); - - input_set_capability(dev, EV_MSC, MSC_SCAN); - input_set_drvdata(dev, kbd); - - error = input_register_device(dev); - if (error) - goto fail; - - /* Maple polling is locked to VBLANK - which may be just 50/s */ - maple_getcond_callback(mdev, dc_kbd_callback, HZ/50, MAPLE_FUNC_KEYBOARD); - return 0; - - fail: - input_free_device(dev); - kfree(kbd); - mdev->private_data = NULL; - return error; -} - -static void dc_kbd_disconnect(struct maple_device *mdev) -{ - struct dc_kbd *kbd; - - mutex_lock(&maple_keyb_mutex); - - kbd = mdev->private_data; - mdev->private_data = NULL; - input_unregister_device(kbd->dev); - kfree(kbd); - - mutex_unlock(&maple_keyb_mutex); -} - -/* allow the keyboard to be used */ -static int probe_maple_kbd(struct device *dev) -{ - struct maple_device *mdev = to_maple_dev(dev); - struct maple_driver *mdrv = to_maple_driver(dev->driver); - int error; - - error = dc_kbd_connect(mdev); - if (error) - return error; - - mdev->driver = 
mdrv; - mdev->registered = 1; - - return 0; -} - -static struct maple_driver dc_kbd_driver = { - .function = MAPLE_FUNC_KEYBOARD, - .connect = dc_kbd_connect, - .disconnect = dc_kbd_disconnect, - .drv = { - .name = "Dreamcast_keyboard", - .probe = probe_maple_kbd, - }, -}; - -static int __init dc_kbd_init(void) -{ - return maple_driver_register(&dc_kbd_driver.drv); -} - -static void __exit dc_kbd_exit(void) -{ - driver_unregister(&dc_kbd_driver.drv); -} - -module_init(dc_kbd_init); -module_exit(dc_kbd_exit); diff --git a/trunk/drivers/input/keyboard/omap-keypad.c b/trunk/drivers/input/keyboard/omap-keypad.c index 76f1969552c5..3a228634f101 100644 --- a/trunk/drivers/input/keyboard/omap-keypad.c +++ b/trunk/drivers/input/keyboard/omap-keypad.c @@ -233,7 +233,7 @@ static void omap_kp_tasklet(unsigned long data) omap_writew(0, OMAP_MPUIO_BASE + OMAP_MPUIO_KBD_MASKIT); kp_cur_group = -1; } - } + } } static ssize_t omap_kp_enable_show(struct device *dev, @@ -318,7 +318,7 @@ static int __init omap_kp_probe(struct platform_device *pdev) keymap = pdata->keymap; if (pdata->rep) - __set_bit(EV_REP, input_dev->evbit); + set_bit(EV_REP, input_dev->evbit); if (pdata->delay) omap_kp->delay = pdata->delay; @@ -365,9 +365,9 @@ static int __init omap_kp_probe(struct platform_device *pdev) goto err2; /* setup input device */ - __set_bit(EV_KEY, input_dev->evbit); + set_bit(EV_KEY, input_dev->evbit); for (i = 0; keymap[i] != 0; i++) - __set_bit(keymap[i] & KEY_MAX, input_dev->keybit); + set_bit(keymap[i] & KEY_MAX, input_dev->keybit); input_dev->name = "omap-keypad"; input_dev->phys = "omap-keypad/input0"; input_dev->dev.parent = &pdev->dev; @@ -377,6 +377,10 @@ static int __init omap_kp_probe(struct platform_device *pdev) input_dev->id.product = 0x0001; input_dev->id.version = 0x0100; + input_dev->keycode = keymap; + input_dev->keycodesize = sizeof(unsigned int); + input_dev->keycodemax = pdata->keymapsize; + ret = input_register_device(omap_kp->input); if (ret < 0) { 
printk(KERN_ERR "Unable to register omap-keypad input device\n"); @@ -399,15 +403,15 @@ static int __init omap_kp_probe(struct platform_device *pdev) } else { for (irq_idx = 0; irq_idx < omap_kp->rows; irq_idx++) { if (request_irq(OMAP_GPIO_IRQ(row_gpios[irq_idx]), - omap_kp_interrupt, + omap_kp_interrupt, IRQF_TRIGGER_FALLING, - "omap-keypad", omap_kp) < 0) + "omap-keypad", omap_kp) < 0) goto err5; } } return 0; err5: - for (i = irq_idx - 1; i >=0; i--) + for (i = irq_idx-1; i >=0; i--) free_irq(row_gpios[i], 0); err4: input_unregister_device(omap_kp->input); @@ -436,9 +440,9 @@ static int omap_kp_remove(struct platform_device *pdev) if (cpu_is_omap24xx()) { int i; for (i = 0; i < omap_kp->cols; i++) - omap_free_gpio(col_gpios[i]); + omap_free_gpio(col_gpios[i]); for (i = 0; i < omap_kp->rows; i++) { - omap_free_gpio(row_gpios[i]); + omap_free_gpio(row_gpios[i]); free_irq(OMAP_GPIO_IRQ(row_gpios[i]), 0); } } else { diff --git a/trunk/drivers/input/mouse/alps.c b/trunk/drivers/input/mouse/alps.c index 64d70a9b714c..2c5f11a4f6b4 100644 --- a/trunk/drivers/input/mouse/alps.c +++ b/trunk/drivers/input/mouse/alps.c @@ -48,13 +48,11 @@ static const struct alps_model_info alps_model_data[] = { { { 0x63, 0x02, 0x50 }, 0xef, 0xef, ALPS_FW_BK_1 }, /* NEC Versa L320 */ { { 0x63, 0x02, 0x64 }, 0xf8, 0xf8, 0 }, { { 0x63, 0x03, 0xc8 }, 0xf8, 0xf8, ALPS_PASS }, /* Dell Latitude D800 */ - { { 0x73, 0x00, 0x0a }, 0xf8, 0xf8, ALPS_DUALPOINT }, /* ThinkPad R61 8918-5QG */ { { 0x73, 0x02, 0x0a }, 0xf8, 0xf8, 0 }, { { 0x73, 0x02, 0x14 }, 0xf8, 0xf8, ALPS_FW_BK_2 }, /* Ahtec Laptop */ { { 0x20, 0x02, 0x0e }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, /* XXX */ { { 0x22, 0x02, 0x0a }, 0xf8, 0xf8, ALPS_PASS | ALPS_DUALPOINT }, { { 0x22, 0x02, 0x14 }, 0xff, 0xff, ALPS_PASS | ALPS_DUALPOINT }, /* Dell Latitude D600 */ - { { 0x73, 0x02, 0x50 }, 0xcf, 0xff, ALPS_FW_BK_1 } /* Dell Vostro 1400 */ }; /* diff --git a/trunk/drivers/input/mouse/appletouch.c b/trunk/drivers/input/mouse/appletouch.c 
index 0117817bf538..a1804bfdbb8c 100644 --- a/trunk/drivers/input/mouse/appletouch.c +++ b/trunk/drivers/input/mouse/appletouch.c @@ -502,23 +502,18 @@ static void atp_complete(struct urb* urb) /* reset the accumulator on release */ memset(dev->xy_acc, 0, sizeof(dev->xy_acc)); - } - - /* Geyser 3 will continue to send packets continually after - the first touch unless reinitialised. Do so if it's been - idle for a while in order to avoid waking the kernel up - several hundred times a second */ - if (atp_is_geyser_3(dev)) { - if (!x && !y && !key) { + /* Geyser 3 will continue to send packets continually after + the first touch unless reinitialised. Do so if it's been + idle for a while in order to avoid waking the kernel up + several hundred times a second */ + if (!key && atp_is_geyser_3(dev)) { dev->idlecount++; if (dev->idlecount == 10) { dev->valid = 0; schedule_work(&dev->work); } } - else - dev->idlecount = 0; } input_report_key(dev->input, BTN_LEFT, key); diff --git a/trunk/drivers/input/mouse/lifebook.c b/trunk/drivers/input/mouse/lifebook.c index d7de4c53b3d8..608674d0be8b 100644 --- a/trunk/drivers/input/mouse/lifebook.c +++ b/trunk/drivers/input/mouse/lifebook.c @@ -96,14 +96,6 @@ static const struct dmi_system_id lifebook_dmi_table[] = { }, .callback = lifebook_set_6byte_proto, }, - { - .ident = "CF-72", - .matches = { - DMI_MATCH(DMI_PRODUCT_NAME, "CF-72"), - }, - .callback = lifebook_set_serio_phys, - .driver_data = "isa0060/serio3", - }, { .ident = "Lifebook B142", .matches = { @@ -290,7 +282,7 @@ static int lifebook_create_relative_device(struct psmouse *psmouse) int lifebook_init(struct psmouse *psmouse) { struct input_dev *dev1 = psmouse->dev; - int max_coord = lifebook_use_6byte_proto ? 4096 : 1024; + int max_coord = lifebook_use_6byte_proto ? 
1024 : 4096; if (lifebook_absolute_mode(psmouse)) return -1; diff --git a/trunk/drivers/input/mouse/psmouse-base.c b/trunk/drivers/input/mouse/psmouse-base.c index 073525756532..b9f0fb2530e2 100644 --- a/trunk/drivers/input/mouse/psmouse-base.c +++ b/trunk/drivers/input/mouse/psmouse-base.c @@ -648,10 +648,9 @@ static int psmouse_extensions(struct psmouse *psmouse, /* * Reset to defaults in case the device got confused by extended - * protocol probes. Note that we follow up with full reset because - * some mice put themselves to sleep when they see PSMOUSE_RESET_DIS. + * protocol probes. Note that we do full reset becuase some mice + * put themselves to sleep when see PSMOUSE_RESET_DIS. */ - ps2_command(&psmouse->ps2dev, NULL, PSMOUSE_CMD_RESET_DIS); psmouse_reset(psmouse); if (max_proto >= PSMOUSE_IMEX && im_explorer_detect(psmouse, set_properties) == 0) diff --git a/trunk/drivers/input/mousedev.c b/trunk/drivers/input/mousedev.c index 79146d6ed2ab..9173916b8be5 100644 --- a/trunk/drivers/input/mousedev.c +++ b/trunk/drivers/input/mousedev.c @@ -61,11 +61,9 @@ struct mousedev { int open; int minor; char name[16]; - struct input_handle handle; wait_queue_head_t wait; struct list_head client_list; - spinlock_t client_lock; /* protects client_list */ - struct mutex mutex; + struct input_handle handle; struct device dev; struct list_head mixdev_node; @@ -115,137 +113,108 @@ static unsigned char mousedev_imex_seq[] = { 0xf3, 200, 0xf3, 200, 0xf3, 80 }; static struct input_handler mousedev_handler; static struct mousedev *mousedev_table[MOUSEDEV_MINORS]; -static DEFINE_MUTEX(mousedev_table_mutex); static struct mousedev *mousedev_mix; static LIST_HEAD(mousedev_mix_list); -static void mixdev_open_devices(void); -static void mixdev_close_devices(void); - #define fx(i) (mousedev->old_x[(mousedev->pkt_count - (i)) & 03]) #define fy(i) (mousedev->old_y[(mousedev->pkt_count - (i)) & 03]) -static void mousedev_touchpad_event(struct input_dev *dev, - struct mousedev *mousedev, 
- unsigned int code, int value) +static void mousedev_touchpad_event(struct input_dev *dev, struct mousedev *mousedev, unsigned int code, int value) { int size, tmp; enum { FRACTION_DENOM = 128 }; switch (code) { + case ABS_X: + fx(0) = value; + if (mousedev->touch && mousedev->pkt_count >= 2) { + size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; + if (size == 0) + size = 256 * 2; + tmp = ((value - fx(2)) * (256 * FRACTION_DENOM)) / size; + tmp += mousedev->frac_dx; + mousedev->packet.dx = tmp / FRACTION_DENOM; + mousedev->frac_dx = tmp - mousedev->packet.dx * FRACTION_DENOM; + } + break; - case ABS_X: - fx(0) = value; - if (mousedev->touch && mousedev->pkt_count >= 2) { - size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; - if (size == 0) - size = 256 * 2; - tmp = ((value - fx(2)) * 256 * FRACTION_DENOM) / size; - tmp += mousedev->frac_dx; - mousedev->packet.dx = tmp / FRACTION_DENOM; - mousedev->frac_dx = - tmp - mousedev->packet.dx * FRACTION_DENOM; - } - break; - - case ABS_Y: - fy(0) = value; - if (mousedev->touch && mousedev->pkt_count >= 2) { - /* use X size to keep the same scale */ - size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; - if (size == 0) - size = 256 * 2; - tmp = -((value - fy(2)) * 256 * FRACTION_DENOM) / size; - tmp += mousedev->frac_dy; - mousedev->packet.dy = tmp / FRACTION_DENOM; - mousedev->frac_dy = tmp - - mousedev->packet.dy * FRACTION_DENOM; - } - break; + case ABS_Y: + fy(0) = value; + if (mousedev->touch && mousedev->pkt_count >= 2) { + /* use X size to keep the same scale */ + size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; + if (size == 0) + size = 256 * 2; + tmp = -((value - fy(2)) * (256 * FRACTION_DENOM)) / size; + tmp += mousedev->frac_dy; + mousedev->packet.dy = tmp / FRACTION_DENOM; + mousedev->frac_dy = tmp - mousedev->packet.dy * FRACTION_DENOM; + } + break; } } -static void mousedev_abs_event(struct input_dev *dev, struct mousedev *mousedev, - unsigned int code, int value) +static void mousedev_abs_event(struct input_dev *dev, 
struct mousedev *mousedev, unsigned int code, int value) { int size; switch (code) { + case ABS_X: + size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; + if (size == 0) + size = xres ? : 1; + if (value > dev->absmax[ABS_X]) + value = dev->absmax[ABS_X]; + if (value < dev->absmin[ABS_X]) + value = dev->absmin[ABS_X]; + mousedev->packet.x = ((value - dev->absmin[ABS_X]) * xres) / size; + mousedev->packet.abs_event = 1; + break; - case ABS_X: - size = dev->absmax[ABS_X] - dev->absmin[ABS_X]; - if (size == 0) - size = xres ? : 1; - if (value > dev->absmax[ABS_X]) - value = dev->absmax[ABS_X]; - if (value < dev->absmin[ABS_X]) - value = dev->absmin[ABS_X]; - mousedev->packet.x = - ((value - dev->absmin[ABS_X]) * xres) / size; - mousedev->packet.abs_event = 1; - break; - - case ABS_Y: - size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y]; - if (size == 0) - size = yres ? : 1; - if (value > dev->absmax[ABS_Y]) - value = dev->absmax[ABS_Y]; - if (value < dev->absmin[ABS_Y]) - value = dev->absmin[ABS_Y]; - mousedev->packet.y = yres - - ((value - dev->absmin[ABS_Y]) * yres) / size; - mousedev->packet.abs_event = 1; - break; + case ABS_Y: + size = dev->absmax[ABS_Y] - dev->absmin[ABS_Y]; + if (size == 0) + size = yres ? 
: 1; + if (value > dev->absmax[ABS_Y]) + value = dev->absmax[ABS_Y]; + if (value < dev->absmin[ABS_Y]) + value = dev->absmin[ABS_Y]; + mousedev->packet.y = yres - ((value - dev->absmin[ABS_Y]) * yres) / size; + mousedev->packet.abs_event = 1; + break; } } -static void mousedev_rel_event(struct mousedev *mousedev, - unsigned int code, int value) +static void mousedev_rel_event(struct mousedev *mousedev, unsigned int code, int value) { switch (code) { - case REL_X: - mousedev->packet.dx += value; - break; - - case REL_Y: - mousedev->packet.dy -= value; - break; - - case REL_WHEEL: - mousedev->packet.dz -= value; - break; + case REL_X: mousedev->packet.dx += value; break; + case REL_Y: mousedev->packet.dy -= value; break; + case REL_WHEEL: mousedev->packet.dz -= value; break; } } -static void mousedev_key_event(struct mousedev *mousedev, - unsigned int code, int value) +static void mousedev_key_event(struct mousedev *mousedev, unsigned int code, int value) { int index; switch (code) { - - case BTN_TOUCH: - case BTN_0: - case BTN_LEFT: index = 0; break; - - case BTN_STYLUS: - case BTN_1: - case BTN_RIGHT: index = 1; break; - - case BTN_2: - case BTN_FORWARD: - case BTN_STYLUS2: - case BTN_MIDDLE: index = 2; break; - - case BTN_3: - case BTN_BACK: - case BTN_SIDE: index = 3; break; - - case BTN_4: - case BTN_EXTRA: index = 4; break; - - default: return; + case BTN_TOUCH: + case BTN_0: + case BTN_LEFT: index = 0; break; + case BTN_STYLUS: + case BTN_1: + case BTN_RIGHT: index = 1; break; + case BTN_2: + case BTN_FORWARD: + case BTN_STYLUS2: + case BTN_MIDDLE: index = 2; break; + case BTN_3: + case BTN_BACK: + case BTN_SIDE: index = 3; break; + case BTN_4: + case BTN_EXTRA: index = 4; break; + default: return; } if (value) { @@ -257,23 +226,19 @@ static void mousedev_key_event(struct mousedev *mousedev, } } -static void mousedev_notify_readers(struct mousedev *mousedev, - struct mousedev_hw_data *packet) +static void mousedev_notify_readers(struct mousedev *mousedev, 
struct mousedev_hw_data *packet) { struct mousedev_client *client; struct mousedev_motion *p; - unsigned int new_head; + unsigned long flags; int wake_readers = 0; - rcu_read_lock(); - list_for_each_entry_rcu(client, &mousedev->client_list, node) { - - /* Just acquire the lock, interrupts already disabled */ - spin_lock(&client->packet_lock); + list_for_each_entry(client, &mousedev->client_list, node) { + spin_lock_irqsave(&client->packet_lock, flags); p = &client->packets[client->head]; if (client->ready && p->buttons != mousedev->packet.buttons) { - new_head = (client->head + 1) % PACKET_QUEUE_LEN; + unsigned int new_head = (client->head + 1) % PACKET_QUEUE_LEN; if (new_head != client->tail) { p = &client->packets[client->head = new_head]; memset(p, 0, sizeof(struct mousedev_motion)); @@ -288,29 +253,25 @@ static void mousedev_notify_readers(struct mousedev *mousedev, } client->pos_x += packet->dx; - client->pos_x = client->pos_x < 0 ? - 0 : (client->pos_x >= xres ? xres : client->pos_x); + client->pos_x = client->pos_x < 0 ? 0 : (client->pos_x >= xres ? xres : client->pos_x); client->pos_y += packet->dy; - client->pos_y = client->pos_y < 0 ? - 0 : (client->pos_y >= yres ? yres : client->pos_y); + client->pos_y = client->pos_y < 0 ? 0 : (client->pos_y >= yres ? 
yres : client->pos_y); p->dx += packet->dx; p->dy += packet->dy; p->dz += packet->dz; p->buttons = mousedev->packet.buttons; - if (p->dx || p->dy || p->dz || - p->buttons != client->last_buttons) + if (p->dx || p->dy || p->dz || p->buttons != client->last_buttons) client->ready = 1; - spin_unlock(&client->packet_lock); + spin_unlock_irqrestore(&client->packet_lock, flags); if (client->ready) { kill_fasync(&client->fasync, SIGIO, POLL_IN); wake_readers = 1; } } - rcu_read_unlock(); if (wake_readers) wake_up_interruptible(&mousedev->wait); @@ -320,8 +281,7 @@ static void mousedev_touchpad_touch(struct mousedev *mousedev, int value) { if (!value) { if (mousedev->touch && - time_before(jiffies, - mousedev->touch + msecs_to_jiffies(tap_time))) { + time_before(jiffies, mousedev->touch + msecs_to_jiffies(tap_time))) { /* * Toggle left button to emulate tap. * We rely on the fact that mousedev_mix always has 0 @@ -330,8 +290,7 @@ static void mousedev_touchpad_touch(struct mousedev *mousedev, int value) set_bit(0, &mousedev->packet.buttons); set_bit(0, &mousedev_mix->packet.buttons); mousedev_notify_readers(mousedev, &mousedev_mix->packet); - mousedev_notify_readers(mousedev_mix, - &mousedev_mix->packet); + mousedev_notify_readers(mousedev_mix, &mousedev_mix->packet); clear_bit(0, &mousedev->packet.buttons); clear_bit(0, &mousedev_mix->packet.buttons); } @@ -343,61 +302,54 @@ static void mousedev_touchpad_touch(struct mousedev *mousedev, int value) mousedev->touch = jiffies; } -static void mousedev_event(struct input_handle *handle, - unsigned int type, unsigned int code, int value) +static void mousedev_event(struct input_handle *handle, unsigned int type, unsigned int code, int value) { struct mousedev *mousedev = handle->private; switch (type) { + case EV_ABS: + /* Ignore joysticks */ + if (test_bit(BTN_TRIGGER, handle->dev->keybit)) + return; - case EV_ABS: - /* Ignore joysticks */ - if (test_bit(BTN_TRIGGER, handle->dev->keybit)) - return; - - if 
(test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) - mousedev_touchpad_event(handle->dev, - mousedev, code, value); - else - mousedev_abs_event(handle->dev, mousedev, code, value); + if (test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) + mousedev_touchpad_event(handle->dev, mousedev, code, value); + else + mousedev_abs_event(handle->dev, mousedev, code, value); - break; + break; - case EV_REL: - mousedev_rel_event(mousedev, code, value); - break; + case EV_REL: + mousedev_rel_event(mousedev, code, value); + break; - case EV_KEY: - if (value != 2) { - if (code == BTN_TOUCH && - test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) - mousedev_touchpad_touch(mousedev, value); - else - mousedev_key_event(mousedev, code, value); - } - break; - - case EV_SYN: - if (code == SYN_REPORT) { - if (mousedev->touch) { - mousedev->pkt_count++; - /* - * Input system eats duplicate events, - * but we need all of them to do correct - * averaging so apply present one forward - */ - fx(0) = fx(1); - fy(0) = fy(1); + case EV_KEY: + if (value != 2) { + if (code == BTN_TOUCH && test_bit(BTN_TOOL_FINGER, handle->dev->keybit)) + mousedev_touchpad_touch(mousedev, value); + else + mousedev_key_event(mousedev, code, value); } + break; - mousedev_notify_readers(mousedev, &mousedev->packet); - mousedev_notify_readers(mousedev_mix, &mousedev->packet); - - mousedev->packet.dx = mousedev->packet.dy = - mousedev->packet.dz = 0; - mousedev->packet.abs_event = 0; - } - break; + case EV_SYN: + if (code == SYN_REPORT) { + if (mousedev->touch) { + mousedev->pkt_count++; + /* Input system eats duplicate events, but we need all of them + * to do correct averaging so apply present one forward + */ + fx(0) = fx(1); + fy(0) = fy(1); + } + + mousedev_notify_readers(mousedev, &mousedev->packet); + mousedev_notify_readers(mousedev_mix, &mousedev->packet); + + mousedev->packet.dx = mousedev->packet.dy = mousedev->packet.dz = 0; + mousedev->packet.abs_event = 0; + } + break; } } @@ -415,48 +367,41 @@ static void 
mousedev_free(struct device *dev) { struct mousedev *mousedev = container_of(dev, struct mousedev, dev); + mousedev_table[mousedev->minor] = NULL; kfree(mousedev); } -static int mousedev_open_device(struct mousedev *mousedev) +static int mixdev_add_device(struct mousedev *mousedev) { - int retval; + int error; - retval = mutex_lock_interruptible(&mousedev->mutex); - if (retval) - return retval; + if (mousedev_mix->open) { + error = input_open_device(&mousedev->handle); + if (error) + return error; - if (mousedev->minor == MOUSEDEV_MIX) - mixdev_open_devices(); - else if (!mousedev->exist) - retval = -ENODEV; - else if (!mousedev->open++) { - retval = input_open_device(&mousedev->handle); - if (retval) - mousedev->open--; + mousedev->open++; + mousedev->mixdev_open = 1; } - mutex_unlock(&mousedev->mutex); - return retval; + get_device(&mousedev->dev); + list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list); + + return 0; } -static void mousedev_close_device(struct mousedev *mousedev) +static void mixdev_remove_device(struct mousedev *mousedev) { - mutex_lock(&mousedev->mutex); - - if (mousedev->minor == MOUSEDEV_MIX) - mixdev_close_devices(); - else if (mousedev->exist && !--mousedev->open) - input_close_device(&mousedev->handle); + if (mousedev->mixdev_open) { + mousedev->mixdev_open = 0; + if (!--mousedev->open && mousedev->exist) + input_close_device(&mousedev->handle); + } - mutex_unlock(&mousedev->mutex); + list_del_init(&mousedev->mixdev_node); + put_device(&mousedev->dev); } -/* - * Open all available devices so they can all be multiplexed in one. - * stream. Note that this function is called with mousedev_mix->mutex - * held. 
- */ static void mixdev_open_devices(void) { struct mousedev *mousedev; @@ -466,19 +411,16 @@ static void mixdev_open_devices(void) list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { if (!mousedev->mixdev_open) { - if (mousedev_open_device(mousedev)) - continue; + if (!mousedev->open && mousedev->exist) + if (input_open_device(&mousedev->handle)) + continue; + mousedev->open++; mousedev->mixdev_open = 1; } } } -/* - * Close all devices that were opened as part of multiplexed - * device. Note that this function is called with mousedev_mix->mutex - * held. - */ static void mixdev_close_devices(void) { struct mousedev *mousedev; @@ -489,45 +431,33 @@ static void mixdev_close_devices(void) list_for_each_entry(mousedev, &mousedev_mix_list, mixdev_node) { if (mousedev->mixdev_open) { mousedev->mixdev_open = 0; - mousedev_close_device(mousedev); + if (!--mousedev->open && mousedev->exist) + input_close_device(&mousedev->handle); } } } - -static void mousedev_attach_client(struct mousedev *mousedev, - struct mousedev_client *client) -{ - spin_lock(&mousedev->client_lock); - list_add_tail_rcu(&client->node, &mousedev->client_list); - spin_unlock(&mousedev->client_lock); - synchronize_rcu(); -} - -static void mousedev_detach_client(struct mousedev *mousedev, - struct mousedev_client *client) -{ - spin_lock(&mousedev->client_lock); - list_del_rcu(&client->node); - spin_unlock(&mousedev->client_lock); - synchronize_rcu(); -} - static int mousedev_release(struct inode *inode, struct file *file) { struct mousedev_client *client = file->private_data; struct mousedev *mousedev = client->mousedev; mousedev_fasync(-1, file, 0); - mousedev_detach_client(mousedev, client); + + list_del(&client->node); kfree(client); - mousedev_close_device(mousedev); + if (mousedev->minor == MOUSEDEV_MIX) + mixdev_close_devices(); + else if (!--mousedev->open && mousedev->exist) + input_close_device(&mousedev->handle); + put_device(&mousedev->dev); return 0; } + static int 
mousedev_open(struct inode *inode, struct file *file) { struct mousedev_client *client; @@ -545,17 +475,12 @@ static int mousedev_open(struct inode *inode, struct file *file) if (i >= MOUSEDEV_MINORS) return -ENODEV; - error = mutex_lock_interruptible(&mousedev_table_mutex); - if (error) - return error; mousedev = mousedev_table[i]; - if (mousedev) - get_device(&mousedev->dev); - mutex_unlock(&mousedev_table_mutex); - if (!mousedev) return -ENODEV; + get_device(&mousedev->dev); + client = kzalloc(sizeof(struct mousedev_client), GFP_KERNEL); if (!client) { error = -ENOMEM; @@ -566,17 +491,21 @@ static int mousedev_open(struct inode *inode, struct file *file) client->pos_x = xres / 2; client->pos_y = yres / 2; client->mousedev = mousedev; - mousedev_attach_client(mousedev, client); + list_add_tail(&client->node, &mousedev->client_list); - error = mousedev_open_device(mousedev); - if (error) - goto err_free_client; + if (mousedev->minor == MOUSEDEV_MIX) + mixdev_open_devices(); + else if (!mousedev->open++ && mousedev->exist) { + error = input_open_device(&mousedev->handle); + if (error) + goto err_free_client; + } file->private_data = client; return 0; err_free_client: - mousedev_detach_client(mousedev, client); + list_del(&client->node); kfree(client); err_put_mousedev: put_device(&mousedev->dev); @@ -588,41 +517,41 @@ static inline int mousedev_limit_delta(int delta, int limit) return delta > limit ? limit : (delta < -limit ? 
-limit : delta); } -static void mousedev_packet(struct mousedev_client *client, - signed char *ps2_data) +static void mousedev_packet(struct mousedev_client *client, signed char *ps2_data) { - struct mousedev_motion *p = &client->packets[client->tail]; + struct mousedev_motion *p; + unsigned long flags; + + spin_lock_irqsave(&client->packet_lock, flags); + p = &client->packets[client->tail]; - ps2_data[0] = 0x08 | - ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07); + ps2_data[0] = 0x08 | ((p->dx < 0) << 4) | ((p->dy < 0) << 5) | (p->buttons & 0x07); ps2_data[1] = mousedev_limit_delta(p->dx, 127); ps2_data[2] = mousedev_limit_delta(p->dy, 127); p->dx -= ps2_data[1]; p->dy -= ps2_data[2]; switch (client->mode) { - case MOUSEDEV_EMUL_EXPS: - ps2_data[3] = mousedev_limit_delta(p->dz, 7); - p->dz -= ps2_data[3]; - ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1); - client->bufsiz = 4; - break; - - case MOUSEDEV_EMUL_IMPS: - ps2_data[0] |= - ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); - ps2_data[3] = mousedev_limit_delta(p->dz, 127); - p->dz -= ps2_data[3]; - client->bufsiz = 4; - break; - - case MOUSEDEV_EMUL_PS2: - default: - ps2_data[0] |= - ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); - p->dz = 0; - client->bufsiz = 3; - break; + case MOUSEDEV_EMUL_EXPS: + ps2_data[3] = mousedev_limit_delta(p->dz, 7); + p->dz -= ps2_data[3]; + ps2_data[3] = (ps2_data[3] & 0x0f) | ((p->buttons & 0x18) << 1); + client->bufsiz = 4; + break; + + case MOUSEDEV_EMUL_IMPS: + ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); + ps2_data[3] = mousedev_limit_delta(p->dz, 127); + p->dz -= ps2_data[3]; + client->bufsiz = 4; + break; + + case MOUSEDEV_EMUL_PS2: + default: + ps2_data[0] |= ((p->buttons & 0x10) >> 3) | ((p->buttons & 0x08) >> 1); + p->dz = 0; + client->bufsiz = 3; + break; } if (!p->dx && !p->dy && !p->dz) { @@ -632,56 +561,12 @@ static void mousedev_packet(struct mousedev_client *client, } else client->tail 
= (client->tail + 1) % PACKET_QUEUE_LEN; } -} - -static void mousedev_generate_response(struct mousedev_client *client, - int command) -{ - client->ps2[0] = 0xfa; /* ACK */ - switch (command) { - - case 0xeb: /* Poll */ - mousedev_packet(client, &client->ps2[1]); - client->bufsiz++; /* account for leading ACK */ - break; - - case 0xf2: /* Get ID */ - switch (client->mode) { - case MOUSEDEV_EMUL_PS2: - client->ps2[1] = 0; - break; - case MOUSEDEV_EMUL_IMPS: - client->ps2[1] = 3; - break; - case MOUSEDEV_EMUL_EXPS: - client->ps2[1] = 4; - break; - } - client->bufsiz = 2; - break; - - case 0xe9: /* Get info */ - client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200; - client->bufsiz = 4; - break; - - case 0xff: /* Reset */ - client->impsseq = client->imexseq = 0; - client->mode = MOUSEDEV_EMUL_PS2; - client->ps2[1] = 0xaa; client->ps2[2] = 0x00; - client->bufsiz = 3; - break; - - default: - client->bufsiz = 1; - break; - } - client->buffer = client->bufsiz; + spin_unlock_irqrestore(&client->packet_lock, flags); } -static ssize_t mousedev_write(struct file *file, const char __user *buffer, - size_t count, loff_t *ppos) + +static ssize_t mousedev_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct mousedev_client *client = file->private_data; unsigned char c; @@ -692,8 +577,6 @@ static ssize_t mousedev_write(struct file *file, const char __user *buffer, if (get_user(c, buffer + i)) return -EFAULT; - spin_lock_irq(&client->packet_lock); - if (c == mousedev_imex_seq[client->imexseq]) { if (++client->imexseq == MOUSEDEV_SEQ_LEN) { client->imexseq = 0; @@ -710,39 +593,68 @@ static ssize_t mousedev_write(struct file *file, const char __user *buffer, } else client->impsseq = 0; - mousedev_generate_response(client, c); + client->ps2[0] = 0xfa; + + switch (c) { + + case 0xeb: /* Poll */ + mousedev_packet(client, &client->ps2[1]); + client->bufsiz++; /* account for leading ACK */ + break; + + case 0xf2: /* Get ID */ + switch 
(client->mode) { + case MOUSEDEV_EMUL_PS2: client->ps2[1] = 0; break; + case MOUSEDEV_EMUL_IMPS: client->ps2[1] = 3; break; + case MOUSEDEV_EMUL_EXPS: client->ps2[1] = 4; break; + } + client->bufsiz = 2; + break; + + case 0xe9: /* Get info */ + client->ps2[1] = 0x60; client->ps2[2] = 3; client->ps2[3] = 200; + client->bufsiz = 4; + break; + + case 0xff: /* Reset */ + client->impsseq = client->imexseq = 0; + client->mode = MOUSEDEV_EMUL_PS2; + client->ps2[1] = 0xaa; client->ps2[2] = 0x00; + client->bufsiz = 3; + break; + + default: + client->bufsiz = 1; + break; + } - spin_unlock_irq(&client->packet_lock); + client->buffer = client->bufsiz; } kill_fasync(&client->fasync, SIGIO, POLL_IN); + wake_up_interruptible(&client->mousedev->wait); return count; } -static ssize_t mousedev_read(struct file *file, char __user *buffer, - size_t count, loff_t *ppos) +static ssize_t mousedev_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) { struct mousedev_client *client = file->private_data; - struct mousedev *mousedev = client->mousedev; - signed char data[sizeof(client->ps2)]; int retval = 0; - if (!client->ready && !client->buffer && mousedev->exist && - (file->f_flags & O_NONBLOCK)) + if (!client->ready && !client->buffer && (file->f_flags & O_NONBLOCK)) return -EAGAIN; - retval = wait_event_interruptible(mousedev->wait, - !mousedev->exist || client->ready || client->buffer); + retval = wait_event_interruptible(client->mousedev->wait, + !client->mousedev->exist || client->ready || client->buffer); + if (retval) return retval; - if (!mousedev->exist) + if (!client->mousedev->exist) return -ENODEV; - spin_lock_irq(&client->packet_lock); - if (!client->buffer && client->ready) { mousedev_packet(client, client->ps2); client->buffer = client->bufsiz; @@ -751,12 +663,9 @@ static ssize_t mousedev_read(struct file *file, char __user *buffer, if (count > client->buffer) count = client->buffer; - memcpy(data, client->ps2 + client->bufsiz - client->buffer, count); 
client->buffer -= count; - spin_unlock_irq(&client->packet_lock); - - if (copy_to_user(buffer, data, count)) + if (copy_to_user(buffer, client->ps2 + client->bufsiz - client->buffer - count, count)) return -EFAULT; return count; @@ -783,60 +692,6 @@ static const struct file_operations mousedev_fops = { .fasync = mousedev_fasync, }; -static int mousedev_install_chrdev(struct mousedev *mousedev) -{ - mousedev_table[mousedev->minor] = mousedev; - return 0; -} - -static void mousedev_remove_chrdev(struct mousedev *mousedev) -{ - mutex_lock(&mousedev_table_mutex); - mousedev_table[mousedev->minor] = NULL; - mutex_unlock(&mousedev_table_mutex); -} - -/* - * Mark device non-existent. This disables writes, ioctls and - * prevents new users from opening the device. Already posted - * blocking reads will stay, however new ones will fail. - */ -static void mousedev_mark_dead(struct mousedev *mousedev) -{ - mutex_lock(&mousedev->mutex); - mousedev->exist = 0; - mutex_unlock(&mousedev->mutex); -} - -/* - * Wake up users waiting for IO so they can disconnect from - * dead device. 
- */ -static void mousedev_hangup(struct mousedev *mousedev) -{ - struct mousedev_client *client; - - spin_lock(&mousedev->client_lock); - list_for_each_entry(client, &mousedev->client_list, node) - kill_fasync(&client->fasync, SIGIO, POLL_HUP); - spin_unlock(&mousedev->client_lock); - - wake_up_interruptible(&mousedev->wait); -} - -static void mousedev_cleanup(struct mousedev *mousedev) -{ - struct input_handle *handle = &mousedev->handle; - - mousedev_mark_dead(mousedev); - mousedev_hangup(mousedev); - mousedev_remove_chrdev(mousedev); - - /* mousedev is marked dead so no one else accesses mousedev->open */ - if (mousedev->open) - input_close_device(handle); -} - static struct mousedev *mousedev_create(struct input_dev *dev, struct input_handler *handler, int minor) @@ -852,10 +707,6 @@ static struct mousedev *mousedev_create(struct input_dev *dev, INIT_LIST_HEAD(&mousedev->client_list); INIT_LIST_HEAD(&mousedev->mixdev_node); - spin_lock_init(&mousedev->client_lock); - mutex_init(&mousedev->mutex); - lockdep_set_subclass(&mousedev->mutex, - minor == MOUSEDEV_MIX ? 
MOUSEDEV_MIX : 0); init_waitqueue_head(&mousedev->wait); if (minor == MOUSEDEV_MIX) @@ -880,27 +731,14 @@ static struct mousedev *mousedev_create(struct input_dev *dev, mousedev->dev.release = mousedev_free; device_initialize(&mousedev->dev); - if (minor != MOUSEDEV_MIX) { - error = input_register_handle(&mousedev->handle); - if (error) - goto err_free_mousedev; - } - - error = mousedev_install_chrdev(mousedev); - if (error) - goto err_unregister_handle; + mousedev_table[minor] = mousedev; error = device_add(&mousedev->dev); if (error) - goto err_cleanup_mousedev; + goto err_free_mousedev; return mousedev; - err_cleanup_mousedev: - mousedev_cleanup(mousedev); - err_unregister_handle: - if (minor != MOUSEDEV_MIX) - input_unregister_handle(&mousedev->handle); err_free_mousedev: put_device(&mousedev->dev); err_out: @@ -909,64 +747,29 @@ static struct mousedev *mousedev_create(struct input_dev *dev, static void mousedev_destroy(struct mousedev *mousedev) { - device_del(&mousedev->dev); - mousedev_cleanup(mousedev); - if (mousedev->minor != MOUSEDEV_MIX) - input_unregister_handle(&mousedev->handle); - put_device(&mousedev->dev); -} - -static int mixdev_add_device(struct mousedev *mousedev) -{ - int retval; - - retval = mutex_lock_interruptible(&mousedev_mix->mutex); - if (retval) - return retval; - - if (mousedev_mix->open) { - retval = mousedev_open_device(mousedev); - if (retval) - goto out; - - mousedev->mixdev_open = 1; - } - - get_device(&mousedev->dev); - list_add_tail(&mousedev->mixdev_node, &mousedev_mix_list); - - out: - mutex_unlock(&mousedev_mix->mutex); - return retval; -} + struct mousedev_client *client; -static void mixdev_remove_device(struct mousedev *mousedev) -{ - mutex_lock(&mousedev_mix->mutex); + device_del(&mousedev->dev); + mousedev->exist = 0; - if (mousedev->mixdev_open) { - mousedev->mixdev_open = 0; - mousedev_close_device(mousedev); + if (mousedev->open) { + input_close_device(&mousedev->handle); + list_for_each_entry(client, 
&mousedev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + wake_up_interruptible(&mousedev->wait); } - list_del_init(&mousedev->mixdev_node); - mutex_unlock(&mousedev_mix->mutex); - put_device(&mousedev->dev); } -static int mousedev_connect(struct input_handler *handler, - struct input_dev *dev, +static int mousedev_connect(struct input_handler *handler, struct input_dev *dev, const struct input_device_id *id) { struct mousedev *mousedev; int minor; int error; - for (minor = 0; minor < MOUSEDEV_MINORS; minor++) - if (!mousedev_table[minor]) - break; - + for (minor = 0; minor < MOUSEDEV_MINORS && mousedev_table[minor]; minor++); if (minor == MOUSEDEV_MINORS) { printk(KERN_ERR "mousedev: no more free mousedev devices\n"); return -ENFILE; @@ -976,13 +779,21 @@ static int mousedev_connect(struct input_handler *handler, if (IS_ERR(mousedev)) return PTR_ERR(mousedev); + error = input_register_handle(&mousedev->handle); + if (error) + goto err_delete_mousedev; + error = mixdev_add_device(mousedev); - if (error) { - mousedev_destroy(mousedev); - return error; - } + if (error) + goto err_unregister_handle; return 0; + + err_unregister_handle: + input_unregister_handle(&mousedev->handle); + err_delete_mousedev: + device_unregister(&mousedev->dev); + return error; } static void mousedev_disconnect(struct input_handle *handle) @@ -990,42 +801,33 @@ static void mousedev_disconnect(struct input_handle *handle) struct mousedev *mousedev = handle->private; mixdev_remove_device(mousedev); + input_unregister_handle(handle); mousedev_destroy(mousedev); } static const struct input_device_id mousedev_ids[] = { { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_KEYBIT | - INPUT_DEVICE_ID_MATCH_RELBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT, .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) }, .relbit = { BIT(REL_X) | BIT(REL_Y) }, - }, /* A mouse like 
device, at least one button, - two relative axes */ + }, /* A mouse like device, at least one button, two relative axes */ { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_RELBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_RELBIT, .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, .relbit = { BIT(REL_WHEEL) }, }, /* A separate scrollwheel */ { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_KEYBIT | - INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, .absbit = { BIT(ABS_X) | BIT(ABS_Y) }, - }, /* A tablet like device, at least touch detection, - two absolute axes */ + }, /* A tablet like device, at least touch detection, two absolute axes */ { - .flags = INPUT_DEVICE_ID_MATCH_EVBIT | - INPUT_DEVICE_ID_MATCH_KEYBIT | - INPUT_DEVICE_ID_MATCH_ABSBIT, + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, .keybit = { [LONG(BTN_TOOL_FINGER)] = BIT(BTN_TOOL_FINGER) }, - .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | - BIT(ABS_TOOL_WIDTH) }, + .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) | BIT(ABS_TOOL_WIDTH) }, }, /* A touchpad */ { }, /* Terminating entry */ diff --git a/trunk/drivers/input/serio/i8042.c b/trunk/drivers/input/serio/i8042.c index 11dafc0ee994..c2eea2767e10 100644 --- a/trunk/drivers/input/serio/i8042.c +++ b/trunk/drivers/input/serio/i8042.c @@ -385,8 +385,6 @@ static int i8042_enable_kbd_port(void) i8042_ctr |= I8042_CTR_KBDINT; if (i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - i8042_ctr &= ~I8042_CTR_KBDINT; - i8042_ctr |= I8042_CTR_KBDDIS; printk(KERN_ERR "i8042.c: Failed to enable KBD port.\n"); return -EIO; } @@ -404,8 +402,6 @@ static int i8042_enable_aux_port(void) i8042_ctr |= I8042_CTR_AUXINT; if 
(i8042_command(&i8042_ctr, I8042_CMD_CTL_WCTR)) { - i8042_ctr &= ~I8042_CTR_AUXINT; - i8042_ctr |= I8042_CTR_AUXDIS; printk(KERN_ERR "i8042.c: Failed to enable AUX port.\n"); return -EIO; } diff --git a/trunk/drivers/input/touchscreen/Kconfig b/trunk/drivers/input/touchscreen/Kconfig index e3e0baa1a158..f929fcdbae2e 100644 --- a/trunk/drivers/input/touchscreen/Kconfig +++ b/trunk/drivers/input/touchscreen/Kconfig @@ -126,16 +126,6 @@ config TOUCHSCREEN_HP600 To compile this driver as a module, choose M here: the module will be called hp680_ts_input. -config TOUCHSCREEN_HP7XX - tristate "HP Jornada 710/720/728 touchscreen" - depends on SA1100_JORNADA720_SSP - help - Say Y here if you have a HP Jornada 710/720/728 and want - to support the built-in touchscreen. - - To compile this driver as a module, choose M here: the - module will be called jornada720_ts. - config TOUCHSCREEN_PENMOUNT tristate "Penmount serial touchscreen" select SERIO @@ -201,7 +191,6 @@ config TOUCHSCREEN_USB_COMPOSITE - Gunze AHL61 - DMC TSC-10/25 - IRTOUCHSYSTEMS/UNITOP - - IdealTEK URTC1000 Have a look at for a usage description and the required user-space stuff. 
@@ -249,14 +238,4 @@ config TOUCHSCREEN_USB_IRTOUCH bool "IRTOUCHSYSTEMS/UNITOP device support" if EMBEDDED depends on TOUCHSCREEN_USB_COMPOSITE -config TOUCHSCREEN_USB_IDEALTEK - default y - bool "IdealTEK URTC1000 device support" if EMBEDDED - depends on TOUCHSCREEN_USB_COMPOSITE - -config TOUCHSCREEN_USB_GENERAL_TOUCH - default y - bool "GeneralTouch Touchscreen device support" if EMBEDDED - depends on TOUCHSCREEN_USB_COMPOSITE - endif diff --git a/trunk/drivers/input/touchscreen/Makefile b/trunk/drivers/input/touchscreen/Makefile index 35d4097df35a..5de8933c4993 100644 --- a/trunk/drivers/input/touchscreen/Makefile +++ b/trunk/drivers/input/touchscreen/Makefile @@ -13,7 +13,6 @@ obj-$(CONFIG_TOUCHSCREEN_FUJITSU) += fujitsu_ts.o obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o obj-$(CONFIG_TOUCHSCREEN_HP600) += hp680_ts_input.o -obj-$(CONFIG_TOUCHSCREEN_HP7XX) += jornada720_ts.o obj-$(CONFIG_TOUCHSCREEN_USB_COMPOSITE) += usbtouchscreen.o obj-$(CONFIG_TOUCHSCREEN_PENMOUNT) += penmount.o obj-$(CONFIG_TOUCHSCREEN_TOUCHRIGHT) += touchright.o diff --git a/trunk/drivers/input/touchscreen/jornada720_ts.c b/trunk/drivers/input/touchscreen/jornada720_ts.c deleted file mode 100644 index 42a1c9a1940e..000000000000 --- a/trunk/drivers/input/touchscreen/jornada720_ts.c +++ /dev/null @@ -1,182 +0,0 @@ -/* - * drivers/input/touchscreen/jornada720_ts.c - * - * Copyright (C) 2007 Kristoffer Ericson - * - * Copyright (C) 2006 Filip Zyzniewski - * based on HP Jornada 56x touchscreen driver by Alex Lange - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * HP Jornada 710/720/729 Touchscreen Driver - */ - -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Kristoffer Ericson "); -MODULE_DESCRIPTION("HP Jornada 710/720/728 touchscreen driver"); -MODULE_LICENSE("GPLv2"); - -struct jornada_ts { - struct input_dev *dev; - int x_data[4]; /* X sample values */ - int y_data[4]; /* Y sample values */ -}; - -static void jornada720_ts_collect_data(struct jornada_ts *jornada_ts) -{ - - /* 3 low word X samples */ - jornada_ts->x_data[0] = jornada_ssp_byte(TXDUMMY); - jornada_ts->x_data[1] = jornada_ssp_byte(TXDUMMY); - jornada_ts->x_data[2] = jornada_ssp_byte(TXDUMMY); - - /* 3 low word Y samples */ - jornada_ts->y_data[0] = jornada_ssp_byte(TXDUMMY); - jornada_ts->y_data[1] = jornada_ssp_byte(TXDUMMY); - jornada_ts->y_data[2] = jornada_ssp_byte(TXDUMMY); - - /* combined x samples bits */ - jornada_ts->x_data[3] = jornada_ssp_byte(TXDUMMY); - - /* combined y samples bits */ - jornada_ts->y_data[3] = jornada_ssp_byte(TXDUMMY); -} - -static int jornada720_ts_average(int coords[4]) -{ - int coord, high_bits = coords[3]; - - coord = coords[0] | ((high_bits & 0x03) << 8); - coord += coords[1] | ((high_bits & 0x0c) << 6); - coord += coords[2] | ((high_bits & 0x30) << 4); - - return coord / 3; -} - -static irqreturn_t jornada720_ts_interrupt(int irq, void *dev_id) -{ - struct platform_device *pdev = dev_id; - struct jornada_ts *jornada_ts = platform_get_drvdata(pdev); - struct input_dev *input = jornada_ts->dev; - int x, y; - - /* If GPIO_GPIO9 is set to high then report pen up */ - if (GPLR & GPIO_GPIO(9)) { - input_report_key(input, BTN_TOUCH, 0); - input_sync(input); - } else { - jornada_ssp_start(); - - /* proper reply to request is always TXDUMMY */ - if (jornada_ssp_inout(GETTOUCHSAMPLES) == TXDUMMY) { - jornada720_ts_collect_data(jornada_ts); - - x = jornada720_ts_average(jornada_ts->x_data); - y = jornada720_ts_average(jornada_ts->y_data); - - input_report_key(input, BTN_TOUCH, 1); - 
input_report_abs(input, ABS_X, x); - input_report_abs(input, ABS_Y, y); - input_sync(input); - } - - jornada_ssp_end(); - } - - return IRQ_HANDLED; -} - -static int __devinit jornada720_ts_probe(struct platform_device *pdev) -{ - struct jornada_ts *jornada_ts; - struct input_dev *input_dev; - int error; - - jornada_ts = kzalloc(sizeof(struct jornada_ts), GFP_KERNEL); - input_dev = input_allocate_device(); - - if (!jornada_ts || !input_dev) { - error = -ENOMEM; - goto fail1; - } - - platform_set_drvdata(pdev, jornada_ts); - - jornada_ts->dev = input_dev; - - input_dev->name = "HP Jornada 7xx Touchscreen"; - input_dev->phys = "jornadats/input0"; - input_dev->id.bustype = BUS_HOST; - input_dev->dev.parent = &pdev->dev; - - input_dev->evbit[0] = BIT(EV_KEY) | BIT(EV_ABS); - input_dev->keybit[LONG(BTN_TOUCH)] = BIT(BTN_TOUCH); - input_set_abs_params(input_dev, ABS_X, 270, 3900, 0, 0); - input_set_abs_params(input_dev, ABS_Y, 180, 3700, 0, 0); - - error = request_irq(IRQ_GPIO9, - jornada720_ts_interrupt, - IRQF_DISABLED | IRQF_TRIGGER_RISING, - "HP7XX Touchscreen driver", pdev); - if (error) { - printk(KERN_INFO "HP7XX TS : Unable to acquire irq!\n"); - goto fail1; - } - - error = input_register_device(jornada_ts->dev); - if (error) - goto fail2; - - return 0; - - fail2: - free_irq(IRQ_GPIO9, pdev); - fail1: - platform_set_drvdata(pdev, NULL); - input_free_device(input_dev); - kfree(jornada_ts); - return error; -} - -static int __devexit jornada720_ts_remove(struct platform_device *pdev) -{ - struct jornada_ts *jornada_ts = platform_get_drvdata(pdev); - - free_irq(IRQ_GPIO9, pdev); - platform_set_drvdata(pdev, NULL); - input_unregister_device(jornada_ts->dev); - kfree(jornada_ts); - - return 0; -} - -static struct platform_driver jornada720_ts_driver = { - .probe = jornada720_ts_probe, - .remove = __devexit_p(jornada720_ts_remove), - .driver = { - .name = "jornada_ts", - }, -}; - -static int __init jornada720_ts_init(void) -{ - return 
platform_driver_register(&jornada720_ts_driver); -} - -static void __exit jornada720_ts_exit(void) -{ - platform_driver_unregister(&jornada720_ts_driver); -} - -module_init(jornada720_ts_init); -module_exit(jornada720_ts_exit); diff --git a/trunk/drivers/input/touchscreen/ucb1400_ts.c b/trunk/drivers/input/touchscreen/ucb1400_ts.c index 86aed64ec0fb..36f944019158 100644 --- a/trunk/drivers/input/touchscreen/ucb1400_ts.c +++ b/trunk/drivers/input/touchscreen/ucb1400_ts.c @@ -130,7 +130,8 @@ static unsigned int ucb1400_adc_read(struct ucb1400 *ucb, u16 adc_channel) if (val & UCB_ADC_DAT_VALID) break; /* yield to other processes */ - schedule_timeout_uninterruptible(1); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(1); } return UCB_ADC_DAT_VALUE(val); diff --git a/trunk/drivers/input/touchscreen/usbtouchscreen.c b/trunk/drivers/input/touchscreen/usbtouchscreen.c index 9fb3d5c30999..741f6c6f1e50 100644 --- a/trunk/drivers/input/touchscreen/usbtouchscreen.c +++ b/trunk/drivers/input/touchscreen/usbtouchscreen.c @@ -10,7 +10,6 @@ * - Gunze AHL61 * - DMC TSC-10/25 * - IRTOUCHSYSTEMS/UNITOP - * - IdealTEK URTC1000 * * Copyright (C) 2004-2006 by Daniel Ritz * Copyright (C) by Todd E. 
Johnson (mtouchusb.c) @@ -93,7 +92,7 @@ struct usbtouch_usb { }; -#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO) || defined(CONFIG_TOUCHSCREEN_USB_IDEALTEK) +#if defined(CONFIG_TOUCHSCREEN_USB_EGALAX) || defined(CONFIG_TOUCHSCREEN_USB_ETURBO) #define MULTI_PACKET #endif @@ -113,8 +112,6 @@ enum { DEVTYPE_GUNZE, DEVTYPE_DMC_TSC10, DEVTYPE_IRTOUCH, - DEVTYPE_IDEALTEK, - DEVTYPE_GENERAL_TOUCH, }; static struct usb_device_id usbtouch_devices[] = { @@ -160,14 +157,6 @@ static struct usb_device_id usbtouch_devices[] = { {USB_DEVICE(0x6615, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, #endif -#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK - {USB_DEVICE(0x1391, 0x1000), .driver_info = DEVTYPE_IDEALTEK}, -#endif - -#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH - {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH}, -#endif - {} }; @@ -407,8 +396,7 @@ static int dmc_tsc10_init(struct usbtouch_usb *usbtouch) TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT); if (ret < 0) return ret; - if ((buf[0] != 0x06 || buf[1] != 0x00) && - (buf[0] != 0x15 || buf[1] != 0x01)) + if (buf[0] != 0x06 || buf[1] != 0x00) return -ENODEV; /* start sending data */ @@ -449,57 +437,6 @@ static int irtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) #endif -/***************************************************************************** - * IdealTEK URTC1000 Part - */ -#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK -static int idealtek_get_pkt_len(unsigned char *buf, int len) -{ - if (buf[0] & 0x80) - return 5; - if (buf[0] == 0x01) - return len; - return 0; -} - -static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt) -{ - switch (pkt[0] & 0x98) { - case 0x88: - /* touch data in IdealTEK mode */ - dev->x = (pkt[1] << 5) | (pkt[2] >> 2); - dev->y = (pkt[3] << 5) | (pkt[4] >> 2); - dev->touch = (pkt[0] & 0x40) ? 
1 : 0; - return 1; - - case 0x98: - /* touch data in MT emulation mode */ - dev->x = (pkt[2] << 5) | (pkt[1] >> 2); - dev->y = (pkt[4] << 5) | (pkt[3] >> 2); - dev->touch = (pkt[0] & 0x40) ? 1 : 0; - return 1; - - default: - return 0; - } -} -#endif - -/***************************************************************************** - * General Touch Part - */ -#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH -static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) -{ - dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1] ; - dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3] ; - dev->press = pkt[5] & 0xff; - dev->touch = pkt[0] & 0x01; - - return 1; -} -#endif - /***************************************************************************** * the different device descriptors */ @@ -600,32 +537,6 @@ static struct usbtouch_device_info usbtouch_dev_info[] = { .read_data = irtouch_read_data, }, #endif - -#ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK - [DEVTYPE_IDEALTEK] = { - .min_xc = 0x0, - .max_xc = 0x0fff, - .min_yc = 0x0, - .max_yc = 0x0fff, - .rept_size = 8, - .flags = USBTOUCH_FLG_BUFFER, - .process_pkt = usbtouch_process_multi, - .get_pkt_len = idealtek_get_pkt_len, - .read_data = idealtek_read_data, - }, -#endif - -#ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH - [DEVTYPE_GENERAL_TOUCH] = { - .min_xc = 0x0, - .max_xc = 0x0500, - .min_yc = 0x0, - .max_yc = 0x0500, - .rept_size = 7, - .read_data = general_touch_read_data, - } -#endif - }; diff --git a/trunk/drivers/input/tsdev.c b/trunk/drivers/input/tsdev.c new file mode 100644 index 000000000000..d2f882e98e5e --- /dev/null +++ b/trunk/drivers/input/tsdev.c @@ -0,0 +1,533 @@ +/* + * $Id: tsdev.c,v 1.15 2002/04/10 16:50:19 jsimmons Exp $ + * + * Copyright (c) 2001 "Crazy" james Simmons + * + * Compaq touchscreen protocol driver. 
The protocol emulated by this driver + * is obsolete; for new programs use the tslib library which can read directly + * from evdev and perform dejittering, variance filtering and calibration - + * all in user space, not at kernel level. The meaning of this driver is + * to allow usage of newer input drivers with old applications that use the + * old /dev/h3600_ts and /dev/h3600_tsraw devices. + * + * 09-Apr-2004: Andrew Zabolotny + * Fixed to actually work, not just output random numbers. + * Added support for both h3600_ts and h3600_tsraw protocol + * emulation. + */ + +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Should you need to contact me, the author, you can do so either by + * e-mail - mail your message to . + */ + +#define TSDEV_MINOR_BASE 128 +#define TSDEV_MINORS 32 +/* First 16 devices are h3600_ts compatible; second 16 are h3600_tsraw */ +#define TSDEV_MINOR_MASK 15 +#define TSDEV_BUFFER_SIZE 64 + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef CONFIG_INPUT_TSDEV_SCREEN_X +#define CONFIG_INPUT_TSDEV_SCREEN_X 240 +#endif +#ifndef CONFIG_INPUT_TSDEV_SCREEN_Y +#define CONFIG_INPUT_TSDEV_SCREEN_Y 320 +#endif + +/* This driver emulates both protocols of the old h3600_ts and h3600_tsraw + * devices. 
The first one must output X/Y data in 'cooked' format, e.g. + * filtered, dejittered and calibrated. Second device just outputs raw + * data received from the hardware. + * + * This driver doesn't support filtering and dejittering; it supports only + * calibration. Filtering and dejittering must be done in the low-level + * driver, if needed, because it may gain additional benefits from knowing + * the low-level details, the nature of noise and so on. + * + * The driver precomputes a calibration matrix given the initial xres and + * yres values (quite innacurate for most touchscreens) that will result + * in a more or less expected range of output values. The driver supports + * the TS_SET_CAL ioctl, which will replace the calibration matrix with a + * new one, supposedly generated from the values taken from the raw device. + */ + +MODULE_AUTHOR("James Simmons "); +MODULE_DESCRIPTION("Input driver to touchscreen converter"); +MODULE_LICENSE("GPL"); + +static int xres = CONFIG_INPUT_TSDEV_SCREEN_X; +module_param(xres, uint, 0); +MODULE_PARM_DESC(xres, "Horizontal screen resolution (can be negative for X-mirror)"); + +static int yres = CONFIG_INPUT_TSDEV_SCREEN_Y; +module_param(yres, uint, 0); +MODULE_PARM_DESC(yres, "Vertical screen resolution (can be negative for Y-mirror)"); + +/* From Compaq's Touch Screen Specification version 0.2 (draft) */ +struct ts_event { + short pressure; + short x; + short y; + short millisecs; +}; + +struct ts_calibration { + int xscale; + int xtrans; + int yscale; + int ytrans; + int xyswap; +}; + +struct tsdev { + int exist; + int open; + int minor; + char name[8]; + struct input_handle handle; + wait_queue_head_t wait; + struct list_head client_list; + struct device dev; + + int x, y, pressure; + struct ts_calibration cal; +}; + +struct tsdev_client { + struct fasync_struct *fasync; + struct list_head node; + struct tsdev *tsdev; + int head, tail; + struct ts_event event[TSDEV_BUFFER_SIZE]; + int raw; +}; + +/* The following ioctl 
codes are defined ONLY for backward compatibility. + * Don't use tsdev for new developement; use the tslib library instead. + * Touchscreen calibration is a fully userspace task. + */ +/* Use 'f' as magic number */ +#define IOC_H3600_TS_MAGIC 'f' +#define TS_GET_CAL _IOR(IOC_H3600_TS_MAGIC, 10, struct ts_calibration) +#define TS_SET_CAL _IOW(IOC_H3600_TS_MAGIC, 11, struct ts_calibration) + +static struct tsdev *tsdev_table[TSDEV_MINORS/2]; + +static int tsdev_fasync(int fd, struct file *file, int on) +{ + struct tsdev_client *client = file->private_data; + int retval; + + retval = fasync_helper(fd, file, on, &client->fasync); + return retval < 0 ? retval : 0; +} + +static int tsdev_open(struct inode *inode, struct file *file) +{ + int i = iminor(inode) - TSDEV_MINOR_BASE; + struct tsdev_client *client; + struct tsdev *tsdev; + int error; + + printk(KERN_WARNING "tsdev (compaq touchscreen emulation) is scheduled " + "for removal.\nSee Documentation/feature-removal-schedule.txt " + "for details.\n"); + + if (i >= TSDEV_MINORS) + return -ENODEV; + + tsdev = tsdev_table[i & TSDEV_MINOR_MASK]; + if (!tsdev || !tsdev->exist) + return -ENODEV; + + get_device(&tsdev->dev); + + client = kzalloc(sizeof(struct tsdev_client), GFP_KERNEL); + if (!client) { + error = -ENOMEM; + goto err_put_tsdev; + } + + client->tsdev = tsdev; + client->raw = (i >= TSDEV_MINORS / 2) ? 
1 : 0; + list_add_tail(&client->node, &tsdev->client_list); + + if (!tsdev->open++ && tsdev->exist) { + error = input_open_device(&tsdev->handle); + if (error) + goto err_free_client; + } + + file->private_data = client; + return 0; + + err_free_client: + list_del(&client->node); + kfree(client); + err_put_tsdev: + put_device(&tsdev->dev); + return error; +} + +static void tsdev_free(struct device *dev) +{ + struct tsdev *tsdev = container_of(dev, struct tsdev, dev); + + tsdev_table[tsdev->minor] = NULL; + kfree(tsdev); +} + +static int tsdev_release(struct inode *inode, struct file *file) +{ + struct tsdev_client *client = file->private_data; + struct tsdev *tsdev = client->tsdev; + + tsdev_fasync(-1, file, 0); + + list_del(&client->node); + kfree(client); + + if (!--tsdev->open && tsdev->exist) + input_close_device(&tsdev->handle); + + put_device(&tsdev->dev); + + return 0; +} + +static ssize_t tsdev_read(struct file *file, char __user *buffer, size_t count, + loff_t *ppos) +{ + struct tsdev_client *client = file->private_data; + struct tsdev *tsdev = client->tsdev; + int retval = 0; + + if (client->head == client->tail && tsdev->exist && (file->f_flags & O_NONBLOCK)) + return -EAGAIN; + + retval = wait_event_interruptible(tsdev->wait, + client->head != client->tail || !tsdev->exist); + if (retval) + return retval; + + if (!tsdev->exist) + return -ENODEV; + + while (client->head != client->tail && + retval + sizeof (struct ts_event) <= count) { + if (copy_to_user (buffer + retval, client->event + client->tail, + sizeof (struct ts_event))) + return -EFAULT; + client->tail = (client->tail + 1) & (TSDEV_BUFFER_SIZE - 1); + retval += sizeof (struct ts_event); + } + + return retval; +} + +/* No kernel lock - fine */ +static unsigned int tsdev_poll(struct file *file, poll_table *wait) +{ + struct tsdev_client *client = file->private_data; + struct tsdev *tsdev = client->tsdev; + + poll_wait(file, &tsdev->wait, wait); + return ((client->head == client->tail) ? 
0 : (POLLIN | POLLRDNORM)) | + (tsdev->exist ? 0 : (POLLHUP | POLLERR)); +} + +static int tsdev_ioctl(struct inode *inode, struct file *file, + unsigned int cmd, unsigned long arg) +{ + struct tsdev_client *client = file->private_data; + struct tsdev *tsdev = client->tsdev; + int retval = 0; + + switch (cmd) { + case TS_GET_CAL: + if (copy_to_user((void __user *)arg, &tsdev->cal, + sizeof (struct ts_calibration))) + retval = -EFAULT; + break; + + case TS_SET_CAL: + if (copy_from_user(&tsdev->cal, (void __user *)arg, + sizeof (struct ts_calibration))) + retval = -EFAULT; + break; + + default: + retval = -EINVAL; + break; + } + + return retval; +} + +static const struct file_operations tsdev_fops = { + .owner = THIS_MODULE, + .open = tsdev_open, + .release = tsdev_release, + .read = tsdev_read, + .poll = tsdev_poll, + .fasync = tsdev_fasync, + .ioctl = tsdev_ioctl, +}; + +static void tsdev_event(struct input_handle *handle, unsigned int type, + unsigned int code, int value) +{ + struct tsdev *tsdev = handle->private; + struct tsdev_client *client; + struct timeval time; + + switch (type) { + case EV_ABS: + switch (code) { + case ABS_X: + tsdev->x = value; + break; + + case ABS_Y: + tsdev->y = value; + break; + + case ABS_PRESSURE: + if (value > handle->dev->absmax[ABS_PRESSURE]) + value = handle->dev->absmax[ABS_PRESSURE]; + value -= handle->dev->absmin[ABS_PRESSURE]; + if (value < 0) + value = 0; + tsdev->pressure = value; + break; + } + break; + + case EV_REL: + switch (code) { + case REL_X: + tsdev->x += value; + if (tsdev->x < 0) + tsdev->x = 0; + else if (tsdev->x > xres) + tsdev->x = xres; + break; + + case REL_Y: + tsdev->y += value; + if (tsdev->y < 0) + tsdev->y = 0; + else if (tsdev->y > yres) + tsdev->y = yres; + break; + } + break; + + case EV_KEY: + if (code == BTN_TOUCH || code == BTN_MOUSE) { + switch (value) { + case 0: + tsdev->pressure = 0; + break; + + case 1: + if (!tsdev->pressure) + tsdev->pressure = 1; + break; + } + } + break; + } + + if (type 
!= EV_SYN || code != SYN_REPORT) + return; + + list_for_each_entry(client, &tsdev->client_list, node) { + int x, y, tmp; + + do_gettimeofday(&time); + client->event[client->head].millisecs = time.tv_usec / 1000; + client->event[client->head].pressure = tsdev->pressure; + + x = tsdev->x; + y = tsdev->y; + + /* Calibration */ + if (!client->raw) { + x = ((x * tsdev->cal.xscale) >> 8) + tsdev->cal.xtrans; + y = ((y * tsdev->cal.yscale) >> 8) + tsdev->cal.ytrans; + if (tsdev->cal.xyswap) { + tmp = x; x = y; y = tmp; + } + } + + client->event[client->head].x = x; + client->event[client->head].y = y; + client->head = (client->head + 1) & (TSDEV_BUFFER_SIZE - 1); + kill_fasync(&client->fasync, SIGIO, POLL_IN); + } + wake_up_interruptible(&tsdev->wait); +} + +static int tsdev_connect(struct input_handler *handler, struct input_dev *dev, + const struct input_device_id *id) +{ + struct tsdev *tsdev; + int minor, delta; + int error; + + for (minor = 0; minor < TSDEV_MINORS / 2 && tsdev_table[minor]; minor++); + if (minor >= TSDEV_MINORS / 2) { + printk(KERN_ERR + "tsdev: You have way too many touchscreens\n"); + return -ENFILE; + } + + tsdev = kzalloc(sizeof(struct tsdev), GFP_KERNEL); + if (!tsdev) + return -ENOMEM; + + INIT_LIST_HEAD(&tsdev->client_list); + init_waitqueue_head(&tsdev->wait); + + tsdev->exist = 1; + tsdev->minor = minor; + tsdev->handle.dev = dev; + tsdev->handle.name = tsdev->name; + tsdev->handle.handler = handler; + tsdev->handle.private = tsdev; + snprintf(tsdev->name, sizeof(tsdev->name), "ts%d", minor); + + /* Precompute the rough calibration matrix */ + delta = dev->absmax [ABS_X] - dev->absmin [ABS_X] + 1; + if (delta == 0) + delta = 1; + tsdev->cal.xscale = (xres << 8) / delta; + tsdev->cal.xtrans = - ((dev->absmin [ABS_X] * tsdev->cal.xscale) >> 8); + + delta = dev->absmax [ABS_Y] - dev->absmin [ABS_Y] + 1; + if (delta == 0) + delta = 1; + tsdev->cal.yscale = (yres << 8) / delta; + tsdev->cal.ytrans = - ((dev->absmin [ABS_Y] * tsdev->cal.yscale) >> 
8); + + snprintf(tsdev->dev.bus_id, sizeof(tsdev->dev.bus_id), + "ts%d", minor); + tsdev->dev.class = &input_class; + tsdev->dev.parent = &dev->dev; + tsdev->dev.devt = MKDEV(INPUT_MAJOR, TSDEV_MINOR_BASE + minor); + tsdev->dev.release = tsdev_free; + device_initialize(&tsdev->dev); + + tsdev_table[minor] = tsdev; + + error = device_add(&tsdev->dev); + if (error) + goto err_free_tsdev; + + error = input_register_handle(&tsdev->handle); + if (error) + goto err_delete_tsdev; + + return 0; + + err_delete_tsdev: + device_del(&tsdev->dev); + err_free_tsdev: + put_device(&tsdev->dev); + return error; +} + +static void tsdev_disconnect(struct input_handle *handle) +{ + struct tsdev *tsdev = handle->private; + struct tsdev_client *client; + + input_unregister_handle(handle); + device_del(&tsdev->dev); + + tsdev->exist = 0; + + if (tsdev->open) { + input_close_device(handle); + list_for_each_entry(client, &tsdev->client_list, node) + kill_fasync(&client->fasync, SIGIO, POLL_HUP); + wake_up_interruptible(&tsdev->wait); + } + + put_device(&tsdev->dev); +} + +static const struct input_device_id tsdev_ids[] = { + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_RELBIT, + .evbit = { BIT(EV_KEY) | BIT(EV_REL) }, + .keybit = { [LONG(BTN_LEFT)] = BIT(BTN_LEFT) }, + .relbit = { BIT(REL_X) | BIT(REL_Y) }, + }, /* A mouse like device, at least one button, two relative axes */ + + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_KEYBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .evbit = { BIT(EV_KEY) | BIT(EV_ABS) }, + .keybit = { [LONG(BTN_TOUCH)] = BIT(BTN_TOUCH) }, + .absbit = { BIT(ABS_X) | BIT(ABS_Y) }, + }, /* A tablet like device, at least touch detection, two absolute axes */ + + { + .flags = INPUT_DEVICE_ID_MATCH_EVBIT | INPUT_DEVICE_ID_MATCH_ABSBIT, + .evbit = { BIT(EV_ABS) }, + .absbit = { BIT(ABS_X) | BIT(ABS_Y) | BIT(ABS_PRESSURE) }, + }, /* A tablet like device with several gradations of pressure */ + + {} /* 
Terminating entry */ +}; + +MODULE_DEVICE_TABLE(input, tsdev_ids); + +static struct input_handler tsdev_handler = { + .event = tsdev_event, + .connect = tsdev_connect, + .disconnect = tsdev_disconnect, + .fops = &tsdev_fops, + .minor = TSDEV_MINOR_BASE, + .name = "tsdev", + .id_table = tsdev_ids, +}; + +static int __init tsdev_init(void) +{ + return input_register_handler(&tsdev_handler); +} + +static void __exit tsdev_exit(void) +{ + input_unregister_handler(&tsdev_handler); +} + +module_init(tsdev_init); +module_exit(tsdev_exit); diff --git a/trunk/drivers/isdn/i4l/isdn_net.c b/trunk/drivers/isdn/i4l/isdn_net.c index b39d1f5b378e..7c9cb7e19f2e 100644 --- a/trunk/drivers/isdn/i4l/isdn_net.c +++ b/trunk/drivers/isdn/i4l/isdn_net.c @@ -328,7 +328,7 @@ isdn_net_autohup(void) l->cps = (l->transcount * HZ) / (jiffies - last_jiffies); l->transcount = 0; if (dev->net_verbose > 3) - printk(KERN_DEBUG "%s: %d bogocps\n", p->dev->name, l->cps); + printk(KERN_DEBUG "%s: %d bogocps\n", l->name, l->cps); if ((l->flags & ISDN_NET_CONNECTED) && (!l->dialstate)) { anymore = 1; l->huptimer++; @@ -350,12 +350,12 @@ isdn_net_autohup(void) if (l->hupflags & ISDN_CHARGEHUP) { if (l->hupflags & ISDN_WAITCHARGE) { printk(KERN_DEBUG "isdn_net: Hupflags of %s are %X\n", - p->dev->name, l->hupflags); + l->name, l->hupflags); isdn_net_hangup(p->dev); } else if (time_after(jiffies, l->chargetime + l->chargeint)) { printk(KERN_DEBUG "isdn_net: %s: chtime = %lu, chint = %d\n", - p->dev->name, l->chargetime, l->chargeint); + l->name, l->chargetime, l->chargeint); isdn_net_hangup(p->dev); } } else @@ -442,8 +442,8 @@ isdn_net_stat_callback(int idx, isdn_ctrl *c) #endif isdn_net_lp_disconnected(lp); isdn_all_eaz(lp->isdn_device, lp->isdn_channel); - printk(KERN_INFO "%s: remote hangup\n", p->dev->name); - printk(KERN_INFO "%s: Chargesum is %d\n", p->dev->name, + printk(KERN_INFO "%s: remote hangup\n", lp->name); + printk(KERN_INFO "%s: Chargesum is %d\n", lp->name, lp->charge); 
isdn_net_unbind_channel(lp); return 1; @@ -487,7 +487,7 @@ isdn_net_stat_callback(int idx, isdn_ctrl *c) isdn_net_add_to_bundle(nd, lp); } } - printk(KERN_INFO "isdn_net: %s connected\n", p->dev->name); + printk(KERN_INFO "isdn_net: %s connected\n", lp->name); /* If first Chargeinfo comes before B-Channel connect, * we correct the timestamp here. */ @@ -534,7 +534,7 @@ isdn_net_stat_callback(int idx, isdn_ctrl *c) lp->hupflags |= ISDN_HAVECHARGE; lp->chargetime = jiffies; printk(KERN_DEBUG "isdn_net: Got CINF chargetime of %s now %lu\n", - p->dev->name, lp->chargetime); + lp->name, lp->chargetime); return 1; } } @@ -565,7 +565,7 @@ isdn_net_dial(void) #ifdef ISDN_DEBUG_NET_DIAL if (lp->dialstate) - printk(KERN_DEBUG "%s: dialstate=%d\n", p->dev->name, lp->dialstate); + printk(KERN_DEBUG "%s: dialstate=%d\n", lp->name, lp->dialstate); #endif switch (lp->dialstate) { case 0: @@ -578,7 +578,7 @@ isdn_net_dial(void) lp->dial = lp->phone[1]; if (!lp->dial) { printk(KERN_WARNING "%s: phone number deleted?\n", - p->dev->name); + lp->name); isdn_net_hangup(p->dev); break; } @@ -632,13 +632,13 @@ isdn_net_dial(void) cmd.arg = lp->isdn_channel; if (!lp->dial) { printk(KERN_WARNING "%s: phone number deleted?\n", - p->dev->name); + lp->name); isdn_net_hangup(p->dev); break; } if (!strncmp(lp->dial->num, "LEASED", strlen("LEASED"))) { lp->dialstate = 4; - printk(KERN_INFO "%s: Open leased line ...\n", p->dev->name); + printk(KERN_INFO "%s: Open leased line ...\n", lp->name); } else { if(lp->dialtimeout > 0) if (time_after(jiffies, lp->dialstarted + lp->dialtimeout)) { @@ -688,7 +688,7 @@ isdn_net_dial(void) dev->usage[i] |= ISDN_USAGE_OUTGOING; isdn_info_update(); } - printk(KERN_INFO "%s: dialing %d %s... %s\n", p->dev->name, + printk(KERN_INFO "%s: dialing %d %s... %s\n", lp->name, lp->dialretry, cmd.parm.setup.phone, (cmd.parm.setup.si1 == 1) ? 
"DOV" : ""); lp->dtimer = 0; @@ -797,7 +797,7 @@ isdn_net_dial(void) */ if (lp->dtimer++ > lp->cbdelay) { - printk(KERN_INFO "%s: hangup waiting for callback ...\n", p->dev->name); + printk(KERN_INFO "%s: hangup waiting for callback ...\n", lp->name); lp->dtimer = 0; lp->dialstate = 4; cmd.driver = lp->isdn_device; @@ -810,7 +810,7 @@ isdn_net_dial(void) break; default: printk(KERN_WARNING "isdn_net: Illegal dialstate %d for device %s\n", - lp->dialstate, p->dev->name); + lp->dialstate, lp->name); } p = (isdn_net_dev *) p->next; } @@ -836,11 +836,11 @@ isdn_net_hangup(struct net_device *d) if (slp->flags & ISDN_NET_CONNECTED) { printk(KERN_INFO "isdn_net: hang up slave %s before %s\n", - lp->slave->name, d->name); + slp->name, lp->name); isdn_net_hangup(lp->slave); } } - printk(KERN_INFO "isdn_net: local hangup %s\n", d->name); + printk(KERN_INFO "isdn_net: local hangup %s\n", lp->name); #ifdef CONFIG_ISDN_PPP if (lp->p_encap == ISDN_NET_ENCAP_SYNCPPP) isdn_ppp_free(lp); @@ -858,7 +858,7 @@ isdn_net_hangup(struct net_device *d) cmd.command = ISDN_CMD_HANGUP; cmd.arg = lp->isdn_channel; isdn_command(&cmd); - printk(KERN_INFO "%s: Chargesum is %d\n", d->name, lp->charge); + printk(KERN_INFO "%s: Chargesum is %d\n", lp->name, lp->charge); isdn_all_eaz(lp->isdn_device, lp->isdn_channel); } isdn_net_unbind_channel(lp); @@ -885,7 +885,7 @@ isdn_net_log_skb(struct sk_buff * skb, isdn_net_local * lp) /* fall back to old isdn_net_log_packet method() */ char * buf = skb->data; - printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->netdev->dev->name); + printk(KERN_DEBUG "isdn_net: protocol %04x is buggy, dev %s\n", skb->protocol, lp->name); p = buf; proto = ETH_P_IP; switch (lp->p_encap) { @@ -1023,7 +1023,7 @@ void isdn_net_writebuf_skb(isdn_net_local *lp, struct sk_buff *skb) ret = isdn_writebuf_skb_stub(lp->isdn_device, lp->isdn_channel, 1, skb); if (ret != len) { /* we should never get here */ - printk(KERN_WARNING "%s: HL driver queue 
full\n", lp->netdev->dev->name); + printk(KERN_WARNING "%s: HL driver queue full\n", lp->name); goto error; } @@ -1461,7 +1461,7 @@ isdn_ciscohdlck_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) mod_timer(&lp->cisco_timer, expires); printk(KERN_INFO "%s: Keepalive period set " "to %d seconds.\n", - dev->name, lp->cisco_keepalive_period); + lp->name, lp->cisco_keepalive_period); } break; @@ -1512,7 +1512,7 @@ isdn_net_ciscohdlck_slarp_send_keepalive(unsigned long data) lp->cisco_line_state = 0; printk (KERN_WARNING "UPDOWN: Line protocol on Interface %s," - " changed state to down\n", lp->netdev->dev->name); + " changed state to down\n", lp->name); /* should stop routing higher-level data accross */ } else if ((!lp->cisco_line_state) && (myseq_diff >= 0) && (myseq_diff <= 2)) { @@ -1520,14 +1520,14 @@ isdn_net_ciscohdlck_slarp_send_keepalive(unsigned long data) lp->cisco_line_state = 1; printk (KERN_WARNING "UPDOWN: Line protocol on Interface %s," - " changed state to up\n", lp->netdev->dev->name); + " changed state to up\n", lp->name); /* restart routing higher-level data accross */ } if (lp->cisco_debserint) printk (KERN_DEBUG "%s: HDLC " "myseq %lu, mineseen %lu%c, yourseen %lu, %s\n", - lp->netdev->dev->name, last_cisco_myseq, lp->cisco_mineseen, + lp->name, last_cisco_myseq, lp->cisco_mineseen, ((last_cisco_myseq == lp->cisco_mineseen) ? '*' : 040), lp->cisco_yourseq, ((lp->cisco_line_state) ? 
"line up" : "line down")); @@ -1682,7 +1682,7 @@ isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) "remote ip: %d.%d.%d.%d, " "local ip: %d.%d.%d.%d " "mask: %d.%d.%d.%d\n", - lp->netdev->dev->name, + lp->name, HIPQUAD(addr), HIPQUAD(local), HIPQUAD(mask)); @@ -1690,7 +1690,7 @@ isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) slarp_reply_out: printk(KERN_INFO "%s: got invalid slarp " "reply (%d.%d.%d.%d/%d.%d.%d.%d) " - "- ignored\n", lp->netdev->dev->name, + "- ignored\n", lp->name, HIPQUAD(addr), HIPQUAD(mask)); break; case CISCO_SLARP_KEEPALIVE: @@ -1701,8 +1701,7 @@ isdn_net_ciscohdlck_slarp_in(isdn_net_local *lp, struct sk_buff *skb) lp->cisco_last_slarp_in) { printk(KERN_DEBUG "%s: Keepalive period mismatch - " "is %d but should be %d.\n", - lp->netdev->dev->name, period, - lp->cisco_keepalive_period); + lp->name, period, lp->cisco_keepalive_period); } lp->cisco_last_slarp_in = jiffies; p += get_u32(p, &my_seq); @@ -1733,12 +1732,12 @@ isdn_net_ciscohdlck_receive(isdn_net_local *lp, struct sk_buff *skb) if (addr != CISCO_ADDR_UNICAST && addr != CISCO_ADDR_BROADCAST) { printk(KERN_WARNING "%s: Unknown Cisco addr 0x%02x\n", - lp->netdev->dev->name, addr); + lp->name, addr); goto out_free; } if (ctrl != CISCO_CTRL) { printk(KERN_WARNING "%s: Unknown Cisco ctrl 0x%02x\n", - lp->netdev->dev->name, ctrl); + lp->name, ctrl); goto out_free; } @@ -1749,8 +1748,7 @@ isdn_net_ciscohdlck_receive(isdn_net_local *lp, struct sk_buff *skb) case CISCO_TYPE_CDP: if (lp->cisco_debserint) printk(KERN_DEBUG "%s: Received CDP packet. 
use " - "\"no cdp enable\" on cisco.\n", - lp->netdev->dev->name); + "\"no cdp enable\" on cisco.\n", lp->name); goto out_free; default: /* no special cisco protocol */ @@ -1845,7 +1843,7 @@ isdn_net_receive(struct net_device *ndev, struct sk_buff *skb) }; #endif /* CONFIG_ISDN_X25 */ printk(KERN_WARNING "%s: unknown encapsulation, dropping\n", - lp->netdev->dev->name); + lp->name); kfree_skb(skb); return; } @@ -2176,7 +2174,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) wret = matchret; #ifdef ISDN_DEBUG_NET_ICALL printk(KERN_DEBUG "n_fi: if='%s', l.msn=%s, l.flags=%d, l.dstate=%d\n", - p->dev->name, lp->msn, lp->flags, lp->dialstate); + lp->name, lp->msn, lp->flags, lp->dialstate); #endif if ((!matchret) && /* EAZ is matching */ (((!(lp->flags & ISDN_NET_CONNECTED)) && /* but not connected */ @@ -2279,7 +2277,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) * */ if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) { printk(KERN_INFO "incoming call, interface %s `stopped' -> rejected\n", - p->dev->name); + lp->name); return 3; } /* @@ -2288,7 +2286,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) */ if (!isdn_net_device_started(p)) { printk(KERN_INFO "%s: incoming call, interface down -> rejected\n", - p->dev->name); + lp->name); return 3; } /* Interface is up, now see if it's a slave. 
If so, see if @@ -2296,8 +2294,8 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) */ if (lp->master) { isdn_net_local *mlp = (isdn_net_local *) lp->master->priv; - printk(KERN_DEBUG "ICALLslv: %s\n", p->dev->name); - printk(KERN_DEBUG "master=%s\n", lp->master->name); + printk(KERN_DEBUG "ICALLslv: %s\n", lp->name); + printk(KERN_DEBUG "master=%s\n", mlp->name); if (mlp->flags & ISDN_NET_CONNECTED) { printk(KERN_DEBUG "master online\n"); /* Master is online, find parent-slave (master if first slave) */ @@ -2324,11 +2322,11 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) * */ if (ISDN_NET_DIALMODE(*lp) == ISDN_NET_DM_OFF) { printk(KERN_INFO "incoming call for callback, interface %s `off' -> rejected\n", - p->dev->name); + lp->name); return 3; } printk(KERN_DEBUG "%s: call from %s -> %s, start callback\n", - p->dev->name, nr, eaz); + lp->name, nr, eaz); if (lp->phone[1]) { /* Grab a free ISDN-Channel */ spin_lock_irqsave(&dev->lock, flags); @@ -2342,8 +2340,7 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) lp->msn) ) < 0) { - printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", - p->dev->name); + printk(KERN_WARNING "isdn_net_find_icall: No channel for %s\n", lp->name); spin_unlock_irqrestore(&dev->lock, flags); return 0; } @@ -2364,12 +2361,11 @@ isdn_net_find_icall(int di, int ch, int idx, setup_parm *setup) /* Initiate dialing by returning 2 or 4 */ return (lp->flags & ISDN_NET_CBHUP) ? 
2 : 4; } else - printk(KERN_WARNING "isdn_net: %s: No phone number\n", - p->dev->name); + printk(KERN_WARNING "isdn_net: %s: No phone number\n", lp->name); return 0; } else { - printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", - p->dev->name, nr, eaz); + printk(KERN_DEBUG "%s: call from %s -> %s accepted\n", lp->name, nr, + eaz); /* if this interface is dialing, it does it probably on a different device, so free this device */ if ((lp->dialstate == 4) || (lp->dialstate == 12)) { @@ -2428,7 +2424,7 @@ isdn_net_findif(char *name) isdn_net_dev *p = dev->netdev; while (p) { - if (!strcmp(p->dev->name, name)) + if (!strcmp(p->local->name, name)) return p; p = (isdn_net_dev *) p->next; } @@ -2457,8 +2453,7 @@ isdn_net_force_dial_lp(isdn_net_local * lp) lp->pre_device, lp->pre_channel, lp->msn)) < 0) { - printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", - lp->netdev->dev->name); + printk(KERN_WARNING "isdn_net_force_dial: No channel for %s\n", lp->name); spin_unlock_irqrestore(&dev->lock, flags); return -EAGAIN; } @@ -2561,7 +2556,7 @@ isdn_net_new(char *name, struct net_device *master) return NULL; } if (name == NULL) - return NULL; + name = " "; if (!(netdev = kzalloc(sizeof(isdn_net_dev), GFP_KERNEL))) { printk(KERN_WARNING "isdn_net: Could not allocate net-device\n"); return NULL; @@ -2573,6 +2568,7 @@ isdn_net_new(char *name, struct net_device *master) return NULL; } netdev->local = netdev->dev->priv; + strcpy(netdev->local->name, netdev->dev->name); netdev->dev->init = isdn_net_init; if (master) { /* Device shall be a slave */ @@ -2677,7 +2673,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) #endif if (isdn_net_device_started(p)) { printk(KERN_WARNING "%s: cannot change encap when if is up\n", - p->dev->name); + lp->name); return -EBUSY; } #ifdef CONFIG_ISDN_X25 @@ -2702,7 +2698,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) case ISDN_NET_ENCAP_SYNCPPP: #ifndef CONFIG_ISDN_PPP printk(KERN_WARNING "%s: SyncPPP support not configured\n", - 
p->dev->name); + lp->name); return -EINVAL; #else p->dev->type = ARPHRD_PPP; /* change ARP type */ @@ -2713,7 +2709,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) case ISDN_NET_ENCAP_X25IFACE: #ifndef CONFIG_ISDN_X25 printk(KERN_WARNING "%s: isdn-x25 support not configured\n", - p->dev->name); + p->local->name); return -EINVAL; #else p->dev->type = ARPHRD_X25; /* change ARP type */ @@ -2729,7 +2725,7 @@ isdn_net_setcfg(isdn_net_ioctl_cfg * cfg) break; printk(KERN_WARNING "%s: encapsulation protocol %d not supported\n", - p->dev->name, cfg->p_encap); + p->local->name, cfg->p_encap); return -EINVAL; } if (strlen(cfg->drvid)) { @@ -2906,18 +2902,13 @@ isdn_net_getcfg(isdn_net_ioctl_cfg * cfg) cfg->pppbind = lp->pppbind; cfg->dialtimeout = lp->dialtimeout >= 0 ? lp->dialtimeout / HZ : -1; cfg->dialwait = lp->dialwait / HZ; - if (lp->slave) { - if (strlen(lp->slave->name) > 8) - strcpy(cfg->slave, "too-long"); - else - strcpy(cfg->slave, lp->slave->name); - } else + if (lp->slave) + strcpy(cfg->slave, ((isdn_net_local *) lp->slave->priv)->name); + else cfg->slave[0] = '\0'; - if (lp->master) { - if (strlen(lp->master->name) > 8) - strcpy(cfg->master, "too-long"); - strcpy(cfg->master, lp->master->name); - } else + if (lp->master) + strcpy(cfg->master, ((isdn_net_local *) lp->master->priv)->name); + else cfg->master[0] = '\0'; return 0; } @@ -2987,8 +2978,7 @@ isdn_net_getpeer(isdn_net_ioctl_phone *phone, isdn_net_ioctl_phone __user *peer) isdn_net_dev *p = isdn_net_findif(phone->name); int ch, dv, idx; - if (!p) - return -ENODEV; + if (!p) return -ENODEV; /* * Theoretical race: while this executes, the remote number might * become invalid (hang up) or change (new connection), resulting @@ -2997,18 +2987,14 @@ isdn_net_getpeer(isdn_net_ioctl_phone *phone, isdn_net_ioctl_phone __user *peer) */ ch = p->local->isdn_channel; dv = p->local->isdn_device; - if(ch < 0 && dv < 0) - return -ENOTCONN; + if(ch<0 && dv<0) return -ENOTCONN; idx = isdn_dc2minor(dv, ch); - if (idx <0 ) 
- return -ENODEV; + if (idx<0) return -ENODEV; /* for pre-bound channels, we need this extra check */ - if (strncmp(dev->num[idx], "???", 3) == 0) - return -ENOTCONN; - strncpy(phone->phone, dev->num[idx], ISDN_MSNLEN); - phone->outgoing = USG_OUTGOING(dev->usage[idx]); - if (copy_to_user(peer, phone, sizeof(*peer))) - return -EFAULT; + if ( strncmp(dev->num[idx],"???",3) == 0 ) return -ENOTCONN; + strncpy(phone->phone,dev->num[idx],ISDN_MSNLEN); + phone->outgoing=USG_OUTGOING(dev->usage[idx]); + if ( copy_to_user(peer,phone,sizeof(*peer)) ) return -EFAULT; return 0; } /* @@ -3127,18 +3113,18 @@ isdn_net_realrm(isdn_net_dev * p, isdn_net_dev * q) dev->netdev = p->next; if (p->local->slave) { /* If this interface has a slave, remove it also */ - char *slavename = p->local->slave->name; + char *slavename = ((isdn_net_local *) (p->local->slave->priv))->name; isdn_net_dev *n = dev->netdev; q = NULL; while (n) { - if (!strcmp(n->dev->name, slavename)) { + if (!strcmp(n->local->name, slavename)) { spin_unlock_irqrestore(&dev->lock, flags); isdn_net_realrm(n, q); spin_lock_irqsave(&dev->lock, flags); break; } q = n; - n = (isdn_net_dev *)n->next; + n = (isdn_net_dev *) n->next; } } spin_unlock_irqrestore(&dev->lock, flags); @@ -3166,7 +3152,7 @@ isdn_net_rm(char *name) p = dev->netdev; q = NULL; while (p) { - if (!strcmp(p->dev->name, name)) { + if (!strcmp(p->local->name, name)) { spin_unlock_irqrestore(&dev->lock, flags); return (isdn_net_realrm(p, q)); } diff --git a/trunk/drivers/isdn/i4l/isdn_ppp.c b/trunk/drivers/isdn/i4l/isdn_ppp.c index 9f5fe372f83d..0e5e59f84344 100644 --- a/trunk/drivers/isdn/i4l/isdn_ppp.c +++ b/trunk/drivers/isdn/i4l/isdn_ppp.c @@ -190,11 +190,9 @@ isdn_ppp_bind(isdn_net_local * lp) retval = -1; goto out; } - /* get unit number from interface name .. ugly! */ - unit = isdn_ppp_if_get_unit(lp->netdev->dev->name); + unit = isdn_ppp_if_get_unit(lp->name); /* get unit number from interface name .. ugly! 
*/ if (unit < 0) { - printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", - lp->netdev->dev->name); + printk(KERN_ERR "isdn_ppp_bind: illegal interface name %s.\n", lp->name); retval = -1; goto out; } @@ -509,8 +507,7 @@ isdn_ppp_ioctl(int min, struct file *file, unsigned int cmd, unsigned long arg) case PPPIOCGIFNAME: if(!lp) return -EINVAL; - if ((r = set_arg(argp, lp->netdev->dev->name, - strlen(lp->netdev->dev->name)))) + if ((r = set_arg(argp, lp->name, strlen(lp->name)))) return r; break; case PPPIOCGMPFLAGS: /* get configuration flags */ diff --git a/trunk/drivers/macintosh/Kconfig b/trunk/drivers/macintosh/Kconfig index 77f50b63a970..56cd8998fe4b 100644 --- a/trunk/drivers/macintosh/Kconfig +++ b/trunk/drivers/macintosh/Kconfig @@ -172,7 +172,6 @@ config INPUT_ADBHID config MAC_EMUMOUSEBTN bool "Support for mouse button 2+3 emulation" - select INPUT help This provides generic support for emulating the 2nd and 3rd mouse button with keypresses. If you say Y here, the emulation is still diff --git a/trunk/drivers/macintosh/adbhid.c b/trunk/drivers/macintosh/adbhid.c index 8cce016b3d09..48d17bf6c927 100644 --- a/trunk/drivers/macintosh/adbhid.c +++ b/trunk/drivers/macintosh/adbhid.c @@ -52,11 +52,6 @@ MODULE_AUTHOR("Franz Sirl "); -static int restore_capslock_events; -module_param(restore_capslock_events, int, 0644); -MODULE_PARM_DESC(restore_capslock_events, - "Produce keypress events for capslock on both keyup and keydown."); - #define KEYB_KEYREG 0 /* register # for key up/down data */ #define KEYB_LEDREG 2 /* register # for leds on ADB keyboard */ #define MOUSE_DATAREG 0 /* reg# for movement/button codes from mouse */ @@ -222,8 +217,6 @@ struct adbhid { #define FLAG_FN_KEY_PRESSED 0x00000001 #define FLAG_POWER_FROM_FN 0x00000002 #define FLAG_EMU_FWDEL_DOWN 0x00000004 -#define FLAG_CAPSLOCK_TRANSLATE 0x00000008 -#define FLAG_CAPSLOCK_DOWN 0x00000010 static struct adbhid *adbhid[16]; @@ -279,50 +272,19 @@ adbhid_keyboard_input(unsigned char *data, 
int nb, int apoll) } static void -adbhid_input_keycode(int id, int scancode, int repeat) +adbhid_input_keycode(int id, int keycode, int repeat) { struct adbhid *ahid = adbhid[id]; - int keycode, up_flag; - - keycode = scancode & 0x7f; - up_flag = scancode & 0x80; - - if (restore_capslock_events) { - if (keycode == ADB_KEY_CAPSLOCK && !up_flag) { - /* Key pressed, turning on the CapsLock LED. - * The next 0xff will be interpreted as a release. */ - ahid->flags |= FLAG_CAPSLOCK_TRANSLATE - | FLAG_CAPSLOCK_DOWN; - } else if (scancode == 0xff) { - /* Scancode 0xff usually signifies that the capslock - * key was either pressed or released. */ - if (ahid->flags & FLAG_CAPSLOCK_TRANSLATE) { - keycode = ADB_KEY_CAPSLOCK; - if (ahid->flags & FLAG_CAPSLOCK_DOWN) { - /* Key released */ - up_flag = 1; - ahid->flags &= ~FLAG_CAPSLOCK_DOWN; - } else { - /* Key pressed */ - up_flag = 0; - ahid->flags &= ~FLAG_CAPSLOCK_TRANSLATE; - } - } else { - printk(KERN_INFO "Spurious caps lock event " - "(scancode 0xff)."); - } - } - } + int up_flag, key; + + up_flag = (keycode & 0x80); + keycode &= 0x7f; switch (keycode) { - case ADB_KEY_CAPSLOCK: - if (!restore_capslock_events) { - /* Generate down/up events for CapsLock everytime. */ - input_report_key(ahid->input, KEY_CAPSLOCK, 1); - input_sync(ahid->input); - input_report_key(ahid->input, KEY_CAPSLOCK, 0); - input_sync(ahid->input); - } + case ADB_KEY_CAPSLOCK: /* Generate down/up events for CapsLock everytime. 
*/ + input_report_key(ahid->input, KEY_CAPSLOCK, 1); + input_report_key(ahid->input, KEY_CAPSLOCK, 0); + input_sync(ahid->input); return; #ifdef CONFIG_PPC_PMAC case ADB_KEY_POWER_OLD: /* Power key on PBook 3400 needs remapping */ @@ -334,7 +296,7 @@ adbhid_input_keycode(int id, int scancode, int repeat) keycode = ADB_KEY_POWER; } break; - case ADB_KEY_POWER: + case ADB_KEY_POWER: /* Fn + Command will produce a bogus "power" keycode */ if (ahid->flags & FLAG_FN_KEY_PRESSED) { keycode = ADB_KEY_CMD; diff --git a/trunk/drivers/misc/thinkpad_acpi.c b/trunk/drivers/misc/thinkpad_acpi.c index 81e068fa7ac5..216948dd71a5 100644 --- a/trunk/drivers/misc/thinkpad_acpi.c +++ b/trunk/drivers/misc/thinkpad_acpi.c @@ -945,15 +945,15 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_UNKNOWN, /* 0x0C: FN+BACKSPACE */ KEY_UNKNOWN, /* 0x0D: FN+INSERT */ KEY_UNKNOWN, /* 0x0E: FN+DELETE */ - KEY_BRIGHTNESSUP, /* 0x0F: FN+HOME (brightness up) */ + KEY_RESERVED, /* 0x0F: FN+HOME (brightness up) */ /* Scan codes 0x10 to 0x1F: Extended ACPI HKEY hot keys */ - KEY_BRIGHTNESSDOWN, /* 0x10: FN+END (brightness down) */ + KEY_RESERVED, /* 0x10: FN+END (brightness down) */ KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */ KEY_UNKNOWN, /* 0x12: FN+PGDOWN */ KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */ - KEY_VOLUMEUP, /* 0x14: VOLUME UP */ - KEY_VOLUMEDOWN, /* 0x15: VOLUME DOWN */ - KEY_MUTE, /* 0x16: MUTE */ + KEY_RESERVED, /* 0x14: VOLUME UP */ + KEY_RESERVED, /* 0x15: VOLUME DOWN */ + KEY_RESERVED, /* 0x16: MUTE */ KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */ /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, @@ -974,9 +974,9 @@ static int __init hotkey_init(struct ibm_init_struct *iibm) KEY_RESERVED, /* 0x11: FN+PGUP (thinklight toggle) */ KEY_UNKNOWN, /* 0x12: FN+PGDOWN */ KEY_ZOOM, /* 0x13: FN+SPACE (zoom) */ - KEY_VOLUMEUP, /* 0x14: VOLUME UP */ - KEY_VOLUMEDOWN, /* 0x15: VOLUME DOWN */ - KEY_MUTE, /* 0x16: MUTE */ + 
KEY_RESERVED, /* 0x14: VOLUME UP */ + KEY_RESERVED, /* 0x15: VOLUME DOWN */ + KEY_RESERVED, /* 0x16: MUTE */ KEY_VENDOR, /* 0x17: Thinkpad/AccessIBM/Lenovo */ /* (assignments unknown, please report if found) */ KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, KEY_UNKNOWN, diff --git a/trunk/drivers/mtd/maps/pxa2xx-flash.c b/trunk/drivers/mtd/maps/pxa2xx-flash.c index 82113295c266..cb933ac475d5 100644 --- a/trunk/drivers/mtd/maps/pxa2xx-flash.c +++ b/trunk/drivers/mtd/maps/pxa2xx-flash.c @@ -14,20 +14,20 @@ #include #include #include +#include #include #include #include #include #include -#include #include static void pxa2xx_map_inval_cache(struct map_info *map, unsigned long from, ssize_t len) { - flush_ioremap_region(map->phys, map->cached, from, len); + consistent_sync((char *)map->cached + from, len, DMA_FROM_DEVICE); } struct pxa2xx_flash_info { diff --git a/trunk/drivers/net/Kconfig b/trunk/drivers/net/Kconfig index 8f99a0626616..9c635a237a9d 100644 --- a/trunk/drivers/net/Kconfig +++ b/trunk/drivers/net/Kconfig @@ -1780,15 +1780,6 @@ config SC92031 To compile this driver as a module, choose M here: the module will be called sc92031. This is recommended. 
-config CPMAC - tristate "TI AR7 CPMAC Ethernet support (EXPERIMENTAL)" - depends on NET_ETHERNET && EXPERIMENTAL && AR7 - select PHYLIB - select FIXED_PHY - select FIXED_MII_100_FDX - help - TI AR7 CPMAC Ethernet support - config NET_POCKET bool "Pocket and portable adapters" depends on PARPORT diff --git a/trunk/drivers/net/Makefile b/trunk/drivers/net/Makefile index 22f78cbd126b..d2e0f35da42e 100644 --- a/trunk/drivers/net/Makefile +++ b/trunk/drivers/net/Makefile @@ -159,7 +159,6 @@ obj-$(CONFIG_8139CP) += 8139cp.o obj-$(CONFIG_8139TOO) += 8139too.o obj-$(CONFIG_ZNET) += znet.o obj-$(CONFIG_LAN_SAA9730) += saa9730.o -obj-$(CONFIG_CPMAC) += cpmac.o obj-$(CONFIG_DEPCA) += depca.o obj-$(CONFIG_EWRK3) += ewrk3.o obj-$(CONFIG_ATP) += atp.o diff --git a/trunk/drivers/net/au1000_eth.c b/trunk/drivers/net/au1000_eth.c index 185f98e3964c..b46c5d8a77bd 100644 --- a/trunk/drivers/net/au1000_eth.c +++ b/trunk/drivers/net/au1000_eth.c @@ -54,16 +54,13 @@ #include #include #include - -#include #include #include #include #include -#include -#include - +#include +#include #include "au1000_eth.h" #ifdef AU1000_ETH_DEBUG @@ -99,6 +96,11 @@ static void mdio_write(struct net_device *, int, int, u16); static void au1000_adjust_link(struct net_device *); static void enable_mac(struct net_device *, int); +// externs +extern int get_ethernet_addr(char *ethernet_addr); +extern void str2eaddr(unsigned char *ea, unsigned char *str); +extern char * prom_getcmdline(void); + /* * Theory of operation * @@ -617,6 +619,7 @@ static struct net_device * au1000_probe(int port_num) struct au1000_private *aup = NULL; struct net_device *dev = NULL; db_dest_t *pDB, *pDBfree; + char *pmac, *argptr; char ethaddr[6]; int irq, i, err; u32 base, macen; @@ -674,12 +677,21 @@ static struct net_device * au1000_probe(int port_num) au_macs[port_num] = aup; if (port_num == 0) { - if (prom_get_ethernet_addr(ethaddr) == 0) + /* Check the environment variables first */ + if (get_ethernet_addr(ethaddr) == 0) 
memcpy(au1000_mac_addr, ethaddr, sizeof(au1000_mac_addr)); else { - printk(KERN_INFO "%s: No MAC address found\n", - dev->name); + /* Check command line */ + argptr = prom_getcmdline(); + if ((pmac = strstr(argptr, "ethaddr=")) == NULL) + printk(KERN_INFO "%s: No MAC address found\n", + dev->name); /* Use the hard coded MAC addresses */ + else { + str2eaddr(ethaddr, pmac + strlen("ethaddr=")); + memcpy(au1000_mac_addr, ethaddr, + sizeof(au1000_mac_addr)); + } } setup_hw_rings(aup, MAC0_RX_DMA_ADDR, MAC0_TX_DMA_ADDR); diff --git a/trunk/drivers/net/bonding/bond_main.c b/trunk/drivers/net/bonding/bond_main.c index db80f243dd37..64bfec32e2a6 100644 --- a/trunk/drivers/net/bonding/bond_main.c +++ b/trunk/drivers/net/bonding/bond_main.c @@ -98,7 +98,6 @@ static char *xmit_hash_policy = NULL; static int arp_interval = BOND_LINK_ARP_INTERV; static char *arp_ip_target[BOND_MAX_ARP_TARGETS] = { NULL, }; static char *arp_validate = NULL; -static int fail_over_mac = 0; struct bond_params bonding_defaults; module_param(max_bonds, int, 0); @@ -132,8 +131,6 @@ module_param_array(arp_ip_target, charp, NULL, 0); MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form"); module_param(arp_validate, charp, 0); MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes: none (default), active, backup or all"); -module_param(fail_over_mac, int, 0); -MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to the same MAC. 0 of off (default), 1 for on."); /*----------------------------- Global variables ----------------------------*/ @@ -1099,21 +1096,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) if (new_active) { bond_set_slave_active_flags(new_active); } - - /* when bonding does not set the slave MAC address, the bond MAC - * address is the one of the active slave. 
- */ - if (new_active && bond->params.fail_over_mac) - memcpy(bond->dev->dev_addr, new_active->dev->dev_addr, - new_active->dev->addr_len); - if (bond->curr_active_slave && - test_bit(__LINK_STATE_LINKWATCH_PENDING, - &bond->curr_active_slave->dev->state)) { - dprintk("delaying gratuitous arp on %s\n", - bond->curr_active_slave->dev->name); - bond->send_grat_arp = 1; - } else - bond_send_gratuitous_arp(bond); + bond_send_gratuitous_arp(bond); } } @@ -1234,8 +1217,7 @@ static int bond_compute_features(struct bonding *bond) struct slave *slave; struct net_device *bond_dev = bond->dev; unsigned long features = bond_dev->features; - unsigned short max_hard_header_len = max((u16)ETH_HLEN, - bond_dev->hard_header_len); + unsigned short max_hard_header_len = ETH_HLEN; int i; features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); @@ -1256,23 +1238,6 @@ static int bond_compute_features(struct bonding *bond) return 0; } - -static void bond_setup_by_slave(struct net_device *bond_dev, - struct net_device *slave_dev) -{ - struct bonding *bond = bond_dev->priv; - - bond_dev->neigh_setup = slave_dev->neigh_setup; - - bond_dev->type = slave_dev->type; - bond_dev->hard_header_len = slave_dev->hard_header_len; - bond_dev->addr_len = slave_dev->addr_len; - - memcpy(bond_dev->broadcast, slave_dev->broadcast, - slave_dev->addr_len); - bond->setup_by_slave = 1; -} - /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1293,9 +1258,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) /* bond must be initialized by bond_open() before enslaving */ if (!(bond_dev->flags & IFF_UP)) { - printk(KERN_WARNING DRV_NAME - " %s: master_dev is not up in bond_enslave\n", - bond_dev->name); + dprintk("Error, master_dev is not up\n"); + return -EPERM; } /* already enslaved */ @@ -1348,42 +1312,14 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) goto err_undo_flags; } - /* set 
bonding device ether type by slave - bonding netdevices are - * created with ether_setup, so when the slave type is not ARPHRD_ETHER - * there is a need to override some of the type dependent attribs/funcs. - * - * bond ether type mutual exclusion - don't allow slaves of dissimilar - * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond - */ - if (bond->slave_cnt == 0) { - if (slave_dev->type != ARPHRD_ETHER) - bond_setup_by_slave(bond_dev, slave_dev); - } else if (bond_dev->type != slave_dev->type) { - printk(KERN_ERR DRV_NAME ": %s ether type (%d) is different " - "from other slaves (%d), can not enslave it.\n", - slave_dev->name, - slave_dev->type, bond_dev->type); - res = -EINVAL; - goto err_undo_flags; - } - if (slave_dev->set_mac_address == NULL) { - if (bond->slave_cnt == 0) { - printk(KERN_WARNING DRV_NAME - ": %s: Warning: The first slave device " - "specified does not support setting the MAC " - "address. Enabling the fail_over_mac option.", - bond_dev->name); - bond->params.fail_over_mac = 1; - } else if (!bond->params.fail_over_mac) { - printk(KERN_ERR DRV_NAME - ": %s: Error: The slave device specified " - "does not support setting the MAC address, " - "but fail_over_mac is not enabled.\n" - , bond_dev->name); - res = -EOPNOTSUPP; - goto err_undo_flags; - } + printk(KERN_ERR DRV_NAME + ": %s: Error: The slave device you specified does " + "not support setting the MAC address. " + "Your kernel likely does not support slave " + "devices.\n", bond_dev->name); + res = -EOPNOTSUPP; + goto err_undo_flags; } new_slave = kzalloc(sizeof(struct slave), GFP_KERNEL); @@ -1404,18 +1340,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) */ memcpy(new_slave->perm_hwaddr, slave_dev->dev_addr, ETH_ALEN); - if (!bond->params.fail_over_mac) { - /* - * Set slave to master's mac address. 
The application already - * set the master's mac address to that of the first slave - */ - memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); - addr.sa_family = slave_dev->type; - res = dev_set_mac_address(slave_dev, &addr); - if (res) { - dprintk("Error %d calling set_mac_address\n", res); - goto err_free; - } + /* + * Set slave to master's mac address. The application already + * set the master's mac address to that of the first slave + */ + memcpy(addr.sa_data, bond_dev->dev_addr, bond_dev->addr_len); + addr.sa_family = slave_dev->type; + res = dev_set_mac_address(slave_dev, &addr); + if (res) { + dprintk("Error %d calling set_mac_address\n", res); + goto err_free; } res = netdev_set_master(slave_dev, bond_dev); @@ -1640,11 +1574,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) dev_close(slave_dev); err_restore_mac: - if (!bond->params.fail_over_mac) { - memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); - addr.sa_family = slave_dev->type; - dev_set_mac_address(slave_dev, &addr); - } + memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + dev_set_mac_address(slave_dev, &addr); err_free: kfree(new_slave); @@ -1817,12 +1749,10 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) /* close slave before restoring its mac address */ dev_close(slave_dev); - if (!bond->params.fail_over_mac) { - /* restore original ("permanent") mac address */ - memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); - addr.sa_family = slave_dev->type; - dev_set_mac_address(slave_dev, &addr); - } + /* restore original ("permanent") mac address */ + memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + dev_set_mac_address(slave_dev, &addr); slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | IFF_SLAVE_INACTIVE | IFF_BONDING | @@ -1833,35 +1763,6 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) return 0; 
/* deletion OK */ } -/* -* Destroy a bonding device. -* Must be under rtnl_lock when this function is called. -*/ -void bond_destroy(struct bonding *bond) -{ - bond_deinit(bond->dev); - bond_destroy_sysfs_entry(bond); - unregister_netdevice(bond->dev); -} - -/* -* First release a slave and than destroy the bond if no more slaves iare left. -* Must be under rtnl_lock when this function is called. -*/ -int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev) -{ - struct bonding *bond = bond_dev->priv; - int ret; - - ret = bond_release(bond_dev, slave_dev); - if ((ret == 0) && (bond->slave_cnt == 0)) { - printk(KERN_INFO DRV_NAME ": %s: destroying bond %s.\n", - bond_dev->name, bond_dev->name); - bond_destroy(bond); - } - return ret; -} - /* * This function releases all slaves. */ @@ -1938,12 +1839,10 @@ static int bond_release_all(struct net_device *bond_dev) /* close slave before restoring its mac address */ dev_close(slave_dev); - if (!bond->params.fail_over_mac) { - /* restore original ("permanent") mac address*/ - memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); - addr.sa_family = slave_dev->type; - dev_set_mac_address(slave_dev, &addr); - } + /* restore original ("permanent") mac address*/ + memcpy(addr.sa_data, slave->perm_hwaddr, ETH_ALEN); + addr.sa_family = slave_dev->type; + dev_set_mac_address(slave_dev, &addr); slave_dev->priv_flags &= ~(IFF_MASTER_8023AD | IFF_MASTER_ALB | IFF_SLAVE_INACTIVE); @@ -2114,17 +2013,6 @@ void bond_mii_monitor(struct net_device *bond_dev) * program could monitor the link itself if needed. 
*/ - if (bond->send_grat_arp) { - if (bond->curr_active_slave && test_bit(__LINK_STATE_LINKWATCH_PENDING, - &bond->curr_active_slave->dev->state)) - dprintk("Needs to send gratuitous arp but not yet\n"); - else { - dprintk("sending delayed gratuitous arp on on %s\n", - bond->curr_active_slave->dev->name); - bond_send_gratuitous_arp(bond); - bond->send_grat_arp = 0; - } - } read_lock(&bond->curr_slave_lock); oldcurrent = bond->curr_active_slave; read_unlock(&bond->curr_slave_lock); @@ -2526,7 +2414,7 @@ static void bond_send_gratuitous_arp(struct bonding *bond) if (bond->master_ip) { bond_arp_send(slave->dev, ARPOP_REPLY, bond->master_ip, - bond->master_ip, 0); + bond->master_ip, 0); } list_for_each_entry(vlan, &bond->vlan_list, vlan_list) { @@ -3063,15 +2951,9 @@ static void bond_info_show_master(struct seq_file *seq) curr = bond->curr_active_slave; read_unlock(&bond->curr_slave_lock); - seq_printf(seq, "Bonding Mode: %s", + seq_printf(seq, "Bonding Mode: %s\n", bond_mode_name(bond->params.mode)); - if (bond->params.mode == BOND_MODE_ACTIVEBACKUP && - bond->params.fail_over_mac) - seq_printf(seq, " (fail_over_mac)"); - - seq_printf(seq, "\n"); - if (bond->params.mode == BOND_MODE_XOR || bond->params.mode == BOND_MODE_8023AD) { seq_printf(seq, "Transmit Hash Policy: %s (%d)\n", @@ -3366,11 +3248,6 @@ static int bond_slave_netdev_event(unsigned long event, struct net_device *slave * ... Or is it this? */ break; - case NETDEV_GOING_DOWN: - dprintk("slave %s is going down\n", slave_dev->name); - if (bond->setup_by_slave) - bond_release_and_destroy(bond_dev, slave_dev); - break; case NETDEV_CHANGEMTU: /* * TODO: Should slaves be allowed to @@ -4003,13 +3880,6 @@ static int bond_set_mac_address(struct net_device *bond_dev, void *addr) dprintk("bond=%p, name=%s\n", bond, (bond_dev ? bond_dev->name : "None")); - /* - * If fail_over_mac is enabled, do nothing and return success. - * Returning an error causes ifenslave to fail. 
- */ - if (bond->params.fail_over_mac) - return 0; - if (!is_valid_ether_addr(sa->sa_data)) { return -EADDRNOTAVAIL; } @@ -4347,8 +4217,6 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) bond->current_arp_slave = NULL; bond->primary_slave = NULL; bond->dev = bond_dev; - bond->send_grat_arp = 0; - bond->setup_by_slave = 0; INIT_LIST_HEAD(&bond->vlan_list); /* Initialize the device entry points */ @@ -4397,6 +4265,7 @@ static int bond_init(struct net_device *bond_dev, struct bond_params *params) #ifdef CONFIG_PROC_FS bond_create_proc_entry(bond); #endif + list_add_tail(&bond->bond_list, &bond_dev_list); return 0; @@ -4730,11 +4599,6 @@ static int bond_check_params(struct bond_params *params) primary = NULL; } - if (fail_over_mac && (bond_mode != BOND_MODE_ACTIVEBACKUP)) - printk(KERN_WARNING DRV_NAME - ": Warning: fail_over_mac only affects " - "active-backup mode.\n"); - /* fill params struct with the proper values */ params->mode = bond_mode; params->xmit_policy = xmit_hashtype; @@ -4746,7 +4610,6 @@ static int bond_check_params(struct bond_params *params) params->use_carrier = use_carrier; params->lacp_fast = lacp_fast; params->primary[0] = 0; - params->fail_over_mac = fail_over_mac; if (primary) { strncpy(params->primary, primary, IFNAMSIZ); diff --git a/trunk/drivers/net/bonding/bond_sysfs.c b/trunk/drivers/net/bonding/bond_sysfs.c index 80c0c8c415ed..6f49ca7e9b66 100644 --- a/trunk/drivers/net/bonding/bond_sysfs.c +++ b/trunk/drivers/net/bonding/bond_sysfs.c @@ -164,7 +164,9 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t printk(KERN_INFO DRV_NAME ": %s is being deleted...\n", bond->dev->name); - bond_destroy(bond); + bond_deinit(bond->dev); + bond_destroy_sysfs_entry(bond); + unregister_netdevice(bond->dev); rtnl_unlock(); goto out; } @@ -258,16 +260,17 @@ static ssize_t bonding_store_slaves(struct device *d, char command[IFNAMSIZ + 1] = { 0, }; char *ifname; int i, res, found, ret = count; - 
u32 original_mtu; struct slave *slave; struct net_device *dev = NULL; struct bonding *bond = to_bond(d); /* Quick sanity check -- is the bond interface up? */ if (!(bond->dev->flags & IFF_UP)) { - printk(KERN_WARNING DRV_NAME - ": %s: doing slave updates when interface is down.\n", + printk(KERN_ERR DRV_NAME + ": %s: Unable to update slaves because interface is down.\n", bond->dev->name); + ret = -EPERM; + goto out; } /* Note: We can't hold bond->lock here, as bond_create grabs it. */ @@ -324,7 +327,6 @@ static ssize_t bonding_store_slaves(struct device *d, } /* Set the slave's MTU to match the bond */ - original_mtu = dev->mtu; if (dev->mtu != bond->dev->mtu) { if (dev->change_mtu) { res = dev->change_mtu(dev, @@ -339,9 +341,6 @@ static ssize_t bonding_store_slaves(struct device *d, } rtnl_lock(); res = bond_enslave(bond->dev, dev); - bond_for_each_slave(bond, slave, i) - if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) - slave->original_mtu = original_mtu; rtnl_unlock(); if (res) { ret = res; @@ -354,17 +353,13 @@ static ssize_t bonding_store_slaves(struct device *d, bond_for_each_slave(bond, slave, i) if (strnicmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { dev = slave->dev; - original_mtu = slave->original_mtu; break; } if (dev) { printk(KERN_INFO DRV_NAME ": %s: Removing slave %s\n", bond->dev->name, dev->name); rtnl_lock(); - if (bond->setup_by_slave) - res = bond_release_and_destroy(bond->dev, dev); - else - res = bond_release(bond->dev, dev); + res = bond_release(bond->dev, dev); rtnl_unlock(); if (res) { ret = res; @@ -372,9 +367,9 @@ static ssize_t bonding_store_slaves(struct device *d, } /* set the slave MTU to the default */ if (dev->change_mtu) { - dev->change_mtu(dev, original_mtu); + dev->change_mtu(dev, 1500); } else { - dev->mtu = original_mtu; + dev->mtu = 1500; } } else { @@ -567,54 +562,6 @@ static ssize_t bonding_store_arp_validate(struct device *d, static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate, 
bonding_store_arp_validate); -/* - * Show and store fail_over_mac. User only allowed to change the - * value when there are no slaves. - */ -static ssize_t bonding_show_fail_over_mac(struct device *d, struct device_attribute *attr, char *buf) -{ - struct bonding *bond = to_bond(d); - - return sprintf(buf, "%d\n", bond->params.fail_over_mac) + 1; -} - -static ssize_t bonding_store_fail_over_mac(struct device *d, struct device_attribute *attr, const char *buf, size_t count) -{ - int new_value; - int ret = count; - struct bonding *bond = to_bond(d); - - if (bond->slave_cnt != 0) { - printk(KERN_ERR DRV_NAME - ": %s: Can't alter fail_over_mac with slaves in bond.\n", - bond->dev->name); - ret = -EPERM; - goto out; - } - - if (sscanf(buf, "%d", &new_value) != 1) { - printk(KERN_ERR DRV_NAME - ": %s: no fail_over_mac value specified.\n", - bond->dev->name); - ret = -EINVAL; - goto out; - } - - if ((new_value == 0) || (new_value == 1)) { - bond->params.fail_over_mac = new_value; - printk(KERN_INFO DRV_NAME ": %s: Setting fail_over_mac to %d.\n", - bond->dev->name, new_value); - } else { - printk(KERN_INFO DRV_NAME - ": %s: Ignoring invalid fail_over_mac value %d.\n", - bond->dev->name, new_value); - } -out: - return ret; -} - -static DEVICE_ATTR(fail_over_mac, S_IRUGO | S_IWUSR, bonding_show_fail_over_mac, bonding_store_fail_over_mac); - /* * Show and set the arp timer interval. There are two tricky bits * here. 
First, if ARP monitoring is activated, then we must disable @@ -1436,7 +1383,6 @@ static DEVICE_ATTR(ad_partner_mac, S_IRUGO, bonding_show_ad_partner_mac, NULL); static struct attribute *per_bond_attrs[] = { &dev_attr_slaves.attr, &dev_attr_mode.attr, - &dev_attr_fail_over_mac.attr, &dev_attr_arp_validate.attr, &dev_attr_arp_interval.attr, &dev_attr_arp_ip_target.attr, diff --git a/trunk/drivers/net/bonding/bonding.h b/trunk/drivers/net/bonding/bonding.h index a8bbd563265c..2a6af7d23728 100644 --- a/trunk/drivers/net/bonding/bonding.h +++ b/trunk/drivers/net/bonding/bonding.h @@ -22,8 +22,8 @@ #include "bond_3ad.h" #include "bond_alb.h" -#define DRV_VERSION "3.2.0" -#define DRV_RELDATE "September 13, 2007" +#define DRV_VERSION "3.1.3" +#define DRV_RELDATE "June 13, 2007" #define DRV_NAME "bonding" #define DRV_DESCRIPTION "Ethernet Channel Bonding Driver" @@ -128,7 +128,6 @@ struct bond_params { int arp_interval; int arp_validate; int use_carrier; - int fail_over_mac; int updelay; int downdelay; int lacp_fast; @@ -157,7 +156,6 @@ struct slave { s8 link; /* one of BOND_LINK_XXXX */ s8 state; /* one of BOND_STATE_XXXX */ u32 original_flags; - u32 original_mtu; u32 link_failure_count; u16 speed; u8 duplex; @@ -187,8 +185,6 @@ struct bonding { struct timer_list mii_timer; struct timer_list arp_timer; s8 kill_timers; - s8 send_grat_arp; - s8 setup_by_slave; struct net_device_stats stats; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_entry; @@ -296,8 +292,6 @@ static inline void bond_unset_master_alb_flags(struct bonding *bond) struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr); int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev); int bond_create(char *name, struct bond_params *params, struct bonding **newbond); -void bond_destroy(struct bonding *bond); -int bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev); void bond_deinit(struct net_device *bond_dev); int 
bond_create_sysfs(void); void bond_destroy_sysfs(void); diff --git a/trunk/drivers/net/cassini.c b/trunk/drivers/net/cassini.c index 7df31b5561cc..563bf5f6fa2a 100644 --- a/trunk/drivers/net/cassini.c +++ b/trunk/drivers/net/cassini.c @@ -4443,7 +4443,7 @@ static struct { {REG_MAC_COLL_EXCESS}, {REG_MAC_COLL_LATE} }; -#define CAS_REG_LEN ARRAY_SIZE(ethtool_register_table) +#define CAS_REG_LEN (sizeof(ethtool_register_table)/sizeof(int)) #define CAS_MAX_REGS (sizeof (u32)*CAS_REG_LEN) static void cas_read_regs(struct cas *cp, u8 *ptr, int len) diff --git a/trunk/drivers/net/cpmac.c b/trunk/drivers/net/cpmac.c deleted file mode 100644 index ed53aaab4c02..000000000000 --- a/trunk/drivers/net/cpmac.c +++ /dev/null @@ -1,1174 +0,0 @@ -/* - * Copyright (C) 2006, 2007 Eugene Konev - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_AUTHOR("Eugene Konev "); -MODULE_DESCRIPTION("TI AR7 ethernet driver (CPMAC)"); -MODULE_LICENSE("GPL"); - -static int debug_level = 8; -static int dumb_switch; - -/* Next 2 are only used in cpmac_probe, so it's pointless to change them */ -module_param(debug_level, int, 0444); -module_param(dumb_switch, int, 0444); - -MODULE_PARM_DESC(debug_level, "Number of NETIF_MSG bits to enable"); -MODULE_PARM_DESC(dumb_switch, "Assume switch is not connected to MDIO bus"); - -#define CPMAC_VERSION "0.5.0" -/* stolen from net/ieee80211.h */ -#ifndef MAC_FMT -#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x" -#define MAC_ARG(x) ((u8*)(x))[0], ((u8*)(x))[1], ((u8*)(x))[2], \ - ((u8*)(x))[3], ((u8*)(x))[4], ((u8*)(x))[5] -#endif -/* frame size + 802.1q tag */ -#define CPMAC_SKB_SIZE (ETH_FRAME_LEN + 4) -#define CPMAC_QUEUES 8 - -/* Ethernet registers */ -#define CPMAC_TX_CONTROL 0x0004 -#define CPMAC_TX_TEARDOWN 0x0008 -#define CPMAC_RX_CONTROL 0x0014 -#define CPMAC_RX_TEARDOWN 0x0018 -#define CPMAC_MBP 0x0100 -# define MBP_RXPASSCRC 0x40000000 -# define MBP_RXQOS 0x20000000 -# define MBP_RXNOCHAIN 0x10000000 -# define MBP_RXCMF 0x01000000 -# define MBP_RXSHORT 0x00800000 -# define MBP_RXCEF 0x00400000 -# define MBP_RXPROMISC 0x00200000 -# define MBP_PROMISCCHAN(channel) (((channel) & 0x7) << 16) -# define MBP_RXBCAST 0x00002000 -# define MBP_BCASTCHAN(channel) (((channel) & 0x7) << 8) -# define MBP_RXMCAST 0x00000020 -# define MBP_MCASTCHAN(channel) ((channel) & 0x7) -#define CPMAC_UNICAST_ENABLE 0x0104 -#define CPMAC_UNICAST_CLEAR 0x0108 -#define 
CPMAC_MAX_LENGTH 0x010c -#define CPMAC_BUFFER_OFFSET 0x0110 -#define CPMAC_MAC_CONTROL 0x0160 -# define MAC_TXPTYPE 0x00000200 -# define MAC_TXPACE 0x00000040 -# define MAC_MII 0x00000020 -# define MAC_TXFLOW 0x00000010 -# define MAC_RXFLOW 0x00000008 -# define MAC_MTEST 0x00000004 -# define MAC_LOOPBACK 0x00000002 -# define MAC_FDX 0x00000001 -#define CPMAC_MAC_STATUS 0x0164 -# define MAC_STATUS_QOS 0x00000004 -# define MAC_STATUS_RXFLOW 0x00000002 -# define MAC_STATUS_TXFLOW 0x00000001 -#define CPMAC_TX_INT_ENABLE 0x0178 -#define CPMAC_TX_INT_CLEAR 0x017c -#define CPMAC_MAC_INT_VECTOR 0x0180 -# define MAC_INT_STATUS 0x00080000 -# define MAC_INT_HOST 0x00040000 -# define MAC_INT_RX 0x00020000 -# define MAC_INT_TX 0x00010000 -#define CPMAC_MAC_EOI_VECTOR 0x0184 -#define CPMAC_RX_INT_ENABLE 0x0198 -#define CPMAC_RX_INT_CLEAR 0x019c -#define CPMAC_MAC_INT_ENABLE 0x01a8 -#define CPMAC_MAC_INT_CLEAR 0x01ac -#define CPMAC_MAC_ADDR_LO(channel) (0x01b0 + (channel) * 4) -#define CPMAC_MAC_ADDR_MID 0x01d0 -#define CPMAC_MAC_ADDR_HI 0x01d4 -#define CPMAC_MAC_HASH_LO 0x01d8 -#define CPMAC_MAC_HASH_HI 0x01dc -#define CPMAC_TX_PTR(channel) (0x0600 + (channel) * 4) -#define CPMAC_RX_PTR(channel) (0x0620 + (channel) * 4) -#define CPMAC_TX_ACK(channel) (0x0640 + (channel) * 4) -#define CPMAC_RX_ACK(channel) (0x0660 + (channel) * 4) -#define CPMAC_REG_END 0x0680 -/* - * Rx/Tx statistics - * TODO: use some of them to fill stats in cpmac_stats() - */ -#define CPMAC_STATS_RX_GOOD 0x0200 -#define CPMAC_STATS_RX_BCAST 0x0204 -#define CPMAC_STATS_RX_MCAST 0x0208 -#define CPMAC_STATS_RX_PAUSE 0x020c -#define CPMAC_STATS_RX_CRC 0x0210 -#define CPMAC_STATS_RX_ALIGN 0x0214 -#define CPMAC_STATS_RX_OVER 0x0218 -#define CPMAC_STATS_RX_JABBER 0x021c -#define CPMAC_STATS_RX_UNDER 0x0220 -#define CPMAC_STATS_RX_FRAG 0x0224 -#define CPMAC_STATS_RX_FILTER 0x0228 -#define CPMAC_STATS_RX_QOSFILTER 0x022c -#define CPMAC_STATS_RX_OCTETS 0x0230 - -#define CPMAC_STATS_TX_GOOD 0x0234 -#define 
CPMAC_STATS_TX_BCAST 0x0238 -#define CPMAC_STATS_TX_MCAST 0x023c -#define CPMAC_STATS_TX_PAUSE 0x0240 -#define CPMAC_STATS_TX_DEFER 0x0244 -#define CPMAC_STATS_TX_COLLISION 0x0248 -#define CPMAC_STATS_TX_SINGLECOLL 0x024c -#define CPMAC_STATS_TX_MULTICOLL 0x0250 -#define CPMAC_STATS_TX_EXCESSCOLL 0x0254 -#define CPMAC_STATS_TX_LATECOLL 0x0258 -#define CPMAC_STATS_TX_UNDERRUN 0x025c -#define CPMAC_STATS_TX_CARRIERSENSE 0x0260 -#define CPMAC_STATS_TX_OCTETS 0x0264 - -#define cpmac_read(base, reg) (readl((void __iomem *)(base) + (reg))) -#define cpmac_write(base, reg, val) (writel(val, (void __iomem *)(base) + \ - (reg))) - -/* MDIO bus */ -#define CPMAC_MDIO_VERSION 0x0000 -#define CPMAC_MDIO_CONTROL 0x0004 -# define MDIOC_IDLE 0x80000000 -# define MDIOC_ENABLE 0x40000000 -# define MDIOC_PREAMBLE 0x00100000 -# define MDIOC_FAULT 0x00080000 -# define MDIOC_FAULTDETECT 0x00040000 -# define MDIOC_INTTEST 0x00020000 -# define MDIOC_CLKDIV(div) ((div) & 0xff) -#define CPMAC_MDIO_ALIVE 0x0008 -#define CPMAC_MDIO_LINK 0x000c -#define CPMAC_MDIO_ACCESS(channel) (0x0080 + (channel) * 8) -# define MDIO_BUSY 0x80000000 -# define MDIO_WRITE 0x40000000 -# define MDIO_REG(reg) (((reg) & 0x1f) << 21) -# define MDIO_PHY(phy) (((phy) & 0x1f) << 16) -# define MDIO_DATA(data) ((data) & 0xffff) -#define CPMAC_MDIO_PHYSEL(channel) (0x0084 + (channel) * 8) -# define PHYSEL_LINKSEL 0x00000040 -# define PHYSEL_LINKINT 0x00000020 - -struct cpmac_desc { - u32 hw_next; - u32 hw_data; - u16 buflen; - u16 bufflags; - u16 datalen; - u16 dataflags; -#define CPMAC_SOP 0x8000 -#define CPMAC_EOP 0x4000 -#define CPMAC_OWN 0x2000 -#define CPMAC_EOQ 0x1000 - struct sk_buff *skb; - struct cpmac_desc *next; - dma_addr_t mapping; - dma_addr_t data_mapping; -}; - -struct cpmac_priv { - spinlock_t lock; - spinlock_t rx_lock; - struct cpmac_desc *rx_head; - int ring_size; - struct cpmac_desc *desc_ring; - dma_addr_t dma_ring; - void __iomem *regs; - struct mii_bus *mii_bus; - struct phy_device *phy; - char 
phy_name[BUS_ID_SIZE]; - int oldlink, oldspeed, oldduplex; - u32 msg_enable; - struct net_device *dev; - struct work_struct reset_work; - struct platform_device *pdev; -}; - -static irqreturn_t cpmac_irq(int, void *); -static void cpmac_hw_start(struct net_device *dev); -static void cpmac_hw_stop(struct net_device *dev); -static int cpmac_stop(struct net_device *dev); -static int cpmac_open(struct net_device *dev); - -static void cpmac_dump_regs(struct net_device *dev) -{ - int i; - struct cpmac_priv *priv = netdev_priv(dev); - for (i = 0; i < CPMAC_REG_END; i += 4) { - if (i % 16 == 0) { - if (i) - printk("\n"); - printk(KERN_DEBUG "%s: reg[%p]:", dev->name, - priv->regs + i); - } - printk(" %08x", cpmac_read(priv->regs, i)); - } - printk("\n"); -} - -static void cpmac_dump_desc(struct net_device *dev, struct cpmac_desc *desc) -{ - int i; - printk(KERN_DEBUG "%s: desc[%p]:", dev->name, desc); - for (i = 0; i < sizeof(*desc) / 4; i++) - printk(" %08x", ((u32 *)desc)[i]); - printk("\n"); -} - -static void cpmac_dump_skb(struct net_device *dev, struct sk_buff *skb) -{ - int i; - printk(KERN_DEBUG "%s: skb 0x%p, len=%d\n", dev->name, skb, skb->len); - for (i = 0; i < skb->len; i++) { - if (i % 16 == 0) { - if (i) - printk("\n"); - printk(KERN_DEBUG "%s: data[%p]:", dev->name, - skb->data + i); - } - printk(" %02x", ((u8 *)skb->data)[i]); - } - printk("\n"); -} - -static int cpmac_mdio_read(struct mii_bus *bus, int phy_id, int reg) -{ - u32 val; - - while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY) - cpu_relax(); - cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_REG(reg) | - MDIO_PHY(phy_id)); - while ((val = cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0))) & MDIO_BUSY) - cpu_relax(); - return MDIO_DATA(val); -} - -static int cpmac_mdio_write(struct mii_bus *bus, int phy_id, - int reg, u16 val) -{ - while (cpmac_read(bus->priv, CPMAC_MDIO_ACCESS(0)) & MDIO_BUSY) - cpu_relax(); - cpmac_write(bus->priv, CPMAC_MDIO_ACCESS(0), MDIO_BUSY | MDIO_WRITE 
| - MDIO_REG(reg) | MDIO_PHY(phy_id) | MDIO_DATA(val)); - return 0; -} - -static int cpmac_mdio_reset(struct mii_bus *bus) -{ - ar7_device_reset(AR7_RESET_BIT_MDIO); - cpmac_write(bus->priv, CPMAC_MDIO_CONTROL, MDIOC_ENABLE | - MDIOC_CLKDIV(ar7_cpmac_freq() / 2200000 - 1)); - return 0; -} - -static int mii_irqs[PHY_MAX_ADDR] = { PHY_POLL, }; - -static struct mii_bus cpmac_mii = { - .name = "cpmac-mii", - .read = cpmac_mdio_read, - .write = cpmac_mdio_write, - .reset = cpmac_mdio_reset, - .irq = mii_irqs, -}; - -static int cpmac_config(struct net_device *dev, struct ifmap *map) -{ - if (dev->flags & IFF_UP) - return -EBUSY; - - /* Don't allow changing the I/O address */ - if (map->base_addr != dev->base_addr) - return -EOPNOTSUPP; - - /* ignore other fields */ - return 0; -} - -static void cpmac_set_multicast_list(struct net_device *dev) -{ - struct dev_mc_list *iter; - int i; - u8 tmp; - u32 mbp, bit, hash[2] = { 0, }; - struct cpmac_priv *priv = netdev_priv(dev); - - mbp = cpmac_read(priv->regs, CPMAC_MBP); - if (dev->flags & IFF_PROMISC) { - cpmac_write(priv->regs, CPMAC_MBP, (mbp & ~MBP_PROMISCCHAN(0)) | - MBP_RXPROMISC); - } else { - cpmac_write(priv->regs, CPMAC_MBP, mbp & ~MBP_RXPROMISC); - if (dev->flags & IFF_ALLMULTI) { - /* enable all multicast mode */ - cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, 0xffffffff); - cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, 0xffffffff); - } else { - /* - * cpmac uses some strange mac address hashing - * (not crc32) - */ - for (i = 0, iter = dev->mc_list; i < dev->mc_count; - i++, iter = iter->next) { - bit = 0; - tmp = iter->dmi_addr[0]; - bit ^= (tmp >> 2) ^ (tmp << 4); - tmp = iter->dmi_addr[1]; - bit ^= (tmp >> 4) ^ (tmp << 2); - tmp = iter->dmi_addr[2]; - bit ^= (tmp >> 6) ^ tmp; - tmp = iter->dmi_addr[3]; - bit ^= (tmp >> 2) ^ (tmp << 4); - tmp = iter->dmi_addr[4]; - bit ^= (tmp >> 4) ^ (tmp << 2); - tmp = iter->dmi_addr[5]; - bit ^= (tmp >> 6) ^ tmp; - bit &= 0x3f; - hash[bit / 32] |= 1 << (bit % 32); - } - - 
cpmac_write(priv->regs, CPMAC_MAC_HASH_LO, hash[0]); - cpmac_write(priv->regs, CPMAC_MAC_HASH_HI, hash[1]); - } - } -} - -static struct sk_buff *cpmac_rx_one(struct net_device *dev, - struct cpmac_priv *priv, - struct cpmac_desc *desc) -{ - struct sk_buff *skb, *result = NULL; - - if (unlikely(netif_msg_hw(priv))) - cpmac_dump_desc(dev, desc); - cpmac_write(priv->regs, CPMAC_RX_ACK(0), (u32)desc->mapping); - if (unlikely(!desc->datalen)) { - if (netif_msg_rx_err(priv) && net_ratelimit()) - printk(KERN_WARNING "%s: rx: spurious interrupt\n", - dev->name); - return NULL; - } - - skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE); - if (likely(skb)) { - skb_reserve(skb, 2); - skb_put(desc->skb, desc->datalen); - desc->skb->protocol = eth_type_trans(desc->skb, dev); - desc->skb->ip_summed = CHECKSUM_NONE; - dev->stats.rx_packets++; - dev->stats.rx_bytes += desc->datalen; - result = desc->skb; - dma_unmap_single(&dev->dev, desc->data_mapping, CPMAC_SKB_SIZE, - DMA_FROM_DEVICE); - desc->skb = skb; - desc->data_mapping = dma_map_single(&dev->dev, skb->data, - CPMAC_SKB_SIZE, - DMA_FROM_DEVICE); - desc->hw_data = (u32)desc->data_mapping; - if (unlikely(netif_msg_pktdata(priv))) { - printk(KERN_DEBUG "%s: received packet:\n", dev->name); - cpmac_dump_skb(dev, result); - } - } else { - if (netif_msg_rx_err(priv) && net_ratelimit()) - printk(KERN_WARNING - "%s: low on skbs, dropping packet\n", dev->name); - dev->stats.rx_dropped++; - } - - desc->buflen = CPMAC_SKB_SIZE; - desc->dataflags = CPMAC_OWN; - - return result; -} - -static int cpmac_poll(struct net_device *dev, int *budget) -{ - struct sk_buff *skb; - struct cpmac_desc *desc; - int received = 0, quota = min(dev->quota, *budget); - struct cpmac_priv *priv = netdev_priv(dev); - - spin_lock(&priv->rx_lock); - if (unlikely(!priv->rx_head)) { - if (netif_msg_rx_err(priv) && net_ratelimit()) - printk(KERN_WARNING "%s: rx: polling, but no queue\n", - dev->name); - netif_rx_complete(dev); - return 0; - } - - desc = priv->rx_head; - 
while ((received < quota) && ((desc->dataflags & CPMAC_OWN) == 0)) { - skb = cpmac_rx_one(dev, priv, desc); - if (likely(skb)) { - netif_receive_skb(skb); - received++; - } - desc = desc->next; - } - - priv->rx_head = desc; - spin_unlock(&priv->rx_lock); - *budget -= received; - dev->quota -= received; - if (unlikely(netif_msg_rx_status(priv))) - printk(KERN_DEBUG "%s: poll processed %d packets\n", dev->name, - received); - if (desc->dataflags & CPMAC_OWN) { - netif_rx_complete(dev); - cpmac_write(priv->regs, CPMAC_RX_PTR(0), (u32)desc->mapping); - cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1); - return 0; - } - - return 1; -} - -static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - int queue, len; - struct cpmac_desc *desc; - struct cpmac_priv *priv = netdev_priv(dev); - - if (unlikely(skb_padto(skb, ETH_ZLEN))) { - if (netif_msg_tx_err(priv) && net_ratelimit()) - printk(KERN_WARNING - "%s: tx: padding failed, dropping\n", dev->name); - spin_lock(&priv->lock); - dev->stats.tx_dropped++; - spin_unlock(&priv->lock); - return -ENOMEM; - } - - len = max(skb->len, ETH_ZLEN); - queue = skb->queue_mapping; -#ifdef CONFIG_NETDEVICES_MULTIQUEUE - netif_stop_subqueue(dev, queue); -#else - netif_stop_queue(dev); -#endif - - desc = &priv->desc_ring[queue]; - if (unlikely(desc->dataflags & CPMAC_OWN)) { - if (netif_msg_tx_err(priv) && net_ratelimit()) - printk(KERN_WARNING "%s: tx dma ring full, dropping\n", - dev->name); - spin_lock(&priv->lock); - dev->stats.tx_dropped++; - spin_unlock(&priv->lock); - dev_kfree_skb_any(skb); - return -ENOMEM; - } - - spin_lock(&priv->lock); - dev->trans_start = jiffies; - spin_unlock(&priv->lock); - desc->dataflags = CPMAC_SOP | CPMAC_EOP | CPMAC_OWN; - desc->skb = skb; - desc->data_mapping = dma_map_single(&dev->dev, skb->data, len, - DMA_TO_DEVICE); - desc->hw_data = (u32)desc->data_mapping; - desc->datalen = len; - desc->buflen = len; - if (unlikely(netif_msg_tx_queued(priv))) - printk(KERN_DEBUG "%s: sending 
0x%p, len=%d\n", dev->name, skb, - skb->len); - if (unlikely(netif_msg_hw(priv))) - cpmac_dump_desc(dev, desc); - if (unlikely(netif_msg_pktdata(priv))) - cpmac_dump_skb(dev, skb); - cpmac_write(priv->regs, CPMAC_TX_PTR(queue), (u32)desc->mapping); - - return 0; -} - -static void cpmac_end_xmit(struct net_device *dev, int queue) -{ - struct cpmac_desc *desc; - struct cpmac_priv *priv = netdev_priv(dev); - - desc = &priv->desc_ring[queue]; - cpmac_write(priv->regs, CPMAC_TX_ACK(queue), (u32)desc->mapping); - if (likely(desc->skb)) { - spin_lock(&priv->lock); - dev->stats.tx_packets++; - dev->stats.tx_bytes += desc->skb->len; - spin_unlock(&priv->lock); - dma_unmap_single(&dev->dev, desc->data_mapping, desc->skb->len, - DMA_TO_DEVICE); - - if (unlikely(netif_msg_tx_done(priv))) - printk(KERN_DEBUG "%s: sent 0x%p, len=%d\n", dev->name, - desc->skb, desc->skb->len); - - dev_kfree_skb_irq(desc->skb); - desc->skb = NULL; -#ifdef CONFIG_NETDEVICES_MULTIQUEUE - if (netif_subqueue_stopped(dev, queue)) - netif_wake_subqueue(dev, queue); -#else - if (netif_queue_stopped(dev)) - netif_wake_queue(dev); -#endif - } else { - if (netif_msg_tx_err(priv) && net_ratelimit()) - printk(KERN_WARNING - "%s: end_xmit: spurious interrupt\n", dev->name); -#ifdef CONFIG_NETDEVICES_MULTIQUEUE - if (netif_subqueue_stopped(dev, queue)) - netif_wake_subqueue(dev, queue); -#else - if (netif_queue_stopped(dev)) - netif_wake_queue(dev); -#endif - } -} - -static void cpmac_hw_stop(struct net_device *dev) -{ - int i; - struct cpmac_priv *priv = netdev_priv(dev); - struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data; - - ar7_device_reset(pdata->reset_bit); - cpmac_write(priv->regs, CPMAC_RX_CONTROL, - cpmac_read(priv->regs, CPMAC_RX_CONTROL) & ~1); - cpmac_write(priv->regs, CPMAC_TX_CONTROL, - cpmac_read(priv->regs, CPMAC_TX_CONTROL) & ~1); - for (i = 0; i < 8; i++) { - cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0); - cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0); - } - cpmac_write(priv->regs, 
CPMAC_UNICAST_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_MAC_CONTROL, - cpmac_read(priv->regs, CPMAC_MAC_CONTROL) & ~MAC_MII); -} - -static void cpmac_hw_start(struct net_device *dev) -{ - int i; - struct cpmac_priv *priv = netdev_priv(dev); - struct plat_cpmac_data *pdata = priv->pdev->dev.platform_data; - - ar7_device_reset(pdata->reset_bit); - for (i = 0; i < 8; i++) { - cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0); - cpmac_write(priv->regs, CPMAC_RX_PTR(i), 0); - } - cpmac_write(priv->regs, CPMAC_RX_PTR(0), priv->rx_head->mapping); - - cpmac_write(priv->regs, CPMAC_MBP, MBP_RXSHORT | MBP_RXBCAST | - MBP_RXMCAST); - cpmac_write(priv->regs, CPMAC_BUFFER_OFFSET, 0); - for (i = 0; i < 8; i++) - cpmac_write(priv->regs, CPMAC_MAC_ADDR_LO(i), dev->dev_addr[5]); - cpmac_write(priv->regs, CPMAC_MAC_ADDR_MID, dev->dev_addr[4]); - cpmac_write(priv->regs, CPMAC_MAC_ADDR_HI, dev->dev_addr[0] | - (dev->dev_addr[1] << 8) | (dev->dev_addr[2] << 16) | - (dev->dev_addr[3] << 24)); - cpmac_write(priv->regs, CPMAC_MAX_LENGTH, CPMAC_SKB_SIZE); - cpmac_write(priv->regs, CPMAC_UNICAST_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_TX_INT_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_MAC_INT_CLEAR, 0xff); - cpmac_write(priv->regs, CPMAC_UNICAST_ENABLE, 1); - cpmac_write(priv->regs, CPMAC_RX_INT_ENABLE, 1); - cpmac_write(priv->regs, CPMAC_TX_INT_ENABLE, 0xff); - cpmac_write(priv->regs, CPMAC_MAC_INT_ENABLE, 3); - - cpmac_write(priv->regs, CPMAC_RX_CONTROL, - cpmac_read(priv->regs, CPMAC_RX_CONTROL) | 1); - cpmac_write(priv->regs, CPMAC_TX_CONTROL, - cpmac_read(priv->regs, CPMAC_TX_CONTROL) | 1); - cpmac_write(priv->regs, CPMAC_MAC_CONTROL, - cpmac_read(priv->regs, CPMAC_MAC_CONTROL) | MAC_MII | - MAC_FDX); -} - -static void cpmac_clear_rx(struct net_device *dev) -{ - struct 
cpmac_priv *priv = netdev_priv(dev); - struct cpmac_desc *desc; - int i; - if (unlikely(!priv->rx_head)) - return; - desc = priv->rx_head; - for (i = 0; i < priv->ring_size; i++) { - if ((desc->dataflags & CPMAC_OWN) == 0) { - if (netif_msg_rx_err(priv) && net_ratelimit()) - printk(KERN_WARNING "%s: packet dropped\n", - dev->name); - if (unlikely(netif_msg_hw(priv))) - cpmac_dump_desc(dev, desc); - desc->dataflags = CPMAC_OWN; - dev->stats.rx_dropped++; - } - desc = desc->next; - } -} - -static void cpmac_clear_tx(struct net_device *dev) -{ - struct cpmac_priv *priv = netdev_priv(dev); - int i; - if (unlikely(!priv->desc_ring)) - return; - for (i = 0; i < CPMAC_QUEUES; i++) - if (priv->desc_ring[i].skb) { - dev_kfree_skb_any(priv->desc_ring[i].skb); - if (netif_subqueue_stopped(dev, i)) - netif_wake_subqueue(dev, i); - } -} - -static void cpmac_hw_error(struct work_struct *work) -{ - struct cpmac_priv *priv = - container_of(work, struct cpmac_priv, reset_work); - - spin_lock(&priv->rx_lock); - cpmac_clear_rx(priv->dev); - spin_unlock(&priv->rx_lock); - cpmac_clear_tx(priv->dev); - cpmac_hw_start(priv->dev); - netif_start_queue(priv->dev); -} - -static irqreturn_t cpmac_irq(int irq, void *dev_id) -{ - struct net_device *dev = dev_id; - struct cpmac_priv *priv; - int queue; - u32 status; - - if (!dev) - return IRQ_NONE; - - priv = netdev_priv(dev); - - status = cpmac_read(priv->regs, CPMAC_MAC_INT_VECTOR); - - if (unlikely(netif_msg_intr(priv))) - printk(KERN_DEBUG "%s: interrupt status: 0x%08x\n", dev->name, - status); - - if (status & MAC_INT_TX) - cpmac_end_xmit(dev, (status & 7)); - - if (status & MAC_INT_RX) { - queue = (status >> 8) & 7; - netif_rx_schedule(dev); - cpmac_write(priv->regs, CPMAC_RX_INT_CLEAR, 1 << queue); - } - - cpmac_write(priv->regs, CPMAC_MAC_EOI_VECTOR, 0); - - if (unlikely(status & (MAC_INT_HOST | MAC_INT_STATUS))) { - if (netif_msg_drv(priv) && net_ratelimit()) - printk(KERN_ERR "%s: hw error, resetting...\n", - dev->name); - 
netif_stop_queue(dev); - cpmac_hw_stop(dev); - schedule_work(&priv->reset_work); - if (unlikely(netif_msg_hw(priv))) - cpmac_dump_regs(dev); - } - - return IRQ_HANDLED; -} - -static void cpmac_tx_timeout(struct net_device *dev) -{ - struct cpmac_priv *priv = netdev_priv(dev); - int i; - - spin_lock(&priv->lock); - dev->stats.tx_errors++; - spin_unlock(&priv->lock); - if (netif_msg_tx_err(priv) && net_ratelimit()) - printk(KERN_WARNING "%s: transmit timeout\n", dev->name); - /* - * FIXME: waking up random queue is not the best thing to - * do... on the other hand why we got here at all? - */ -#ifdef CONFIG_NETDEVICES_MULTIQUEUE - for (i = 0; i < CPMAC_QUEUES; i++) - if (priv->desc_ring[i].skb) { - dev_kfree_skb_any(priv->desc_ring[i].skb); - netif_wake_subqueue(dev, i); - break; - } -#else - if (priv->desc_ring[0].skb) - dev_kfree_skb_any(priv->desc_ring[0].skb); - netif_wake_queue(dev); -#endif -} - -static int cpmac_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct cpmac_priv *priv = netdev_priv(dev); - if (!(netif_running(dev))) - return -EINVAL; - if (!priv->phy) - return -EINVAL; - if ((cmd == SIOCGMIIPHY) || (cmd == SIOCGMIIREG) || - (cmd == SIOCSMIIREG)) - return phy_mii_ioctl(priv->phy, if_mii(ifr), cmd); - - return -EOPNOTSUPP; -} - -static int cpmac_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct cpmac_priv *priv = netdev_priv(dev); - - if (priv->phy) - return phy_ethtool_gset(priv->phy, cmd); - - return -EINVAL; -} - -static int cpmac_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct cpmac_priv *priv = netdev_priv(dev); - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (priv->phy) - return phy_ethtool_sset(priv->phy, cmd); - - return -EINVAL; -} - -static void cpmac_get_ringparam(struct net_device *dev, struct ethtool_ringparam* ring) -{ - struct cpmac_priv *priv = netdev_priv(dev); - - ring->rx_max_pending = 1024; - ring->rx_mini_max_pending = 1; - ring->rx_jumbo_max_pending = 
1; - ring->tx_max_pending = 1; - - ring->rx_pending = priv->ring_size; - ring->rx_mini_pending = 1; - ring->rx_jumbo_pending = 1; - ring->tx_pending = 1; -} - -static int cpmac_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ring) -{ - struct cpmac_priv *priv = netdev_priv(dev); - - if (dev->flags && IFF_UP) - return -EBUSY; - priv->ring_size = ring->rx_pending; - return 0; -} - -static void cpmac_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - strcpy(info->driver, "cpmac"); - strcpy(info->version, CPMAC_VERSION); - info->fw_version[0] = '\0'; - sprintf(info->bus_info, "%s", "cpmac"); - info->regdump_len = 0; -} - -static const struct ethtool_ops cpmac_ethtool_ops = { - .get_settings = cpmac_get_settings, - .set_settings = cpmac_set_settings, - .get_drvinfo = cpmac_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_ringparam = cpmac_get_ringparam, - .set_ringparam = cpmac_set_ringparam, -}; - -static void cpmac_adjust_link(struct net_device *dev) -{ - struct cpmac_priv *priv = netdev_priv(dev); - int new_state = 0; - - spin_lock(&priv->lock); - if (priv->phy->link) { - netif_start_queue(dev); - if (priv->phy->duplex != priv->oldduplex) { - new_state = 1; - priv->oldduplex = priv->phy->duplex; - } - - if (priv->phy->speed != priv->oldspeed) { - new_state = 1; - priv->oldspeed = priv->phy->speed; - } - - if (!priv->oldlink) { - new_state = 1; - priv->oldlink = 1; - netif_schedule(dev); - } - } else if (priv->oldlink) { - netif_stop_queue(dev); - new_state = 1; - priv->oldlink = 0; - priv->oldspeed = 0; - priv->oldduplex = -1; - } - - if (new_state && netif_msg_link(priv) && net_ratelimit()) - phy_print_status(priv->phy); - - spin_unlock(&priv->lock); -} - -static int cpmac_open(struct net_device *dev) -{ - int i, size, res; - struct cpmac_priv *priv = netdev_priv(dev); - struct resource *mem; - struct cpmac_desc *desc; - struct sk_buff *skb; - - priv->phy = phy_connect(dev, priv->phy_name, &cpmac_adjust_link, - 0, 
PHY_INTERFACE_MODE_MII); - if (IS_ERR(priv->phy)) { - if (netif_msg_drv(priv)) - printk(KERN_ERR "%s: Could not attach to PHY\n", - dev->name); - return PTR_ERR(priv->phy); - } - - mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs"); - if (!request_mem_region(mem->start, mem->end - mem->start, dev->name)) { - if (netif_msg_drv(priv)) - printk(KERN_ERR "%s: failed to request registers\n", - dev->name); - res = -ENXIO; - goto fail_reserve; - } - - priv->regs = ioremap(mem->start, mem->end - mem->start); - if (!priv->regs) { - if (netif_msg_drv(priv)) - printk(KERN_ERR "%s: failed to remap registers\n", - dev->name); - res = -ENXIO; - goto fail_remap; - } - - size = priv->ring_size + CPMAC_QUEUES; - priv->desc_ring = dma_alloc_coherent(&dev->dev, - sizeof(struct cpmac_desc) * size, - &priv->dma_ring, - GFP_KERNEL); - if (!priv->desc_ring) { - res = -ENOMEM; - goto fail_alloc; - } - - for (i = 0; i < size; i++) - priv->desc_ring[i].mapping = priv->dma_ring + sizeof(*desc) * i; - - priv->rx_head = &priv->desc_ring[CPMAC_QUEUES]; - for (i = 0, desc = priv->rx_head; i < priv->ring_size; i++, desc++) { - skb = netdev_alloc_skb(dev, CPMAC_SKB_SIZE); - if (unlikely(!skb)) { - res = -ENOMEM; - goto fail_desc; - } - skb_reserve(skb, 2); - desc->skb = skb; - desc->data_mapping = dma_map_single(&dev->dev, skb->data, - CPMAC_SKB_SIZE, - DMA_FROM_DEVICE); - desc->hw_data = (u32)desc->data_mapping; - desc->buflen = CPMAC_SKB_SIZE; - desc->dataflags = CPMAC_OWN; - desc->next = &priv->rx_head[(i + 1) % priv->ring_size]; - desc->hw_next = (u32)desc->next->mapping; - } - - if ((res = request_irq(dev->irq, cpmac_irq, IRQF_SHARED, - dev->name, dev))) { - if (netif_msg_drv(priv)) - printk(KERN_ERR "%s: failed to obtain irq\n", - dev->name); - goto fail_irq; - } - - INIT_WORK(&priv->reset_work, cpmac_hw_error); - cpmac_hw_start(dev); - - priv->phy->state = PHY_CHANGELINK; - phy_start(priv->phy); - - return 0; - -fail_irq: -fail_desc: - for (i = 0; i < priv->ring_size; 
i++) { - if (priv->rx_head[i].skb) { - dma_unmap_single(&dev->dev, - priv->rx_head[i].data_mapping, - CPMAC_SKB_SIZE, - DMA_FROM_DEVICE); - kfree_skb(priv->rx_head[i].skb); - } - } -fail_alloc: - kfree(priv->desc_ring); - iounmap(priv->regs); - -fail_remap: - release_mem_region(mem->start, mem->end - mem->start); - -fail_reserve: - phy_disconnect(priv->phy); - - return res; -} - -static int cpmac_stop(struct net_device *dev) -{ - int i; - struct cpmac_priv *priv = netdev_priv(dev); - struct resource *mem; - - netif_stop_queue(dev); - - cancel_work_sync(&priv->reset_work); - phy_stop(priv->phy); - phy_disconnect(priv->phy); - priv->phy = NULL; - - cpmac_hw_stop(dev); - - for (i = 0; i < 8; i++) - cpmac_write(priv->regs, CPMAC_TX_PTR(i), 0); - cpmac_write(priv->regs, CPMAC_RX_PTR(0), 0); - cpmac_write(priv->regs, CPMAC_MBP, 0); - - free_irq(dev->irq, dev); - iounmap(priv->regs); - mem = platform_get_resource_byname(priv->pdev, IORESOURCE_MEM, "regs"); - release_mem_region(mem->start, mem->end - mem->start); - priv->rx_head = &priv->desc_ring[CPMAC_QUEUES]; - for (i = 0; i < priv->ring_size; i++) { - if (priv->rx_head[i].skb) { - dma_unmap_single(&dev->dev, - priv->rx_head[i].data_mapping, - CPMAC_SKB_SIZE, - DMA_FROM_DEVICE); - kfree_skb(priv->rx_head[i].skb); - } - } - - dma_free_coherent(&dev->dev, sizeof(struct cpmac_desc) * - (CPMAC_QUEUES + priv->ring_size), - priv->desc_ring, priv->dma_ring); - return 0; -} - -static int external_switch; - -static int __devinit cpmac_probe(struct platform_device *pdev) -{ - int rc, phy_id; - struct resource *mem; - struct cpmac_priv *priv; - struct net_device *dev; - struct plat_cpmac_data *pdata; - - pdata = pdev->dev.platform_data; - - for (phy_id = 0; phy_id < PHY_MAX_ADDR; phy_id++) { - if (!(pdata->phy_mask & (1 << phy_id))) - continue; - if (!cpmac_mii.phy_map[phy_id]) - continue; - break; - } - - if (phy_id == PHY_MAX_ADDR) { - if (external_switch || dumb_switch) - phy_id = 0; - else { - printk(KERN_ERR "cpmac: no PHY 
present\n"); - return -ENODEV; - } - } - - dev = alloc_etherdev_mq(sizeof(*priv), CPMAC_QUEUES); - - if (!dev) { - printk(KERN_ERR "cpmac: Unable to allocate net_device\n"); - return -ENOMEM; - } - - platform_set_drvdata(pdev, dev); - priv = netdev_priv(dev); - - priv->pdev = pdev; - mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); - if (!mem) { - rc = -ENODEV; - goto fail; - } - - dev->irq = platform_get_irq_byname(pdev, "irq"); - - dev->open = cpmac_open; - dev->stop = cpmac_stop; - dev->set_config = cpmac_config; - dev->hard_start_xmit = cpmac_start_xmit; - dev->do_ioctl = cpmac_ioctl; - dev->set_multicast_list = cpmac_set_multicast_list; - dev->tx_timeout = cpmac_tx_timeout; - dev->ethtool_ops = &cpmac_ethtool_ops; - dev->poll = cpmac_poll; - dev->weight = 64; - dev->features |= NETIF_F_MULTI_QUEUE; - - spin_lock_init(&priv->lock); - spin_lock_init(&priv->rx_lock); - priv->dev = dev; - priv->ring_size = 64; - priv->msg_enable = netif_msg_init(debug_level, 0xff); - memcpy(dev->dev_addr, pdata->dev_addr, sizeof(dev->dev_addr)); - if (phy_id == 31) { - snprintf(priv->phy_name, BUS_ID_SIZE, PHY_ID_FMT, - cpmac_mii.id, phy_id); - } else - snprintf(priv->phy_name, BUS_ID_SIZE, "fixed@%d:%d", 100, 1); - - if ((rc = register_netdev(dev))) { - printk(KERN_ERR "cpmac: error %i registering device %s\n", rc, - dev->name); - goto fail; - } - - if (netif_msg_probe(priv)) { - printk(KERN_INFO - "cpmac: device %s (regs: %p, irq: %d, phy: %s, mac: " - MAC_FMT ")\n", dev->name, (void *)mem->start, dev->irq, - priv->phy_name, MAC_ARG(dev->dev_addr)); - } - return 0; - -fail: - free_netdev(dev); - return rc; -} - -static int __devexit cpmac_remove(struct platform_device *pdev) -{ - struct net_device *dev = platform_get_drvdata(pdev); - unregister_netdev(dev); - free_netdev(dev); - return 0; -} - -static struct platform_driver cpmac_driver = { - .driver.name = "cpmac", - .probe = cpmac_probe, - .remove = __devexit_p(cpmac_remove), -}; - -int __devinit 
cpmac_init(void) -{ - u32 mask; - int i, res; - - cpmac_mii.priv = ioremap(AR7_REGS_MDIO, 256); - - if (!cpmac_mii.priv) { - printk(KERN_ERR "Can't ioremap mdio registers\n"); - return -ENXIO; - } - -#warning FIXME: unhardcode gpio&reset bits - ar7_gpio_disable(26); - ar7_gpio_disable(27); - ar7_device_reset(AR7_RESET_BIT_CPMAC_LO); - ar7_device_reset(AR7_RESET_BIT_CPMAC_HI); - ar7_device_reset(AR7_RESET_BIT_EPHY); - - cpmac_mii.reset(&cpmac_mii); - - for (i = 0; i < 300000; i++) - if ((mask = cpmac_read(cpmac_mii.priv, CPMAC_MDIO_ALIVE))) - break; - else - cpu_relax(); - - mask &= 0x7fffffff; - if (mask & (mask - 1)) { - external_switch = 1; - mask = 0; - } - - cpmac_mii.phy_mask = ~(mask | 0x80000000); - - res = mdiobus_register(&cpmac_mii); - if (res) - goto fail_mii; - - res = platform_driver_register(&cpmac_driver); - if (res) - goto fail_cpmac; - - return 0; - -fail_cpmac: - mdiobus_unregister(&cpmac_mii); - -fail_mii: - iounmap(cpmac_mii.priv); - - return res; -} - -void __devexit cpmac_exit(void) -{ - platform_driver_unregister(&cpmac_driver); - mdiobus_unregister(&cpmac_mii); - iounmap(cpmac_mii.priv); -} - -module_init(cpmac_init); -module_exit(cpmac_exit); diff --git a/trunk/drivers/net/gianfar.c b/trunk/drivers/net/gianfar.c index 558440c15b6c..0db5e6fabe73 100644 --- a/trunk/drivers/net/gianfar.c +++ b/trunk/drivers/net/gianfar.c @@ -168,6 +168,7 @@ static int gfar_probe(struct platform_device *pdev) struct gfar_private *priv = NULL; struct gianfar_platform_data *einfo; struct resource *r; + int idx; int err = 0; DECLARE_MAC_BUF(mac); @@ -260,9 +261,7 @@ static int gfar_probe(struct platform_device *pdev) dev->hard_start_xmit = gfar_start_xmit; dev->tx_timeout = gfar_timeout; dev->watchdog_timeo = TX_TIMEOUT; -#ifdef CONFIG_GFAR_NAPI netif_napi_add(dev, &priv->napi, gfar_poll, GFAR_DEV_WEIGHT); -#endif #ifdef CONFIG_NET_POLL_CONTROLLER dev->poll_controller = gfar_netpoll; #endif @@ -932,14 +931,9 @@ int startup_gfar(struct net_device *dev) /* Returns 0 
for success. */ static int gfar_enet_open(struct net_device *dev) { -#ifdef CONFIG_GFAR_NAPI - struct gfar_private *priv = netdev_priv(dev); -#endif int err; -#ifdef CONFIG_GFAR_NAPI napi_enable(&priv->napi); -#endif /* Initialize a bunch of registers */ init_registers(dev); @@ -949,17 +943,13 @@ static int gfar_enet_open(struct net_device *dev) err = init_phy(dev); if(err) { -#ifdef CONFIG_GFAR_NAPI napi_disable(&priv->napi); -#endif return err; } err = startup_gfar(dev); if (err) -#ifdef CONFIG_GFAR_NAPI napi_disable(&priv->napi); -#endif netif_start_queue(dev); @@ -1113,9 +1103,7 @@ static int gfar_close(struct net_device *dev) { struct gfar_private *priv = netdev_priv(dev); -#ifdef CONFIG_GFAR_NAPI napi_disable(&priv->napi); -#endif stop_gfar(dev); diff --git a/trunk/drivers/net/ibm_emac/ibm_emac_mal.c b/trunk/drivers/net/ibm_emac/ibm_emac_mal.c index dcd8826fc749..4e49e8c4f871 100644 --- a/trunk/drivers/net/ibm_emac/ibm_emac_mal.c +++ b/trunk/drivers/net/ibm_emac/ibm_emac_mal.c @@ -413,10 +413,7 @@ static int __init mal_probe(struct ocp_device *ocpdev) ocpdev->def->index); return -ENOMEM; } - - /* XXX This only works for native dcr for now */ - mal->dcrhost = dcr_map(NULL, maldata->dcr_base, 0); - + mal->dcrbase = maldata->dcr_base; mal->def = ocpdev->def; INIT_LIST_HEAD(&mal->poll_list); diff --git a/trunk/drivers/net/ibm_emac/ibm_emac_mal.h b/trunk/drivers/net/ibm_emac/ibm_emac_mal.h index b8adbe6d4b01..8f54d621994d 100644 --- a/trunk/drivers/net/ibm_emac/ibm_emac_mal.h +++ b/trunk/drivers/net/ibm_emac/ibm_emac_mal.h @@ -191,6 +191,7 @@ struct mal_commac { }; struct ibm_ocp_mal { + int dcrbase; dcr_host_t dcrhost; struct list_head poll_list; @@ -208,12 +209,12 @@ struct ibm_ocp_mal { static inline u32 get_mal_dcrn(struct ibm_ocp_mal *mal, int reg) { - return dcr_read(mal->dcrhost, reg); + return dcr_read(mal->dcrhost, mal->dcrbase + reg); } static inline void set_mal_dcrn(struct ibm_ocp_mal *mal, int reg, u32 val) { - dcr_write(mal->dcrhost, reg, val); + 
dcr_write(mal->dcrhost, mal->dcrbase + reg, val); } /* Register MAL devices */ diff --git a/trunk/drivers/net/ibm_newemac/mal.c b/trunk/drivers/net/ibm_newemac/mal.c index 39f4cb6b0cf3..58854117b1a9 100644 --- a/trunk/drivers/net/ibm_newemac/mal.c +++ b/trunk/drivers/net/ibm_newemac/mal.c @@ -461,7 +461,6 @@ static int __devinit mal_probe(struct of_device *ofdev, struct mal_instance *mal; int err = 0, i, bd_size; int index = mal_count++; - unsigned int dcr_base; const u32 *prop; u32 cfg; @@ -498,14 +497,14 @@ static int __devinit mal_probe(struct of_device *ofdev, } mal->num_rx_chans = prop[0]; - dcr_base = dcr_resource_start(ofdev->node, 0); - if (dcr_base == 0) { + mal->dcr_base = dcr_resource_start(ofdev->node, 0); + if (mal->dcr_base == 0) { printk(KERN_ERR "mal%d: can't find DCR resource!\n", index); err = -ENODEV; goto fail; } - mal->dcr_host = dcr_map(ofdev->node, dcr_base, 0x100); + mal->dcr_host = dcr_map(ofdev->node, mal->dcr_base, 0x100); if (!DCR_MAP_OK(mal->dcr_host)) { printk(KERN_ERR "mal%d: failed to map DCRs !\n", index); @@ -627,7 +626,7 @@ static int __devinit mal_probe(struct of_device *ofdev, fail2: dma_free_coherent(&ofdev->dev, bd_size, mal->bd_virt, mal->bd_dma); fail_unmap: - dcr_unmap(mal->dcr_host, 0x100); + dcr_unmap(mal->dcr_host, mal->dcr_base, 0x100); fail: kfree(mal); diff --git a/trunk/drivers/net/ibm_newemac/mal.h b/trunk/drivers/net/ibm_newemac/mal.h index 784edb8ea822..cb1a16d589fe 100644 --- a/trunk/drivers/net/ibm_newemac/mal.h +++ b/trunk/drivers/net/ibm_newemac/mal.h @@ -185,6 +185,7 @@ struct mal_commac { struct mal_instance { int version; + int dcr_base; dcr_host_t dcr_host; int num_tx_chans; /* Number of TX channels */ @@ -212,12 +213,12 @@ struct mal_instance { static inline u32 get_mal_dcrn(struct mal_instance *mal, int reg) { - return dcr_read(mal->dcr_host, reg); + return dcr_read(mal->dcr_host, mal->dcr_base + reg); } static inline void set_mal_dcrn(struct mal_instance *mal, int reg, u32 val) { - 
dcr_write(mal->dcr_host, reg, val); + dcr_write(mal->dcr_host, mal->dcr_base + reg, val); } /* Register MAL devices */ diff --git a/trunk/drivers/net/irda/donauboe.c b/trunk/drivers/net/irda/donauboe.c index a82d8f98383d..3e5eca1aa987 100644 --- a/trunk/drivers/net/irda/donauboe.c +++ b/trunk/drivers/net/irda/donauboe.c @@ -840,7 +840,7 @@ toshoboe_probe (struct toshoboe_cb *self) /* test 1: SIR filter and back to back */ - for (j = 0; j < ARRAY_SIZE(bauds); ++j) + for (j = 0; j < (sizeof (bauds) / sizeof (int)); ++j) { int fir = (j > 1); toshoboe_stopchip (self); diff --git a/trunk/drivers/net/jazzsonic.c b/trunk/drivers/net/jazzsonic.c index 5c154fe13859..d3825c8ee994 100644 --- a/trunk/drivers/net/jazzsonic.c +++ b/trunk/drivers/net/jazzsonic.c @@ -208,6 +208,7 @@ static int __init jazz_sonic_probe(struct platform_device *pdev) struct sonic_local *lp; struct resource *res; int err = 0; + int i; DECLARE_MAC_BUF(mac); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/trunk/drivers/net/loopback.c b/trunk/drivers/net/loopback.c index 662b8d16803c..be25aa33971c 100644 --- a/trunk/drivers/net/loopback.c +++ b/trunk/drivers/net/loopback.c @@ -265,16 +265,17 @@ static __net_init int loopback_net_init(struct net *net) if (err) goto out_free_netdev; + err = 0; net->loopback_dev = dev; - return 0; - -out_free_netdev: - free_netdev(dev); out: - if (net == &init_net) + if (err) panic("loopback: Failed to register netdevice: %d\n", err); return err; + +out_free_netdev: + free_netdev(dev); + goto out; } static __net_exit void loopback_net_exit(struct net *net) diff --git a/trunk/drivers/net/mipsnet.c b/trunk/drivers/net/mipsnet.c index 37707a0c0498..d593175ab6f0 100644 --- a/trunk/drivers/net/mipsnet.c +++ b/trunk/drivers/net/mipsnet.c @@ -7,12 +7,12 @@ #define DEBUG #include -#include #include #include #include #include #include +#include #include #include "mipsnet.h" /* actual device IO mapping */ @@ -33,8 +33,9 @@ static int ioiocpy_frommipsnet(struct 
net_device *dev, unsigned char *kdata, if (available_len < len) return -EFAULT; - for (; len > 0; len--, kdata++) + for (; len > 0; len--, kdata++) { *kdata = inb(mipsnet_reg_address(dev, rxDataBuffer)); + } return inl(mipsnet_reg_address(dev, rxDataCount)); } @@ -46,15 +47,16 @@ static inline ssize_t mipsnet_put_todevice(struct net_device *dev, char *buf_ptr = skb->data; pr_debug("%s: %s(): telling MIPSNET txDataCount(%d)\n", - dev->name, __FUNCTION__, skb->len); + dev->name, __FUNCTION__, skb->len); outl(skb->len, mipsnet_reg_address(dev, txDataCount)); pr_debug("%s: %s(): sending data to MIPSNET txDataBuffer(%d)\n", - dev->name, __FUNCTION__, skb->len); + dev->name, __FUNCTION__, skb->len); - for (; count_to_go; buf_ptr++, count_to_go--) + for (; count_to_go; buf_ptr++, count_to_go--) { outb(*buf_ptr, mipsnet_reg_address(dev, txDataBuffer)); + } dev->stats.tx_packets++; dev->stats.tx_bytes += skb->len; @@ -65,7 +67,7 @@ static inline ssize_t mipsnet_put_todevice(struct net_device *dev, static int mipsnet_xmit(struct sk_buff *skb, struct net_device *dev) { pr_debug("%s:%s(): transmitting %d bytes\n", - dev->name, __FUNCTION__, skb->len); + dev->name, __FUNCTION__, skb->len); /* Only one packet at a time. Once TXDONE interrupt is serviced, the * queue will be restarted. 
@@ -81,8 +83,7 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count) struct sk_buff *skb; size_t len = count; - skb = alloc_skb(len + 2, GFP_KERNEL); - if (!skb) { + if (!(skb = alloc_skb(len + 2, GFP_KERNEL))) { dev->stats.rx_dropped++; return -ENOMEM; } @@ -95,7 +96,7 @@ static inline ssize_t mipsnet_get_fromdev(struct net_device *dev, size_t count) skb->ip_summed = CHECKSUM_UNNECESSARY; pr_debug("%s:%s(): pushing RXed data to kernel\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); netif_rx(skb); dev->stats.rx_packets++; @@ -113,44 +114,42 @@ static irqreturn_t mipsnet_interrupt(int irq, void *dev_id) if (irq == dev->irq) { pr_debug("%s:%s(): irq %d for device\n", - dev->name, __FUNCTION__, irq); + dev->name, __FUNCTION__, irq); retval = IRQ_HANDLED; interruptFlags = inl(mipsnet_reg_address(dev, interruptControl)); pr_debug("%s:%s(): intCtl=0x%016llx\n", dev->name, - __FUNCTION__, interruptFlags); + __FUNCTION__, interruptFlags); if (interruptFlags & MIPSNET_INTCTL_TXDONE) { pr_debug("%s:%s(): got TXDone\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); outl(MIPSNET_INTCTL_TXDONE, mipsnet_reg_address(dev, interruptControl)); - /* only one packet at a time, we are done. */ + // only one packet at a time, we are done. netif_wake_queue(dev); } else if (interruptFlags & MIPSNET_INTCTL_RXDONE) { pr_debug("%s:%s(): got RX data\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); mipsnet_get_fromdev(dev, - inl(mipsnet_reg_address(dev, rxDataCount))); + inl(mipsnet_reg_address(dev, rxDataCount))); pr_debug("%s:%s(): clearing RX int\n", - dev->name, __FUNCTION__); + dev->name, __FUNCTION__); outl(MIPSNET_INTCTL_RXDONE, mipsnet_reg_address(dev, interruptControl)); } else if (interruptFlags & MIPSNET_INTCTL_TESTBIT) { pr_debug("%s:%s(): got test interrupt\n", - dev->name, __FUNCTION__); - /* - * TESTBIT is cleared on read. 
- * And takes effect after a write with 0 - */ + dev->name, __FUNCTION__); + // TESTBIT is cleared on read. + // And takes effect after a write with 0 outl(0, mipsnet_reg_address(dev, interruptControl)); } else { pr_debug("%s:%s(): no valid fags 0x%016llx\n", - dev->name, __FUNCTION__, interruptFlags); - /* Maybe shared IRQ, just ignore, no clearing. */ + dev->name, __FUNCTION__, interruptFlags); + // Maybe shared IRQ, just ignore, no clearing. retval = IRQ_NONE; } @@ -160,7 +159,7 @@ static irqreturn_t mipsnet_interrupt(int irq, void *dev_id) retval = IRQ_NONE; } return retval; -} +} //mipsnet_interrupt() static int mipsnet_open(struct net_device *dev) { @@ -172,18 +171,18 @@ static int mipsnet_open(struct net_device *dev) if (err) { pr_debug("%s: %s(): can't get irq %d\n", - dev->name, __FUNCTION__, dev->irq); + dev->name, __FUNCTION__, dev->irq); release_region(dev->base_addr, MIPSNET_IO_EXTENT); return err; } pr_debug("%s: %s(): got IO region at 0x%04lx and irq %d for dev.\n", - dev->name, __FUNCTION__, dev->base_addr, dev->irq); + dev->name, __FUNCTION__, dev->base_addr, dev->irq); netif_start_queue(dev); - /* test interrupt handler */ + // test interrupt handler outl(MIPSNET_INTCTL_TESTBIT, mipsnet_reg_address(dev, interruptControl)); @@ -200,6 +199,8 @@ static int mipsnet_close(struct net_device *dev) static void mipsnet_set_mclist(struct net_device *dev) { + // we don't do anything + return; } static int __init mipsnet_probe(struct device *dev) @@ -225,13 +226,13 @@ static int __init mipsnet_probe(struct device *dev) */ netdev->base_addr = 0x4200; netdev->irq = MIPS_CPU_IRQ_BASE + MIPSCPU_INT_MB0 + - inl(mipsnet_reg_address(netdev, interruptInfo)); + inl(mipsnet_reg_address(netdev, interruptInfo)); - /* Get the io region now, get irq on open() */ + // Get the io region now, get irq on open() if (!request_region(netdev->base_addr, MIPSNET_IO_EXTENT, "mipsnet")) { pr_debug("%s: %s(): IO region {start: 0x%04lux, len: %d} " - "for dev is not availble.\n", 
netdev->name, - __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT); + "for dev is not availble.\n", netdev->name, + __FUNCTION__, netdev->base_addr, MIPSNET_IO_EXTENT); err = -EBUSY; goto out_free_netdev; } diff --git a/trunk/drivers/net/mipsnet.h b/trunk/drivers/net/mipsnet.h index 0132c6714a40..026c732024c9 100644 --- a/trunk/drivers/net/mipsnet.h +++ b/trunk/drivers/net/mipsnet.h @@ -9,34 +9,32 @@ /* * Id of this Net device, as seen by the core. */ -#define MIPS_NET_DEV_ID ((uint64_t) \ - ((uint64_t) 'M' << 0)| \ - ((uint64_t) 'I' << 8)| \ - ((uint64_t) 'P' << 16)| \ - ((uint64_t) 'S' << 24)| \ - ((uint64_t) 'N' << 32)| \ - ((uint64_t) 'E' << 40)| \ - ((uint64_t) 'T' << 48)| \ - ((uint64_t) '0' << 56)) +#define MIPS_NET_DEV_ID ((uint64_t) \ + ((uint64_t)'M'<< 0)| \ + ((uint64_t)'I'<< 8)| \ + ((uint64_t)'P'<<16)| \ + ((uint64_t)'S'<<24)| \ + ((uint64_t)'N'<<32)| \ + ((uint64_t)'E'<<40)| \ + ((uint64_t)'T'<<48)| \ + ((uint64_t)'0'<<56)) /* * Net status/control block as seen by sw in the core. * (Why not use bit fields? can't be bothered with cross-platform struct * packing.) */ -struct net_control_block { - /* - * dev info for probing - * reads as MIPSNET%d where %d is some form of version - */ - uint64_t devId; /* 0x00 */ +typedef struct _net_control_block { + /// dev info for probing + /// reads as MIPSNET%d where %d is some form of version + uint64_t devId; /*0x00 */ /* * read only busy flag. * Set and cleared by the Net Device to indicate that an rx or a tx * is in progress. */ - uint32_t busy; /* 0x08 */ + uint32_t busy; /*0x08 */ /* * Set by the Net Device. @@ -45,16 +43,16 @@ struct net_control_block { * rxDataBuffer. The value will decrease till 0 until all the data * from rxDataBuffer has been read. */ - uint32_t rxDataCount; /* 0x0c */ + uint32_t rxDataCount; /*0x0c */ #define MIPSNET_MAX_RXTX_DATACOUNT (1<<16) /* - * Settable from the MIPS core, cleared by the Net Device. 
The core - * should set the number of bytes it wants to send, then it should - * write those bytes of data to txDataBuffer. The device will clear - * txDataCount has been processed (not necessarily sent). + * Settable from the MIPS core, cleared by the Net Device. + * The core should set the number of bytes it wants to send, + * then it should write those bytes of data to txDataBuffer. + * The device will clear txDataCount has been processed (not necessarily sent). */ - uint32_t txDataCount; /* 0x10 */ + uint32_t txDataCount; /*0x10 */ /* * Interrupt control @@ -71,42 +69,39 @@ struct net_control_block { * To clear the test interrupt, write 0 to this register. */ uint32_t interruptControl; /*0x14 */ -#define MIPSNET_INTCTL_TXDONE ((uint32_t)(1 << 0)) -#define MIPSNET_INTCTL_RXDONE ((uint32_t)(1 << 1)) -#define MIPSNET_INTCTL_TESTBIT ((uint32_t)(1 << 31)) -#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE | \ - MIPSNET_INTCTL_RXDONE | \ - MIPSNET_INTCTL_TESTBIT) +#define MIPSNET_INTCTL_TXDONE ((uint32_t)(1<< 0)) +#define MIPSNET_INTCTL_RXDONE ((uint32_t)(1<< 1)) +#define MIPSNET_INTCTL_TESTBIT ((uint32_t)(1<<31)) +#define MIPSNET_INTCTL_ALLSOURCES (MIPSNET_INTCTL_TXDONE|MIPSNET_INTCTL_RXDONE|MIPSNET_INTCTL_TESTBIT) /* - * Readonly core-specific interrupt info for the device to signal the - * core. The meaning of the contents of this field might change. - * - * TODO: the whole memIntf interrupt scheme is messy: the device should - * have no control what so ever of what VPE/register set is being - * used. The MemIntf should only expose interrupt lines, and - * something in the config should be responsible for the - * line<->core/vpe bindings. + * Readonly core-specific interrupt info for the device to signal the core. + * The meaning of the contents of this field might change. 
*/ - uint32_t interruptInfo; /* 0x18 */ + /*###\todo: the whole memIntf interrupt scheme is messy: the device should have + * no control what so ever of what VPE/register set is being used. + * The MemIntf should only expose interrupt lines, and something in the + * config should be responsible for the line<->core/vpe bindings. + */ + uint32_t interruptInfo; /*0x18 */ /* * This is where the received data is read out. * There is more data to read until rxDataReady is 0. * Only 1 byte at this regs offset is used. */ - uint32_t rxDataBuffer; /* 0x1c */ + uint32_t rxDataBuffer; /*0x1c */ /* - * This is where the data to transmit is written. Data should be - * written for the amount specified in the txDataCount register. Only - * 1 byte at this regs offset is used. + * This is where the data to transmit is written. + * Data should be written for the amount specified in the txDataCount register. + * Only 1 byte at this regs offset is used. */ - uint32_t txDataBuffer; /* 0x20 */ -}; + uint32_t txDataBuffer; /*0x20 */ +} MIPS_T_NetControl; #define MIPSNET_IO_EXTENT 0x40 /* being generous */ -#define field_offset(field) (offsetof(struct net_control_block, field)) +#define field_offset(field) ((int)&((MIPS_T_NetControl*)(0))->field) #endif /* __MIPSNET_H */ diff --git a/trunk/drivers/net/myri10ge/myri10ge.c b/trunk/drivers/net/myri10ge/myri10ge.c index 64c8151f2004..e8afa101433e 100644 --- a/trunk/drivers/net/myri10ge/myri10ge.c +++ b/trunk/drivers/net/myri10ge/myri10ge.c @@ -75,7 +75,7 @@ #include "myri10ge_mcp.h" #include "myri10ge_mcp_gen_header.h" -#define MYRI10GE_VERSION_STR "1.3.2-1.287" +#define MYRI10GE_VERSION_STR "1.3.2-1.269" MODULE_DESCRIPTION("Myricom 10G driver (10GbE)"); MODULE_AUTHOR("Maintainer: help@myri.com"); @@ -214,8 +214,6 @@ struct myri10ge_priv { unsigned long serial_number; int vendor_specific_offset; int fw_multicast_support; - unsigned long features; - u32 max_tso6; u32 read_dma; u32 write_dma; u32 read_write_dma; @@ -313,7 +311,6 @@ 
MODULE_PARM_DESC(myri10ge_wcfifo, "Enable WC Fifo when WC is enabled\n"); #define myri10ge_pio_copy(to,from,size) __iowrite64_copy(to,from,size/8) static void myri10ge_set_multicast_list(struct net_device *dev); -static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev); static inline void put_be32(__be32 val, __be32 __iomem * p) { @@ -615,7 +612,6 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp) __be32 buf[16]; u32 dma_low, dma_high, size; int status, i; - struct myri10ge_cmd cmd; size = 0; status = myri10ge_load_hotplug_firmware(mgp, &size); @@ -692,14 +688,6 @@ static int myri10ge_load_firmware(struct myri10ge_priv *mgp) dev_info(&mgp->pdev->dev, "handoff confirmed\n"); myri10ge_dummy_rdma(mgp, 1); - /* probe for IPv6 TSO support */ - mgp->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; - status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, - &cmd, 0); - if (status == 0) { - mgp->max_tso6 = cmd.data0; - mgp->features |= NETIF_F_TSO6; - } return 0; } @@ -1059,8 +1047,7 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, hlen = MYRI10GE_HLEN > len ? len : MYRI10GE_HLEN; - /* allocate an skb to attach the page(s) to. This is done - * after trying LRO, so as to avoid skb allocation overheads */ + /* allocate an skb to attach the page(s) to. 
*/ skb = netdev_alloc_skb(dev, MYRI10GE_HLEN + 16); if (unlikely(skb == NULL)) { @@ -1230,8 +1217,7 @@ static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp) static int myri10ge_poll(struct napi_struct *napi, int budget) { - struct myri10ge_priv *mgp = - container_of(napi, struct myri10ge_priv, napi); + struct myri10ge_priv *mgp = container_of(napi, struct myri10ge_priv, napi); struct net_device *netdev = mgp->dev; struct myri10ge_rx_done *rx_done = &mgp->rx_done; int work_done; @@ -1396,18 +1382,6 @@ static int myri10ge_set_rx_csum(struct net_device *netdev, u32 csum_enabled) return 0; } -static int myri10ge_set_tso(struct net_device *netdev, u32 tso_enabled) -{ - struct myri10ge_priv *mgp = netdev_priv(netdev); - unsigned long flags = mgp->features & (NETIF_F_TSO6 | NETIF_F_TSO); - - if (tso_enabled) - netdev->features |= flags; - else - netdev->features &= ~flags; - return 0; -} - static const char myri10ge_gstrings_stats[][ETH_GSTRING_LEN] = { "rx_packets", "tx_packets", "rx_bytes", "tx_bytes", "rx_errors", "tx_errors", "rx_dropped", "tx_dropped", "multicast", "collisions", @@ -1532,7 +1506,7 @@ static const struct ethtool_ops myri10ge_ethtool_ops = { .set_rx_csum = myri10ge_set_rx_csum, .set_tx_csum = ethtool_op_set_tx_hw_csum, .set_sg = ethtool_op_set_sg, - .set_tso = myri10ge_set_tso, + .set_tso = ethtool_op_set_tso, .get_link = ethtool_op_get_link, .get_strings = myri10ge_get_strings, .get_sset_count = myri10ge_get_sset_count, @@ -2190,8 +2164,7 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) pseudo_hdr_offset = cksum_offset + skb->csum_offset; /* If the headers are excessively large, then we must * fall back to a software checksum */ - if (unlikely(!mss && (cksum_offset > 255 || - pseudo_hdr_offset > 127))) { + if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) { if (skb_checksum_help(skb)) goto drop; cksum_offset = 0; @@ -2211,18 +2184,9 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device 
*dev) /* negative cum_len signifies to the * send loop that we are still in the * header portion of the TSO packet. - * TSO header can be at most 1KB long */ + * TSO header must be at most 134 bytes long */ cum_len = -(skb_transport_offset(skb) + tcp_hdrlen(skb)); - /* for IPv6 TSO, the checksum offset stores the - * TCP header length, to save the firmware from - * the need to parse the headers */ - if (skb_is_gso_v6(skb)) { - cksum_offset = tcp_hdrlen(skb); - /* Can only handle headers <= max_tso6 long */ - if (unlikely(-cum_len > mgp->max_tso6)) - return myri10ge_sw_tso(skb, dev); - } /* for TSO, pseudo_hdr_offset holds mss. * The firmware figures out where to put * the checksum by parsing the header. */ @@ -2337,12 +2301,10 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) req++; count++; rdma_count++; - if (cksum_offset != 0 && !(mss && skb_is_gso_v6(skb))) { - if (unlikely(cksum_offset > seglen)) - cksum_offset -= seglen; - else - cksum_offset = 0; - } + if (unlikely(cksum_offset > seglen)) + cksum_offset -= seglen; + else + cksum_offset = 0; } if (frag_idx == frag_cnt) break; @@ -2425,41 +2387,6 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) } -static int myri10ge_sw_tso(struct sk_buff *skb, struct net_device *dev) -{ - struct sk_buff *segs, *curr; - struct myri10ge_priv *mgp = dev->priv; - int status; - - segs = skb_gso_segment(skb, dev->features & ~NETIF_F_TSO6); - if (unlikely(IS_ERR(segs))) - goto drop; - - while (segs) { - curr = segs; - segs = segs->next; - curr->next = NULL; - status = myri10ge_xmit(curr, dev); - if (status != 0) { - dev_kfree_skb_any(curr); - if (segs != NULL) { - curr = segs; - segs = segs->next; - curr->next = NULL; - dev_kfree_skb_any(segs); - } - goto drop; - } - } - dev_kfree_skb_any(skb); - return 0; - -drop: - dev_kfree_skb_any(skb); - mgp->stats.tx_dropped += 1; - return 0; -} - static struct net_device_stats *myri10ge_get_stats(struct net_device *dev) { struct myri10ge_priv 
*mgp = netdev_priv(dev); @@ -2779,6 +2706,7 @@ static void myri10ge_select_firmware(struct myri10ge_priv *mgp) } #ifdef CONFIG_PM + static int myri10ge_suspend(struct pci_dev *pdev, pm_message_t state) { struct myri10ge_priv *mgp; @@ -2859,6 +2787,7 @@ static int myri10ge_resume(struct pci_dev *pdev) return -EIO; } + #endif /* CONFIG_PM */ static u32 myri10ge_read_reboot(struct myri10ge_priv *mgp) @@ -3025,7 +2954,8 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) mgp = netdev_priv(netdev); mgp->dev = netdev; - netif_napi_add(netdev, &mgp->napi, myri10ge_poll, myri10ge_napi_weight); + netif_napi_add(netdev, &mgp->napi, + myri10ge_poll, myri10ge_napi_weight); mgp->pdev = pdev; mgp->csum_flag = MXGEFW_FLAGS_CKSUM; mgp->pause = myri10ge_flow_control; @@ -3147,7 +3077,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->change_mtu = myri10ge_change_mtu; netdev->set_multicast_list = myri10ge_set_multicast_list; netdev->set_mac_address = myri10ge_set_mac_address; - netdev->features = mgp->features; + netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO; if (dac_enabled) netdev->features |= NETIF_F_HIGHDMA; diff --git a/trunk/drivers/net/myri10ge/myri10ge_mcp.h b/trunk/drivers/net/myri10ge/myri10ge_mcp.h index 58e57178c563..a1d2a22296a9 100644 --- a/trunk/drivers/net/myri10ge/myri10ge_mcp.h +++ b/trunk/drivers/net/myri10ge/myri10ge_mcp.h @@ -10,7 +10,7 @@ struct mcp_dma_addr { __be32 low; }; -/* 4 Bytes. 8 Bytes for NDIS drivers. */ +/* 4 Bytes */ struct mcp_slot { __sum16 checksum; __be16 length; @@ -205,87 +205,8 @@ enum myri10ge_mcp_cmd_type { /* same than DMA_TEST (same args) but abort with UNALIGNED on unaligned * chipset */ - MXGEFW_CMD_UNALIGNED_STATUS, - /* return data = boolean, true if the chipset is known to be unaligned */ - - MXGEFW_CMD_ALWAYS_USE_N_BIG_BUFFERS, - /* data0 = number of big buffers to use. It must be 0 or a power of 2. 
- * 0 indicates that the NIC consumes as many buffers as they are required - * for packet. This is the default behavior. - * A power of 2 number indicates that the NIC always uses the specified - * number of buffers for each big receive packet. - * It is up to the driver to ensure that this value is big enough for - * the NIC to be able to receive maximum-sized packets. - */ - - MXGEFW_CMD_GET_MAX_RSS_QUEUES, - MXGEFW_CMD_ENABLE_RSS_QUEUES, - /* data0 = number of slices n (0, 1, ..., n-1) to enable - * data1 = interrupt mode. 0=share one INTx/MSI, 1=use one MSI-X per queue. - * If all queues share one interrupt, the driver must have set - * RSS_SHARED_INTERRUPT_DMA before enabling queues. - */ - MXGEFW_CMD_GET_RSS_SHARED_INTERRUPT_MASK_OFFSET, - MXGEFW_CMD_SET_RSS_SHARED_INTERRUPT_DMA, - /* data0, data1 = bus address lsw, msw */ - MXGEFW_CMD_GET_RSS_TABLE_OFFSET, - /* get the offset of the indirection table */ - MXGEFW_CMD_SET_RSS_TABLE_SIZE, - /* set the size of the indirection table */ - MXGEFW_CMD_GET_RSS_KEY_OFFSET, - /* get the offset of the secret key */ - MXGEFW_CMD_RSS_KEY_UPDATED, - /* tell nic that the secret key's been updated */ - MXGEFW_CMD_SET_RSS_ENABLE, - /* data0 = enable/disable rss - * 0: disable rss. nic does not distribute receive packets. - * 1: enable rss. nic distributes receive packets among queues. - * data1 = hash type - * 1: IPV4 - * 2: TCP_IPV4 - * 3: IPV4 | TCP_IPV4 - */ - - MXGEFW_CMD_GET_MAX_TSO6_HDR_SIZE, - /* Return data = the max. size of the entire headers of a IPv6 TSO packet. - * If the header size of a IPv6 TSO packet is larger than the specified - * value, then the driver must not use TSO. - * This size restriction only applies to IPv6 TSO. - * For IPv4 TSO, the maximum size of the headers is fixed, and the NIC - * always has enough header buffer to store maximum-sized headers. - */ - - MXGEFW_CMD_SET_TSO_MODE, - /* data0 = TSO mode. 
- * 0: Linux/FreeBSD style (NIC default) - * 1: NDIS/NetBSD style - */ - - MXGEFW_CMD_MDIO_READ, - /* data0 = dev_addr (PMA/PMD or PCS ...), data1 = register/addr */ - MXGEFW_CMD_MDIO_WRITE, - /* data0 = dev_addr, data1 = register/addr, data2 = value */ - - MXGEFW_CMD_XFP_I2C_READ, - /* Starts to get a fresh copy of one byte or of the whole xfp i2c table, the - * obtained data is cached inside the xaui-xfi chip : - * data0 : "all" flag : 0 => get one byte, 1=> get 256 bytes, - * data1 : if (data0 == 0): index of byte to refresh [ not used otherwise ] - * The operation might take ~1ms for a single byte or ~65ms when refreshing all 256 bytes - * During the i2c operation, MXGEFW_CMD_XFP_I2C_READ or MXGEFW_CMD_XFP_BYTE attempts - * will return MXGEFW_CMD_ERROR_BUSY - */ - MXGEFW_CMD_XFP_BYTE, - /* Return the last obtained copy of a given byte in the xfp i2c table - * (copy cached during the last relevant MXGEFW_CMD_XFP_I2C_READ) - * data0 : index of the desired table entry - * Return data = the byte stored at the requested index in the table - */ - - MXGEFW_CMD_GET_VPUMP_OFFSET, - /* Return data = NIC memory offset of mcp_vpump_public_global */ - MXGEFW_CMD_RESET_VPUMP, - /* Resets the VPUMP state */ + MXGEFW_CMD_UNALIGNED_STATUS + /* return data = boolean, true if the chipset is known to be unaligned */ }; enum myri10ge_mcp_cmd_status { @@ -299,10 +220,7 @@ enum myri10ge_mcp_cmd_status { MXGEFW_CMD_ERROR_BAD_PORT, MXGEFW_CMD_ERROR_RESOURCES, MXGEFW_CMD_ERROR_MULTICAST, - MXGEFW_CMD_ERROR_UNALIGNED, - MXGEFW_CMD_ERROR_NO_MDIO, - MXGEFW_CMD_ERROR_XFP_FAILURE, - MXGEFW_CMD_ERROR_XFP_ABSENT + MXGEFW_CMD_ERROR_UNALIGNED }; #define MXGEFW_OLD_IRQ_DATA_LEN 40 diff --git a/trunk/drivers/net/natsemi.c b/trunk/drivers/net/natsemi.c index 50e1ec67ef9c..527f9dcc7f69 100644 --- a/trunk/drivers/net/natsemi.c +++ b/trunk/drivers/net/natsemi.c @@ -1576,7 +1576,7 @@ static int netdev_open(struct net_device *dev) /* Set the timer to check for link beat. 
*/ init_timer(&np->timer); - np->timer.expires = round_jiffies(jiffies + NATSEMI_TIMER_FREQ); + np->timer.expires = jiffies + NATSEMI_TIMER_FREQ; np->timer.data = (unsigned long)dev; np->timer.function = &netdev_timer; /* timer handler */ add_timer(&np->timer); @@ -1856,11 +1856,7 @@ static void netdev_timer(unsigned long data) next_tick = 1; } } - - if (next_tick > 1) - mod_timer(&np->timer, round_jiffies(jiffies + next_tick)); - else - mod_timer(&np->timer, jiffies + next_tick); + mod_timer(&np->timer, jiffies + next_tick); } static void dump_ring(struct net_device *dev) @@ -3314,19 +3310,13 @@ static int natsemi_resume (struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata (pdev); struct netdev_private *np = netdev_priv(dev); - int ret = 0; rtnl_lock(); if (netif_device_present(dev)) goto out; if (netif_running(dev)) { BUG_ON(!np->hands_off); - ret = pci_enable_device(pdev); - if (ret < 0) { - dev_err(&pdev->dev, - "pci_enable_device() failed: %d\n", ret); - goto out; - } + pci_enable_device(pdev); /* pci_power_on(pdev); */ napi_enable(&np->napi); @@ -3341,12 +3331,12 @@ static int natsemi_resume (struct pci_dev *pdev) spin_unlock_irq(&np->lock); enable_irq(dev->irq); - mod_timer(&np->timer, round_jiffies(jiffies + 1*HZ)); + mod_timer(&np->timer, jiffies + 1*HZ); } netif_device_attach(dev); out: rtnl_unlock(); - return ret; + return 0; } #endif /* CONFIG_PM */ diff --git a/trunk/drivers/net/ne-h8300.c b/trunk/drivers/net/ne-h8300.c index fbc7531d3c7d..368f2560856d 100644 --- a/trunk/drivers/net/ne-h8300.c +++ b/trunk/drivers/net/ne-h8300.c @@ -93,7 +93,7 @@ static int __init init_reg_offset(struct net_device *dev,unsigned long base_addr bus_width = *(volatile unsigned char *)ABWCR; bus_width &= 1 << ((base_addr >> 21) & 7); - for (i = 0; i < ARRAY_SIZE(reg_offset); i++) + for (i = 0; i < sizeof(reg_offset) / sizeof(u32); i++) if (bus_width == 0) reg_offset[i] = i * 2 + 1; else @@ -115,7 +115,7 @@ static int h8300_ne_irq[] = {EXT_IRQ5}; static inline 
int init_dev(struct net_device *dev) { - if (h8300_ne_count < ARRAY_SIZE(h8300_ne_base)) { + if (h8300_ne_count < (sizeof(h8300_ne_base) / sizeof(unsigned long))) { dev->base_addr = h8300_ne_base[h8300_ne_count]; dev->irq = h8300_ne_irq[h8300_ne_count]; h8300_ne_count++; diff --git a/trunk/drivers/net/niu.c b/trunk/drivers/net/niu.c index ed1f9bbb2a32..43bfe7e6b6f5 100644 --- a/trunk/drivers/net/niu.c +++ b/trunk/drivers/net/niu.c @@ -6123,19 +6123,19 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) val = nr64(ESPC_PHY_TYPE); switch (np->port) { case 0: - val8 = (val & ESPC_PHY_TYPE_PORT0) >> + val = (val & ESPC_PHY_TYPE_PORT0) >> ESPC_PHY_TYPE_PORT0_SHIFT; break; case 1: - val8 = (val & ESPC_PHY_TYPE_PORT1) >> + val = (val & ESPC_PHY_TYPE_PORT1) >> ESPC_PHY_TYPE_PORT1_SHIFT; break; case 2: - val8 = (val & ESPC_PHY_TYPE_PORT2) >> + val = (val & ESPC_PHY_TYPE_PORT2) >> ESPC_PHY_TYPE_PORT2_SHIFT; break; case 3: - val8 = (val & ESPC_PHY_TYPE_PORT3) >> + val = (val & ESPC_PHY_TYPE_PORT3) >> ESPC_PHY_TYPE_PORT3_SHIFT; break; default: @@ -6143,9 +6143,9 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) np->port); return -EINVAL; } - niudbg(PROBE, "SPROM: PHY type %x\n", val8); + niudbg(PROBE, "SPROM: PHY type %llx\n", (unsigned long long) val); - switch (val8) { + switch (val) { case ESPC_PHY_TYPE_1G_COPPER: /* 1G copper, MII */ np->flags &= ~(NIU_FLAGS_FIBER | @@ -6175,7 +6175,8 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) break; default: - dev_err(np->device, PFX "Bogus SPROM phy type %u\n", val8); + dev_err(np->device, PFX "Bogus SPROM phy type %llu\n", + (unsigned long long) val); return -EINVAL; } @@ -6212,7 +6213,7 @@ static int __devinit niu_pci_probe_sprom(struct niu *np) val = nr64(ESPC_MOD_STR_LEN); niudbg(PROBE, "SPROM: MOD_STR_LEN[%llu]\n", (unsigned long long) val); - if (val >= 8 * 4) + if (val > 8 * 4) return -EINVAL; for (i = 0; i < val; i += 4) { @@ -6228,7 +6229,7 @@ static int __devinit niu_pci_probe_sprom(struct niu 
*np) val = nr64(ESPC_BD_MOD_STR_LEN); niudbg(PROBE, "SPROM: BD_MOD_STR_LEN[%llu]\n", (unsigned long long) val); - if (val >= 4 * 4) + if (val > 4 * 4) return -EINVAL; for (i = 0; i < val; i += 4) { diff --git a/trunk/drivers/net/saa9730.c b/trunk/drivers/net/saa9730.c index c65199df8a7f..14361e885415 100644 --- a/trunk/drivers/net/saa9730.c +++ b/trunk/drivers/net/saa9730.c @@ -97,16 +97,13 @@ static void evm_saa9730_unblock_lan_int(struct lan_saa9730_private *lp) &lp->evm_saa9730_regs->InterruptBlock1); } -static void __used show_saa9730_regs(struct net_device *dev) +static void __attribute_used__ show_saa9730_regs(struct lan_saa9730_private *lp) { - struct lan_saa9730_private *lp = netdev_priv(dev); int i, j; - printk("TxmBufferA = %p\n", lp->TxmBuffer[0][0]); printk("TxmBufferB = %p\n", lp->TxmBuffer[1][0]); printk("RcvBufferA = %p\n", lp->RcvBuffer[0][0]); printk("RcvBufferB = %p\n", lp->RcvBuffer[1][0]); - for (i = 0; i < LAN_SAA9730_BUFFERS; i++) { for (j = 0; j < LAN_SAA9730_TXM_Q_SIZE; j++) { printk("TxmBuffer[%d][%d] = %x\n", i, j, @@ -149,13 +146,11 @@ static void __used show_saa9730_regs(struct net_device *dev) readl(&lp->lan_saa9730_regs->RxCtl)); printk("lp->lan_saa9730_regs->RxStatus = %x\n", readl(&lp->lan_saa9730_regs->RxStatus)); - for (i = 0; i < LAN_SAA9730_CAM_DWORDS; i++) { writel(i, &lp->lan_saa9730_regs->CamAddress); printk("lp->lan_saa9730_regs->CamData = %x\n", readl(&lp->lan_saa9730_regs->CamData)); } - printk("dev->stats.tx_packets = %lx\n", dev->stats.tx_packets); printk("dev->stats.tx_errors = %lx\n", dev->stats.tx_errors); printk("dev->stats.tx_aborted_errors = %lx\n", @@ -860,7 +855,7 @@ static void lan_saa9730_tx_timeout(struct net_device *dev) /* Transmitter timeout, serious problems */ dev->stats.tx_errors++; printk("%s: transmit timed out, reset\n", dev->name); - /*show_saa9730_regs(dev); */ + /*show_saa9730_regs(lp); */ lan_saa9730_restart(lp); dev->trans_start = jiffies; diff --git a/trunk/drivers/net/tc35815.c 
b/trunk/drivers/net/tc35815.c index 8038f2882c9b..a679f4310ce1 100644 --- a/trunk/drivers/net/tc35815.c +++ b/trunk/drivers/net/tc35815.c @@ -1461,6 +1461,7 @@ static irqreturn_t tc35815_interrupt(int irq, void *dev_id) } return IRQ_NONE; #else + struct tc35815_local *lp = dev->priv; int handled; u32 status; diff --git a/trunk/drivers/net/tehuti.c b/trunk/drivers/net/tehuti.c index 4e1b84e6d66a..8d04654f0c59 100644 --- a/trunk/drivers/net/tehuti.c +++ b/trunk/drivers/net/tehuti.c @@ -1906,7 +1906,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /************** pci *****************/ if ((err = pci_enable_device(pdev))) /* it trigers interrupt, dunno why. */ - goto err_pci; /* it's not a problem though */ + RET(err); /* it's not a problem though */ if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) && !(err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK))) { @@ -2076,7 +2076,6 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) pci_release_regions(pdev); err_dma: pci_disable_device(pdev); -err_pci: vfree(nic); RET(err); diff --git a/trunk/drivers/net/tg3.c b/trunk/drivers/net/tg3.c index 014dc2cfe4d6..30b1cca8144c 100644 --- a/trunk/drivers/net/tg3.c +++ b/trunk/drivers/net/tg3.c @@ -64,8 +64,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.84" -#define DRV_MODULE_RELDATE "October 12, 2007" +#define DRV_MODULE_VERSION "3.83" +#define DRV_MODULE_RELDATE "October 10, 2007" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -5056,12 +5056,6 @@ static void tg3_restore_pci_state(struct tg3 *tp) pci_write_config_dword(tp->pdev, TG3PCI_COMMAND, tp->pci_cmd); - if (!(tp->tg3_flags2 & TG3_FLG2_PCI_EXPRESS)) { - pci_write_config_byte(tp->pdev, PCI_CACHE_LINE_SIZE, - tp->pci_cacheline_sz); - pci_write_config_byte(tp->pdev, PCI_LATENCY_TIMER, - tp->pci_lat_timer); - } /* Make sure PCI-X relaxed ordering bit is clear. 
*/ if (tp->pcix_cap) { u16 pcix_cmd; @@ -9040,7 +9034,7 @@ static int tg3_do_mem_test(struct tg3 *tp, u32 offset, u32 len) int i; u32 j; - for (i = 0; i < ARRAY_SIZE(test_pattern); i++) { + for (i = 0; i < sizeof(test_pattern)/sizeof(u32); i++) { for (j = 0; j < len; j += 4) { u32 val; diff --git a/trunk/drivers/net/tulip/de4x5.c b/trunk/drivers/net/tulip/de4x5.c index 41f34bb91cad..9b9cd83fb8b6 100644 --- a/trunk/drivers/net/tulip/de4x5.c +++ b/trunk/drivers/net/tulip/de4x5.c @@ -1041,7 +1041,7 @@ static struct InfoLeaf infoleaf_array[] = { {DC21142, dc21142_infoleaf}, {DC21143, dc21143_infoleaf} }; -#define INFOLEAF_SIZE ARRAY_SIZE(infoleaf_array) +#define INFOLEAF_SIZE (sizeof(infoleaf_array)/(sizeof(int)+sizeof(int *))) /* ** List the SROM info block functions @@ -1056,7 +1056,7 @@ static int (*dc_infoblock[])(struct net_device *dev, u_char, u_char *) = { compact_infoblock }; -#define COMPACT (ARRAY_SIZE(dc_infoblock) - 1) +#define COMPACT (sizeof(dc_infoblock)/sizeof(int *) - 1) /* ** Miscellaneous defines... 
diff --git a/trunk/drivers/net/ucc_geth.c b/trunk/drivers/net/ucc_geth.c index bec413ba9bca..d00e7d41f6a5 100644 --- a/trunk/drivers/net/ucc_geth.c +++ b/trunk/drivers/net/ucc_geth.c @@ -63,7 +63,7 @@ #define UGETH_MSG_DEFAULT (NETIF_MSG_IFUP << 1 ) - 1 void uec_set_ethtool_ops(struct net_device *netdev); - + static DEFINE_SPINLOCK(ugeth_lock); static struct { @@ -3454,12 +3454,9 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit u16 length, howmany = 0; u32 bd_status; u8 *bdBuffer; - struct net_device * dev; ugeth_vdbg("%s: IN", __FUNCTION__); - dev = ugeth->dev; - /* collect received buffers */ bd = ugeth->rxBd[rxQ]; diff --git a/trunk/drivers/net/wan/sdla.c b/trunk/drivers/net/wan/sdla.c index 05df0a345b60..b39a541b2509 100644 --- a/trunk/drivers/net/wan/sdla.c +++ b/trunk/drivers/net/wan/sdla.c @@ -1342,11 +1342,11 @@ static int sdla_set_config(struct net_device *dev, struct ifmap *map) if (flp->initialized) return(-EINVAL); - for(i=0; i < ARRAY_SIZE(valid_port); i++) + for(i=0;i < sizeof(valid_port) / sizeof (int) ; i++) if (valid_port[i] == map->base_addr) break; - if (i == ARRAY_SIZE(valid_port)) + if (i == sizeof(valid_port) / sizeof(int)) return(-EINVAL); if (!request_region(map->base_addr, SDLA_IO_EXTENTS, dev->name)){ @@ -1487,12 +1487,12 @@ static int sdla_set_config(struct net_device *dev, struct ifmap *map) } } - for(i=0; i < ARRAY_SIZE(valid_mem); i++) + for(i=0;i < sizeof(valid_mem) / sizeof (int) ; i++) if (valid_mem[i] == map->mem_start) break; err = -EINVAL; - if (i == ARRAY_SIZE(valid_mem)) + if (i == sizeof(valid_mem) / sizeof(int)) goto fail2; if (flp->type == SDLA_S502A && (map->mem_start & 0xF000) >> 12 == 0x0E) diff --git a/trunk/drivers/net/xen-netfront.c b/trunk/drivers/net/xen-netfront.c index 7fd505cc4f7a..f464b82c7d5f 100644 --- a/trunk/drivers/net/xen-netfront.c +++ b/trunk/drivers/net/xen-netfront.c @@ -74,12 +74,22 @@ struct netfront_info { struct napi_struct napi; - unsigned int evtchn; - struct 
xenbus_device *xbdev; + struct xen_netif_tx_front_ring tx; + struct xen_netif_rx_front_ring rx; spinlock_t tx_lock; - struct xen_netif_tx_front_ring tx; - int tx_ring_ref; + spinlock_t rx_lock; + + unsigned int evtchn; + + /* Receive-ring batched refills. */ +#define RX_MIN_TARGET 8 +#define RX_DFL_MIN_TARGET 64 +#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) + unsigned rx_min_target, rx_max_target, rx_target; + struct sk_buff_head rx_batch; + + struct timer_list rx_refill_timer; /* * {tx,rx}_skbs store outstanding skbuffs. Free tx_skb entries @@ -98,23 +108,14 @@ struct netfront_info { grant_ref_t grant_tx_ref[NET_TX_RING_SIZE]; unsigned tx_skb_freelist; - spinlock_t rx_lock ____cacheline_aligned_in_smp; - struct xen_netif_rx_front_ring rx; - int rx_ring_ref; - - /* Receive-ring batched refills. */ -#define RX_MIN_TARGET 8 -#define RX_DFL_MIN_TARGET 64 -#define RX_MAX_TARGET min_t(int, NET_RX_RING_SIZE, 256) - unsigned rx_min_target, rx_max_target, rx_target; - struct sk_buff_head rx_batch; - - struct timer_list rx_refill_timer; - struct sk_buff *rx_skbs[NET_RX_RING_SIZE]; grant_ref_t gref_rx_head; grant_ref_t grant_rx_ref[NET_RX_RING_SIZE]; + struct xenbus_device *xbdev; + int tx_ring_ref; + int rx_ring_ref; + unsigned long rx_pfn_array[NET_RX_RING_SIZE]; struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1]; struct mmu_update rx_mmu[NET_RX_RING_SIZE]; diff --git a/trunk/drivers/scsi/gdth.c b/trunk/drivers/scsi/gdth.c index 3ac080ee6e2f..e8010a702e73 100644 --- a/trunk/drivers/scsi/gdth.c +++ b/trunk/drivers/scsi/gdth.c @@ -5213,10 +5213,6 @@ static int __init gdth_init(void) #endif /* CONFIG_PCI */ TRACE2(("gdth_detect() %d controller detected\n", gdth_ctr_count)); - - if (list_empty(&gdth_instances)) - return -ENODEV; - #ifdef GDTH_STATISTICS TRACE2(("gdth_detect(): Initializing timer !\n")); init_timer(&gdth_timer); diff --git a/trunk/fs/Kconfig b/trunk/fs/Kconfig index 815d201d8600..bb02b39380a3 100644 --- a/trunk/fs/Kconfig +++ b/trunk/fs/Kconfig @@ 
-1755,14 +1755,6 @@ config SUNRPC config SUNRPC_GSS tristate -config SUNRPC_XPRT_RDMA - tristate "RDMA transport for sunrpc (EXPERIMENTAL)" - depends on SUNRPC && INFINIBAND && EXPERIMENTAL - default m - help - Adds a client RPC transport for supporting kernel NFS over RDMA - mounts, including Infiniband and iWARP. Experimental. - config SUNRPC_BIND34 bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL diff --git a/trunk/fs/inode.c b/trunk/fs/inode.c index f97de0aeb3b6..29f5068f819b 100644 --- a/trunk/fs/inode.c +++ b/trunk/fs/inode.c @@ -142,15 +142,6 @@ static struct inode *alloc_inode(struct super_block *sb) return NULL; } - spin_lock_init(&inode->i_lock); - lockdep_set_class(&inode->i_lock, &sb->s_type->i_lock_key); - - mutex_init(&inode->i_mutex); - lockdep_set_class(&inode->i_mutex, &sb->s_type->i_mutex_key); - - init_rwsem(&inode->i_alloc_sem); - lockdep_set_class(&inode->i_alloc_sem, &sb->s_type->i_alloc_sem_key); - mapping->a_ops = &empty_aops; mapping->host = inode; mapping->flags = 0; @@ -199,6 +190,8 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); + mutex_init(&inode->i_mutex); + init_rwsem(&inode->i_alloc_sem); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); rwlock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -206,6 +199,7 @@ void inode_init_once(struct inode *inode) spin_lock_init(&inode->i_data.private_lock); INIT_RAW_PRIO_TREE_ROOT(&inode->i_data.i_mmap); INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear); + spin_lock_init(&inode->i_lock); i_size_ordered_init(inode); #ifdef CONFIG_INOTIFY INIT_LIST_HEAD(&inode->inotify_watches); @@ -567,18 +561,6 @@ EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct file_system_type *type = inode->i_sb->s_type; - /* - * ensure nobody is actually holding i_mutex - */ - 
mutex_destroy(&inode->i_mutex); - mutex_init(&inode->i_mutex); - if (inode->i_mode & S_IFDIR) - lockdep_set_class(&inode->i_mutex, &type->i_mutex_dir_key); - else - lockdep_set_class(&inode->i_mutex, &type->i_mutex_key); -#endif /* * This is special! We do not need the spinlock * when clearing I_LOCK, because we're guaranteed diff --git a/trunk/fs/jbd/transaction.c b/trunk/fs/jbd/transaction.c index 8df5bac0b7a5..772b6531a2a2 100644 --- a/trunk/fs/jbd/transaction.c +++ b/trunk/fs/jbd/transaction.c @@ -233,8 +233,6 @@ static int start_this_handle(journal_t *journal, handle_t *handle) return ret; } -static struct lock_class_key jbd_handle_key; - /* Allocate a new handle. This should probably be in a slab... */ static handle_t *new_handle(int nblocks) { @@ -245,8 +243,6 @@ static handle_t *new_handle(int nblocks) handle->h_buffer_credits = nblocks; handle->h_ref = 1; - lockdep_init_map(&handle->h_lockdep_map, "jbd_handle", &jbd_handle_key, 0); - return handle; } @@ -290,9 +286,6 @@ handle_t *journal_start(journal_t *journal, int nblocks) current->journal_info = NULL; handle = ERR_PTR(err); } - - lock_acquire(&handle->h_lockdep_map, 0, 0, 0, 2, _THIS_IP_); - return handle; } @@ -1418,8 +1411,6 @@ int journal_stop(handle_t *handle) spin_unlock(&journal->j_state_lock); } - lock_release(&handle->h_lockdep_map, 1, _THIS_IP_); - jbd_free_handle(handle); return err; } diff --git a/trunk/fs/lockd/mon.c b/trunk/fs/lockd/mon.c index 908b23fadd05..3353ed8421a7 100644 --- a/trunk/fs/lockd/mon.c +++ b/trunk/fs/lockd/mon.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -133,7 +132,7 @@ nsm_create(void) .sin_port = 0, }; struct rpc_create_args args = { - .protocol = XPRT_TRANSPORT_UDP, + .protocol = IPPROTO_UDP, .address = (struct sockaddr *)&sin, .addrsize = sizeof(sin), .servername = "localhost", diff --git a/trunk/fs/lockd/xdr.c b/trunk/fs/lockd/xdr.c index 633653bff944..5316e307a49d 100644 --- a/trunk/fs/lockd/xdr.c +++ 
b/trunk/fs/lockd/xdr.c @@ -62,9 +62,8 @@ static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c) } else { - dprintk("lockd: bad cookie size %d (only cookies under " - "%d bytes are supported.)\n", - len, NLM_MAXCOOKIELEN); + printk(KERN_NOTICE + "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); return NULL; } return p; @@ -85,7 +84,8 @@ nlm_decode_fh(__be32 *p, struct nfs_fh *f) unsigned int len; if ((len = ntohl(*p++)) != NFS2_FHSIZE) { - dprintk("lockd: bad fhandle size %d (should be %d)\n", + printk(KERN_NOTICE + "lockd: bad fhandle size %d (should be %d)\n", len, NFS2_FHSIZE); return NULL; } diff --git a/trunk/fs/lockd/xdr4.c b/trunk/fs/lockd/xdr4.c index 43ff9397e6c6..846fc1d639dd 100644 --- a/trunk/fs/lockd/xdr4.c +++ b/trunk/fs/lockd/xdr4.c @@ -64,9 +64,8 @@ nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c) } else { - dprintk("lockd: bad cookie size %d (only cookies under " - "%d bytes are supported.)\n", - len, NLM_MAXCOOKIELEN); + printk(KERN_NOTICE + "lockd: bad cookie size %d (only cookies under %d bytes are supported.)\n", len, NLM_MAXCOOKIELEN); return NULL; } return p; @@ -87,7 +86,8 @@ nlm4_decode_fh(__be32 *p, struct nfs_fh *f) memset(f->data, 0, sizeof(f->data)); f->size = ntohl(*p++); if (f->size > NFS_MAXFHSIZE) { - dprintk("lockd: bad fhandle size %d (should be <=%d)\n", + printk(KERN_NOTICE + "lockd: bad fhandle size %d (should be <=%d)\n", f->size, NFS_MAXFHSIZE); return NULL; } diff --git a/trunk/fs/nfs/Makefile b/trunk/fs/nfs/Makefile index df0f41e09885..b55cb236cf74 100644 --- a/trunk/fs/nfs/Makefile +++ b/trunk/fs/nfs/Makefile @@ -16,3 +16,4 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o +nfs-objs := $(nfs-y) diff --git a/trunk/fs/nfs/client.c b/trunk/fs/nfs/client.c index a532ee12740a..a204484072f3 100644 --- a/trunk/fs/nfs/client.c +++ b/trunk/fs/nfs/client.c @@ 
-23,8 +23,6 @@ #include #include #include -#include -#include #include #include #include @@ -342,8 +340,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_retries = 2; switch (proto) { - case XPRT_TRANSPORT_TCP: - case XPRT_TRANSPORT_RDMA: + case IPPROTO_TCP: if (!to->to_initval) to->to_initval = 60 * HZ; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) @@ -352,7 +349,7 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto, to->to_maxval = to->to_initval + (to->to_increment * to->to_retries); to->to_exponential = 0; break; - case XPRT_TRANSPORT_UDP: + case IPPROTO_UDP: default: if (!to->to_initval) to->to_initval = 11 * HZ / 10; @@ -504,9 +501,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server, rpc_authflavor_t /* * Initialise an NFS2 or NFS3 client */ -static int nfs_init_client(struct nfs_client *clp, - const struct nfs_parsed_mount_data *data) +static int nfs_init_client(struct nfs_client *clp, const struct nfs_mount_data *data) { + int proto = (data->flags & NFS_MOUNT_TCP) ? 
IPPROTO_TCP : IPPROTO_UDP; int error; if (clp->cl_cons_state == NFS_CS_READY) { @@ -525,8 +522,8 @@ static int nfs_init_client(struct nfs_client *clp, * Create a client RPC handle for doing FSSTAT with UNIX auth only * - RFC 2623, sec 2.3.2 */ - error = nfs_create_rpc_client(clp, data->nfs_server.protocol, - data->timeo, data->retrans, RPC_AUTH_UNIX, 0); + error = nfs_create_rpc_client(clp, proto, data->timeo, data->retrans, + RPC_AUTH_UNIX, 0); if (error < 0) goto error; nfs_mark_client_ready(clp, NFS_CS_READY); @@ -541,8 +538,7 @@ static int nfs_init_client(struct nfs_client *clp, /* * Create a version 2 or 3 client */ -static int nfs_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) +static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_data *data) { struct nfs_client *clp; int error, nfsvers = 2; @@ -555,8 +551,7 @@ static int nfs_init_server(struct nfs_server *server, #endif /* Allocate or find a client reference we can use */ - clp = nfs_get_client(data->nfs_server.hostname, - &data->nfs_server.address, nfsvers); + clp = nfs_get_client(data->hostname, &data->addr, nfsvers); if (IS_ERR(clp)) { dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); return PTR_ERR(clp); @@ -586,7 +581,7 @@ static int nfs_init_server(struct nfs_server *server, if (error < 0) goto error; - error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, data->pseudoflavor); if (error < 0) goto error; @@ -765,7 +760,7 @@ void nfs_free_server(struct nfs_server *server) * Create a version 2 or 3 volume record * - keyed on server and FSID */ -struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, +struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, struct nfs_fh *mntfh) { struct nfs_server *server; @@ -911,7 +906,7 @@ static int nfs4_set_client(struct nfs_server *server, * Create a version 4 volume record */ static int 
nfs4_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + const struct nfs4_mount_data *data, rpc_authflavor_t authflavour) { int error; @@ -931,7 +926,7 @@ static int nfs4_init_server(struct nfs_server *server, server->acdirmin = data->acdirmin * HZ; server->acdirmax = data->acdirmax * HZ; - error = nfs_init_server_rpcclient(server, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, authflavour); /* Done */ dprintk("<-- nfs4_init_server() = %d\n", error); @@ -942,7 +937,12 @@ static int nfs4_init_server(struct nfs_server *server, * Create a version 4 volume record * - keyed on server and FSID */ -struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, +struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, + const char *hostname, + const struct sockaddr_in *addr, + const char *mntpath, + const char *ip_addr, + rpc_authflavor_t authflavour, struct nfs_fh *mntfh) { struct nfs_fattr fattr; @@ -956,18 +956,13 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, return ERR_PTR(-ENOMEM); /* Get a client record */ - error = nfs4_set_client(server, - data->nfs_server.hostname, - &data->nfs_server.address, - data->client_address, - data->auth_flavors[0], - data->nfs_server.protocol, - data->timeo, data->retrans); + error = nfs4_set_client(server, hostname, addr, ip_addr, authflavour, + data->proto, data->timeo, data->retrans); if (error < 0) goto error; /* set up the general RPC client */ - error = nfs4_init_server(server, data); + error = nfs4_init_server(server, data, authflavour); if (error < 0) goto error; @@ -976,7 +971,7 @@ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); /* Probe the root fh to retrieve its FSID */ - error = nfs4_path_walk(server, mntfh, data->nfs_server.export_path); + error = nfs4_path_walk(server, mntfh, mntpath); if (error < 0) goto error; 
diff --git a/trunk/fs/nfs/delegation.c b/trunk/fs/nfs/delegation.c index af8b235d405d..c55a761c22bb 100644 --- a/trunk/fs/nfs/delegation.c +++ b/trunk/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if (nfs_file_open_context(fl->fl_file) != ctx) + if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) continue; status = nfs4_lock_delegation_recall(state, fl); if (status >= 0) @@ -109,7 +109,6 @@ static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid * void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res) { struct nfs_delegation *delegation = NFS_I(inode)->delegation; - struct rpc_cred *oldcred; if (delegation == NULL) return; @@ -117,12 +116,11 @@ void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, st sizeof(delegation->stateid.data)); delegation->type = res->delegation_type; delegation->maxsize = res->maxsize; - oldcred = delegation->cred; + put_rpccred(cred); delegation->cred = get_rpccred(cred); delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM; NFS_I(inode)->delegation_state = delegation->type; smp_wmb(); - put_rpccred(oldcred); } /* diff --git a/trunk/fs/nfs/dir.c b/trunk/fs/nfs/dir.c index 8ec7fbd8240c..e4a04d16b8b0 100644 --- a/trunk/fs/nfs/dir.c +++ b/trunk/fs/nfs/dir.c @@ -200,6 +200,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) desc->timestamp = timestamp; desc->timestamp_valid = 1; SetPageUptodate(page); + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * through inode->i_mutex or some other mechanism. 
@@ -211,7 +214,9 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) unlock_page(page); return 0; error: + SetPageError(page); unlock_page(page); + nfs_zap_caches(inode); desc->error = error; return -EIO; } @@ -402,7 +407,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct file *file = desc->file; struct nfs_entry *entry = desc->entry; struct dentry *dentry = NULL; - u64 fileid; + unsigned long fileid; int loop_count = 0, res; @@ -413,7 +418,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ - fileid = entry->ino; + fileid = nfs_fileid_to_ino_t(entry->ino); /* Get a dentry if we have one */ if (dentry != NULL) @@ -423,12 +428,11 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, /* Use readdirplus info */ if (dentry != NULL && dentry->d_inode != NULL) { d_type = dt_type(dentry->d_inode); - fileid = NFS_FILEID(dentry->d_inode); + fileid = dentry->d_inode->i_ino; } res = filldir(dirent, entry->name, entry->len, - file->f_pos, nfs_compat_user_ino64(fileid), - d_type); + file->f_pos, fileid, d_type); if (res < 0) break; file->f_pos++; @@ -486,6 +490,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, page, NFS_SERVER(inode)->dtsize, desc->plus); + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&inode->i_lock); desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -551,7 +558,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = 
NFS_USE_READDIRPLUS(inode); @@ -616,7 +623,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) } if (offset != filp->f_pos) { filp->f_pos = offset; - nfs_file_open_context(filp)->dir_cookie = 0; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; } out: mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); @@ -643,18 +650,36 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) */ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { + unsigned long verf; + if (IS_ROOT(dentry)) return 1; - if (!nfs_verify_change_attribute(dir, dentry->d_time)) - return 0; - /* Revalidate nfsi->cache_change_attribute before we declare a match */ - if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) - return 0; - if (!nfs_verify_change_attribute(dir, dentry->d_time)) + verf = dentry->d_time; + if (nfs_caches_unstable(dir) + || verf != NFS_I(dir)->cache_change_attribute) return 0; return 1; } +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_time = verf; +} + +static void nfs_refresh_verifier(struct dentry * dentry, unsigned long verf) +{ + nfs_set_verifier(dentry, verf); +} + +/* + * Whenever an NFS operation succeeds, we know that the dentry + * is valid, so we update the revalidation timestamp. + */ +static inline void nfs_renew_times(struct dentry * dentry) +{ + dentry->d_time = jiffies; +} + /* * Return the intent data that applies to this particular path component * @@ -669,19 +694,6 @@ static inline unsigned int nfs_lookup_check_intent(struct nameidata *nd, unsigne return nd->flags & mask; } -/* - * Use intent information to check whether or not we're going to do - * an O_EXCL create using this path component. 
- */ -static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) -{ - if (NFS_PROTO(dir)->version == 2) - return 0; - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) - return 0; - return (nd->intent.open.flags & O_EXCL) != 0; -} - /* * Inode and filehandle revalidation for lookups. * @@ -705,7 +717,6 @@ int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) goto out_force; - return 0; } return nfs_revalidate_inode(server, inode); out_force: @@ -748,6 +759,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + unsigned long verifier; parent = dget_parent(dentry); lock_kernel(); @@ -755,6 +767,10 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; + /* Revalidate parent directory attribute cache */ + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + goto out_zap_parent; + if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; @@ -769,7 +785,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) } /* Force a full look up iff the parent directory has changed */ - if (!nfs_is_exclusive_create(dir, nd) && nfs_check_verifier(dir, dentry)) { + if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, nd)) goto out_zap_parent; goto out_valid; @@ -778,6 +794,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if (NFS_STALE(inode)) goto out_bad; + verifier = nfs_save_change_attribute(dir); error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; @@ -786,7 +803,8 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; - nfs_set_verifier(dentry, 
nfs_save_change_attribute(dir)); + nfs_renew_times(dentry); + nfs_refresh_verifier(dentry, verifier); out_valid: unlock_kernel(); dput(parent); @@ -797,7 +815,7 @@ static int nfs_lookup_revalidate(struct dentry * dentry, struct nameidata *nd) out_zap_parent: nfs_zap_caches(dir); out_bad: - nfs_mark_for_revalidate(dir); + NFS_CACHEINV(dir); if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); @@ -854,6 +872,8 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode) nfs_complete_unlink(dentry, inode); unlock_kernel(); } + /* When creating a negative dentry, we want to renew d_time */ + nfs_renew_times(dentry); iput(inode); } @@ -863,6 +883,30 @@ struct dentry_operations nfs_dentry_operations = { .d_iput = nfs_dentry_iput, }; +/* + * Use intent information to check whether or not we're going to do + * an O_EXCL create using this path component. + */ +static inline +int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) +{ + if (NFS_PROTO(dir)->version == 2) + return 0; + if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) + return 0; + return (nd->intent.open.flags & O_EXCL) != 0; +} + +static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) + /* Revalidate fsid using the parent directory */ + return __nfs_revalidate_inode(server, dir); + return 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -901,6 +945,11 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru res = ERR_PTR(error); goto out_unlock; } + error = nfs_reval_fsid(dir, &fattr); + if (error < 0) { + res = ERR_PTR(error); + goto out_unlock; + } inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); res = (struct dentry *)inode; if (IS_ERR(res)) @@ -909,10 +958,17 @@ static struct dentry 
*nfs_lookup(struct inode *dir, struct dentry * dentry, stru no_entry: res = d_materialise_unique(dentry, inode); if (res != NULL) { + struct dentry *parent; if (IS_ERR(res)) goto out_unlock; + /* Was a directory renamed! */ + parent = dget_parent(res); + if (!IS_ROOT(parent)) + nfs_mark_for_revalidate(parent->d_inode); + dput(parent); dentry = res; } + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); @@ -964,16 +1020,28 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } dentry->d_op = NFS_PROTO(dir)->dentry_ops; - /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash - * the dentry. */ + /* Let vfs_create() deal with O_EXCL */ if (nd->intent.open.flags & O_EXCL) { - d_instantiate(dentry, NULL); + d_add(dentry, NULL); goto out; } /* Open the file on the server */ lock_kernel(); - res = nfs4_atomic_open(dir, dentry, nd); + /* Revalidate parent directory attribute cache */ + error = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (error < 0) { + res = ERR_PTR(error); + unlock_kernel(); + goto out; + } + + if (nd->intent.open.flags & O_CREAT) { + nfs_begin_data_update(dir); + res = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); + } else + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(res)) { error = PTR_ERR(res); @@ -995,6 +1063,8 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } } else if (res != NULL) dentry = res; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: return res; no_open: @@ -1006,6 +1076,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) struct dentry *parent = NULL; struct inode *inode = dentry->d_inode; struct inode *dir; + unsigned long verifier; int openflags, ret = 0; parent = dget_parent(dentry); @@ -1015,12 +1086,8 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) 
/* We can't create new files in nfs_open_revalidate(), so we * optimize away revalidation of negative dentries. */ - if (inode == NULL) { - if (!nfs_neg_need_reval(dir, dentry, nd)) - ret = 1; + if (inode == NULL) goto out; - } - /* NFS only supports OPEN on regular files */ if (!S_ISREG(inode->i_mode)) goto no_open; @@ -1037,7 +1104,10 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) * change attribute *before* we do the RPC call. */ lock_kernel(); + verifier = nfs_save_change_attribute(dir); ret = nfs4_open_revalidate(dir, dentry, openflags, nd); + if (!ret) + nfs_refresh_verifier(dentry, verifier); unlock_kernel(); out: dput(parent); @@ -1063,7 +1133,6 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) .len = entry->len, }; struct inode *inode; - unsigned long verf = nfs_save_change_attribute(dir); switch (name.len) { case 2: @@ -1074,14 +1143,6 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) if (name.name[0] == '.') return dget(parent); } - - spin_lock(&dir->i_lock); - if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { - spin_unlock(&dir->i_lock); - return NULL; - } - spin_unlock(&dir->i_lock); - name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); if (dentry != NULL) { @@ -1122,8 +1183,12 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) dentry = alias; } + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + return dentry; out_renew: - nfs_set_verifier(dentry, verf); + nfs_renew_times(dentry); + nfs_refresh_verifier(dentry, nfs_save_change_attribute(dir)); return dentry; } @@ -1133,40 +1198,32 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct dentry *parent = dget_parent(dentry); - struct inode *dir = parent->d_inode; struct inode *inode; int error = 
-EACCES; - d_drop(dentry); - /* We may have been initialized further down */ if (dentry->d_inode) - goto out; + return 0; if (fhandle->size == 0) { + struct inode *dir = dentry->d_parent->d_inode; error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) - goto out_error; + return error; } - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); if (!(fattr->valid & NFS_ATTR_FATTR)) { struct nfs_server *server = NFS_SB(dentry->d_sb); error = server->nfs_client->rpc_ops->getattr(server, fhandle, fattr); if (error < 0) - goto out_error; + return error; } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); error = PTR_ERR(inode); if (IS_ERR(inode)) - goto out_error; - d_add(dentry, inode); -out: - dput(parent); + return error; + d_instantiate(dentry, inode); + if (d_unhashed(dentry)) + d_rehash(dentry); return 0; -out_error: - nfs_mark_for_revalidate(dir); - dput(parent); - return error; } /* @@ -1192,9 +1249,13 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, open_flags = nd->intent.open.flags; lock_kernel(); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); + nfs_end_data_update(dir); if (error != 0) goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); return 0; out_err: @@ -1222,9 +1283,13 @@ nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) attr.ia_valid = ATTR_MODE; lock_kernel(); + nfs_begin_data_update(dir); status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); + nfs_end_data_update(dir); if (status != 0) goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); return 0; out_err: @@ -1248,9 +1313,13 @@ static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) attr.ia_mode = mode | S_IFDIR; lock_kernel(); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); + 
nfs_end_data_update(dir); if (error != 0) goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); return 0; out_err: @@ -1267,10 +1336,12 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); lock_kernel(); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); /* Ensure the VFS deletes this inode */ if (error == 0 && dentry->d_inode != NULL) clear_nlink(dentry->d_inode); + nfs_end_data_update(dir); unlock_kernel(); return error; @@ -1279,9 +1350,9 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter; - const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; + const int i_inosize = sizeof(dir->i_ino)*2; const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs")+fileidsize+countersize-1; + const int slen = sizeof(".nfs") + i_inosize + countersize - 1; char silly[slen+1]; struct qstr qsilly; struct dentry *sdentry; @@ -1299,9 +1370,8 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; - sprintf(silly, ".nfs%*.*Lx", - fileidsize, fileidsize, - (unsigned long long)NFS_FILEID(dentry->d_inode)); + sprintf(silly, ".nfs%*.*lx", + i_inosize, i_inosize, dentry->d_inode->i_ino); /* Return delegation in anticipation of the rename */ nfs_inode_return_delegation(dentry->d_inode); @@ -1328,14 +1398,19 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) qsilly.name = silly; qsilly.len = strlen(silly); + nfs_begin_data_update(dir); if (dentry->d_inode) { + nfs_begin_data_update(dentry->d_inode); error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); nfs_mark_for_revalidate(dentry->d_inode); + nfs_end_data_update(dentry->d_inode); } else error = NFS_PROTO(dir)->rename(dir, 
&dentry->d_name, dir, &qsilly); + nfs_end_data_update(dir); if (!error) { + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); error = nfs_async_unlink(dir, dentry); @@ -1368,15 +1443,19 @@ static int nfs_safe_remove(struct dentry *dentry) goto out; } + nfs_begin_data_update(dir); if (inode != NULL) { nfs_inode_return_delegation(inode); + nfs_begin_data_update(inode); error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); /* The VFS may want to delete this inode */ if (error == 0) drop_nlink(inode); nfs_mark_for_revalidate(inode); + nfs_end_data_update(inode); } else error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_end_data_update(dir); out: return error; } @@ -1414,6 +1493,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); if (!error) { + nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); } else if (need_rehash) d_rehash(dentry); @@ -1468,7 +1548,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen); kunmap_atomic(kaddr, KM_USER0); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr); + nfs_end_data_update(dir); if (error != 0) { dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s) error %d\n", dir->i_sb->s_id, dir->i_ino, @@ -1508,12 +1590,15 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) dentry->d_parent->d_name.name, dentry->d_name.name); lock_kernel(); - d_drop(dentry); + nfs_begin_data_update(dir); + nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { atomic_inc(&inode->i_count); - d_add(dentry, inode); + d_instantiate(dentry, inode); } + nfs_end_data_update(inode); + nfs_end_data_update(dir); unlock_kernel(); return error; } @@ -1616,16 +1701,22 @@ static int nfs_rename(struct inode 
*old_dir, struct dentry *old_dentry, d_delete(new_dentry); } + nfs_begin_data_update(old_dir); + nfs_begin_data_update(new_dir); + nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); nfs_mark_for_revalidate(old_inode); + nfs_end_data_update(old_inode); + nfs_end_data_update(new_dir); + nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); if (!error) { d_move(old_dentry, new_dentry); - nfs_set_verifier(new_dentry, - nfs_save_change_attribute(new_dir)); + nfs_renew_times(new_dentry); + nfs_refresh_verifier(new_dentry, nfs_save_change_attribute(new_dir)); } /* new dentry created? */ @@ -1751,7 +1842,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st return NULL; } -static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -1763,7 +1854,7 @@ static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, str cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + nfsi->attrtimeo)) + if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; @@ -1818,7 +1909,7 @@ static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry * nfs_access_free_entry(entry); } -static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -1866,24 +1957,6 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) return -EACCES; } -static int 
nfs_open_permission_mask(int openflags) -{ - int mask = 0; - - if (openflags & FMODE_READ) - mask |= MAY_READ; - if (openflags & FMODE_WRITE) - mask |= MAY_WRITE; - if (openflags & FMODE_EXEC) - mask |= MAY_EXEC; - return mask; -} - -int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) -{ - return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); -} - int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct rpc_cred *cred; diff --git a/trunk/fs/nfs/direct.c b/trunk/fs/nfs/direct.c index 32fe97211eea..fcf4d384610e 100644 --- a/trunk/fs/nfs/direct.c +++ b/trunk/fs/nfs/direct.c @@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size return -ENOMEM; dreq->inode = inode; - dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -510,6 +510,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode nfs_direct_write_reschedule(dreq); break; default: + nfs_end_data_update(inode); if (dreq->commit_data != NULL) nfs_commit_free(dreq->commit_data); nfs_direct_free_writedata(dreq); @@ -532,6 +533,7 @@ static inline void nfs_alloc_commit_data(struct nfs_direct_req *dreq) static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) { + nfs_end_data_update(inode); nfs_direct_free_writedata(dreq); nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); @@ -716,12 +718,14 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz sync = FLUSH_STABLE; dreq->inode = inode; - dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); + dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, count); + 
nfs_begin_data_update(inode); + rpc_clnt_sigmask(clnt, &oldset); result = nfs_direct_write_schedule(dreq, user_addr, count, pos, sync); if (!result) diff --git a/trunk/fs/nfs/file.c b/trunk/fs/nfs/file.c index c664bb921425..579cf8a7d4a7 100644 --- a/trunk/fs/nfs/file.c +++ b/trunk/fs/nfs/file.c @@ -33,7 +33,6 @@ #include #include "delegation.h" -#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -56,8 +55,6 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); -static struct vm_operations_struct nfs_file_vm_ops; - const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, @@ -176,31 +173,6 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) return remote_llseek(filp, offset, origin); } -/* - * Helper for nfs_file_flush() and nfs_fsync() - * - * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to - * disk, but it retrieves and clears ctx->error after synching, despite - * the two being set at the same time in nfs_context_set_write_error(). - * This is because the former is used to notify the _next_ call to - * nfs_file_write() that a write error occured, and hence cause it to - * fall back to doing a synchronous write. - */ -static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) -{ - int have_error, status; - int ret = 0; - - have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - status = nfs_wb_all(inode); - have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); - if (have_error) - ret = xchg(&ctx->error, 0); - if (!ret) - ret = status; - return ret; -} - /* * Flush all dirty pages, and check for write errors. 
* @@ -208,7 +180,7 @@ static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) static int nfs_file_flush(struct file *file, fl_owner_t id) { - struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = file->f_path.dentry->d_inode; int status; @@ -217,11 +189,16 @@ nfs_file_flush(struct file *file, fl_owner_t id) if ((file->f_mode & FMODE_WRITE) == 0) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - + lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_do_fsync(ctx, inode); - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_wb_all(inode); + if (!status) { + status = ctx->error; + ctx->error = 0; + if (!status) + nfs_revalidate_inode(NFS_SERVER(inode), inode); + } + unlock_kernel(); return status; } @@ -280,11 +257,8 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dentry->d_parent->d_name.name, dentry->d_name.name); status = nfs_revalidate_mapping(inode, file->f_mapping); - if (!status) { - vma->vm_ops = &nfs_file_vm_ops; - vma->vm_flags |= VM_CAN_NONLINEAR; - file_accessed(file); - } + if (!status) + status = generic_file_mmap(file, vma); return status; } @@ -296,13 +270,21 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = dentry->d_inode; + int status; dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - return nfs_do_fsync(ctx, inode); + lock_kernel(); + status = nfs_wb_all(inode); + if (!status) { + status = ctx->error; + ctx->error = 0; + } + unlock_kernel(); + return status; } /* @@ -351,7 +333,7 @@ static int nfs_launder_page(struct 
page *page) const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, - .set_page_dirty = __set_page_dirty_nobuffers, + .set_page_dirty = nfs_set_page_dirty, .writepage = nfs_writepage, .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, @@ -364,43 +346,6 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; -static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) -{ - struct file *filp = vma->vm_file; - unsigned pagelen; - int ret = -EINVAL; - - lock_page(page); - if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) - goto out_unlock; - pagelen = nfs_page_length(page); - if (pagelen == 0) - goto out_unlock; - ret = nfs_prepare_write(filp, page, 0, pagelen); - if (!ret) - ret = nfs_commit_write(filp, page, 0, pagelen); -out_unlock: - unlock_page(page); - return ret; -} - -static struct vm_operations_struct nfs_file_vm_ops = { - .fault = filemap_fault, - .page_mkwrite = nfs_vm_page_mkwrite, -}; - -static int nfs_need_sync_write(struct file *filp, struct inode *inode) -{ - struct nfs_open_context *ctx; - - if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) - return 1; - ctx = nfs_file_open_context(filp); - if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) - return 1; - return 0; -} - static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -437,8 +382,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); /* Return error values for O_SYNC and IS_SYNC() */ - if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { - int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); + if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { + int err = nfs_fsync(iocb->ki_filp, dentry, 1); if (err < 0) 
result = err; } diff --git a/trunk/fs/nfs/inode.c b/trunk/fs/nfs/inode.c index 035c769b715e..71a49c3acabd 100644 --- a/trunk/fs/nfs/inode.c +++ b/trunk/fs/nfs/inode.c @@ -49,11 +49,6 @@ #define NFSDBG_FACILITY NFSDBG_VFS -#define NFS_64_BIT_INODE_NUMBERS_ENABLED 1 - -/* Default is to see 64-bit inode numbers */ -static int enable_ino64 = NFS_64_BIT_INODE_NUMBERS_ENABLED; - static void nfs_invalidate_inode(struct inode *); static int nfs_update_inode(struct inode *, struct nfs_fattr *); @@ -67,25 +62,6 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) return nfs_fileid_to_ino_t(fattr->fileid); } -/** - * nfs_compat_user_ino64 - returns the user-visible inode number - * @fileid: 64-bit fileid - * - * This function returns a 32-bit inode number if the boot parameter - * nfs.enable_ino64 is zero. - */ -u64 nfs_compat_user_ino64(u64 fileid) -{ - int ino; - - if (enable_ino64) - return fileid; - ino = fileid; - if (sizeof(ino) < sizeof(fileid)) - ino ^= fileid >> (sizeof(fileid)-sizeof(ino)) * 8; - return ino; -} - int nfs_write_inode(struct inode *inode, int sync) { int ret; @@ -109,6 +85,7 @@ void nfs_clear_inode(struct inode *inode) */ BUG_ON(nfs_have_writebacks(inode)); BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); } @@ -141,8 +118,8 @@ static void nfs_zap_caches_locked(struct inode *inode) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); - nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = jiffies; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) @@ -179,13 +156,6 @@ static void nfs_zap_acl_cache(struct inode *inode) spin_unlock(&inode->i_lock); } -void nfs_invalidate_atime(struct inode *inode) -{ - spin_lock(&inode->i_lock); - NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME; 
- spin_unlock(&inode->i_lock); -} - /* * Invalidate, but do not unhash, the inode. * NB: must be called with inode->i_lock held! @@ -368,6 +338,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) return 0; lock_kernel(); + nfs_begin_data_update(inode); /* Write all dirty data */ if (S_ISREG(inode->i_mode)) { filemap_write_and_wait(inode->i_mapping); @@ -381,6 +352,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr) error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error == 0) nfs_refresh_inode(inode, &fattr); + nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -459,7 +431,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* Flush out writes to the server in order to update c/mtime */ if (S_ISREG(inode->i_mode)) - nfs_wb_nocommit(inode); + nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); /* * We may force a getattr if the user cares about atime. @@ -478,10 +450,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else err = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!err) { + if (!err) generic_fillattr(inode, stat); - stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); - } return err; } @@ -566,7 +536,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_path.dentry->d_inode; - struct nfs_open_context *ctx = nfs_file_open_context(filp); + struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; if (ctx) { filp->private_data = NULL; @@ -628,10 +598,16 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) status = nfs_wait_on_inode(inode); if (status < 0) goto out; - - status = -ESTALE; - if (NFS_STALE(inode)) - goto out; + if (NFS_STALE(inode)) { + status = -ESTALE; + /* Do we trust the cached ESTALE? 
*/ + if (NFS_ATTRTIMEO(inode) != 0) { + if (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME)) { + /* no */ + } else + goto out; + } + } status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status != 0) { @@ -678,7 +654,7 @@ int nfs_attribute_timeout(struct inode *inode) if (nfs_have_delegation(inode, FMODE_READ)) return 0; - return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); + return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); } /** @@ -707,8 +683,11 @@ static int nfs_invalidate_mapping_nolock(struct inode *inode, struct address_spa } spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; - if (S_ISDIR(inode->i_mode)) + if (S_ISDIR(inode->i_mode)) { memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute = jiffies; + } spin_unlock(&inode->i_lock); nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", @@ -777,27 +756,56 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) return ret; } -static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +void nfs_begin_data_update(struct inode *inode) +{ + atomic_inc(&NFS_I(inode)->data_updates); +} + +/** + * nfs_end_data_update + * @inode - pointer to inode + * Declare end of the operations that will update file data + * This will mark the inode as immediately needing revalidation + * of its attribute cache. 
+ */ +void nfs_end_data_update(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - if ((fattr->valid & NFS_ATTR_WCC_V4) != 0 && - nfsi->change_attr == fattr->pre_change_attr) { - nfsi->change_attr = fattr->change_attr; - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + /* Directories: invalidate page cache */ + if (S_ISDIR(inode->i_mode)) { + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); } + nfsi->cache_change_attribute = jiffies; + atomic_dec(&nfsi->data_updates); +} + +static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long now = jiffies; + /* If we have atomic WCC data, we may update some attributes */ if ((fattr->valid & NFS_ATTR_WCC) != 0) { - if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) { memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + nfsi->cache_change_attribute = now; + } if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; + nfsi->cache_change_attribute = now; } - if (inode->i_size == fattr->pre_size && nfsi->npages == 0) + if (inode->i_size == fattr->pre_size && nfsi->npages == 0) { inode->i_size = fattr->size; + nfsi->cache_change_attribute = now; + } } } @@ -814,7 +822,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat { struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_size, new_isize; - unsigned long invalid = 0; + int data_unstable; /* Has the inode gone and changed behind our back? */ @@ -823,41 +831,37 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return -EIO; } + /* Are we in the process of updating data on the server? 
*/ + data_unstable = nfs_caches_unstable(inode); + /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && nfsi->change_attr != fattr->change_attr) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Verify a few of the more important attributes */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; cur_size = i_size_read(inode); new_isize = nfs_size_to_loff_t(fattr->size); if (cur_size != new_isize && nfsi->npages == 0) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? 
*/ if (inode->i_nlink != fattr->nlink) - invalid |= NFS_INO_INVALID_ATTR; + nfsi->cache_validity |= NFS_INO_INVALID_ATTR; if (!timespec_equal(&inode->i_atime, &fattr->atime)) - invalid |= NFS_INO_INVALID_ATIME; - - if (invalid != 0) - nfsi->cache_validity |= invalid; - else - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); + nfsi->cache_validity |= NFS_INO_INVALID_ATIME; nfsi->read_cache_jiffies = fattr->time_start; return 0; @@ -907,41 +911,17 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + int status = 0; spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; - spin_unlock(&inode->i_lock); - return nfs_refresh_inode(inode, fattr); -} - -/** - * nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache - * @inode - pointer to inode - * @fattr - updated attributes - * - * After an operation that has changed the inode metadata, mark the - * attribute cache as being invalid, then try to update it. Fake up - * weak cache consistency data, if none exist. - * - * This function is mainly designed to be used by the ->write_done() functions. 
- */ -int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) -{ - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && - (fattr->valid & NFS_ATTR_WCC_V4) == 0) { - fattr->pre_change_attr = NFS_I(inode)->change_attr; - fattr->valid |= NFS_ATTR_WCC_V4; - } - if ((fattr->valid & NFS_ATTR_FATTR) != 0 && - (fattr->valid & NFS_ATTR_WCC) == 0) { - memcpy(&fattr->pre_ctime, &inode->i_ctime, sizeof(fattr->pre_ctime)); - memcpy(&fattr->pre_mtime, &inode->i_mtime, sizeof(fattr->pre_mtime)); - fattr->pre_size = inode->i_size; - fattr->valid |= NFS_ATTR_WCC; + if (unlikely((fattr->valid & NFS_ATTR_FATTR) == 0)) { + nfsi->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + goto out; } - return nfs_post_op_update_inode(inode, fattr); + status = nfs_update_inode(inode, fattr); +out: + spin_unlock(&inode->i_lock); + return status; } /* @@ -961,8 +941,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); loff_t cur_isize, new_isize; - unsigned long invalid = 0; + unsigned int invalid = 0; unsigned long now = jiffies; + int data_stable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", __FUNCTION__, inode->i_sb->s_id, inode->i_ino, @@ -987,51 +968,57 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->time_start; + nfsi->last_updated = now; - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); + /* Fix a wraparound issue with nfsi->cache_change_attribute */ + if (time_before(now, nfsi->cache_change_attribute)) + nfsi->cache_change_attribute = now - 600*HZ; + + /* Are we racing with known updates of the metadata on the server? 
*/ + data_stable = nfs_verify_change_attribute(inode, fattr->time_start); + if (data_stable) + nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATIME); /* Do atomic weak cache consistency updates */ nfs_wcc_update_inode(inode, fattr); - /* More cache consistency checks */ - if (!(fattr->valid & NFS_ATTR_FATTR_V4)) { - /* NFSv2/v3: Check if the mtime agrees */ - if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { - dprintk("NFS: mtime change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute = now; - } - /* If ctime has changed we should definitely clear access+acl caches */ - if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) - invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - } else if (nfsi->change_attr != fattr->change_attr) { - dprintk("NFS: change_attr change on server for file %s/%ld\n", - inode->i_sb->s_id, inode->i_ino); - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; - nfsi->cache_change_attribute = now; - } - /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); if (new_isize != cur_isize) { - /* Do we perhaps have any outstanding writes, or has - * the file grown beyond our last write? */ - if (nfsi->npages == 0 || new_isize > cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? 
*/ + if (data_stable) { + inode->i_size = new_isize; + invalid |= NFS_INO_INVALID_DATA; + } + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + nfsi->cache_change_attribute = now; dprintk("NFS: isize change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); } + /* Check if the mtime agrees */ + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + nfsi->cache_change_attribute = now; + } - memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + nfsi->cache_change_attribute = now; + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - nfsi->change_attr = fattr->change_attr; if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || @@ -1052,29 +1039,31 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = fattr->du.nfs2.blocks; } + if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && + nfsi->change_attr != fattr->change_attr) { + dprintk("NFS: change_attr change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + nfsi->change_attr = fattr->change_attr; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + nfsi->cache_change_attribute = now; + } + /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { 
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - nfsi->last_updated = now; - } else { - if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { - if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) - nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); - nfsi->attrtimeo_timestamp = now; - } - /* - * Avoid jiffy wraparound issues with nfsi->last_updated - */ - if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) - nfsi->last_updated = nfsi->read_cache_jiffies; + } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = now; } - invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; + if (data_stable) + invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE); if (!nfs_have_delegation(inode, FMODE_READ) || (nfsi->cache_validity & NFS_INO_REVAL_FORCED)) nfsi->cache_validity |= invalid; @@ -1163,6 +1152,7 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag INIT_LIST_HEAD(&nfsi->access_cache_entry_lru); INIT_LIST_HEAD(&nfsi->access_cache_inode_lru); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); nfsi->ncommit = 0; nfsi->npages = 0; nfs4_init_once(nfsi); @@ -1259,7 +1249,6 @@ static void __exit exit_nfs_fs(void) /* Not quite true; I just maintain it */ MODULE_AUTHOR("Olaf Kirch "); MODULE_LICENSE("GPL"); -module_param(enable_ino64, bool, 0644); module_init(init_nfs_fs) module_exit(exit_nfs_fs) diff --git a/trunk/fs/nfs/internal.h b/trunk/fs/nfs/internal.h index f3acf48412be..76cf55d57101 100644 --- a/trunk/fs/nfs/internal.h +++ b/trunk/fs/nfs/internal.h @@ 
-5,6 +5,8 @@ #include struct nfs_string; +struct nfs_mount_data; +struct nfs4_mount_data; /* Maximum number of readahead requests * FIXME: this should really be a sysctl so that users may tune it to suit @@ -25,50 +27,20 @@ struct nfs_clone_mount { rpc_authflavor_t authflavor; }; -/* - * In-kernel mount arguments - */ -struct nfs_parsed_mount_data { - int flags; - int rsize, wsize; - int timeo, retrans; - int acregmin, acregmax, - acdirmin, acdirmax; - int namlen; - unsigned int bsize; - unsigned int auth_flavor_len; - rpc_authflavor_t auth_flavors[1]; - char *client_address; - - struct { - struct sockaddr_in address; - char *hostname; - unsigned int program; - unsigned int version; - unsigned short port; - int protocol; - } mount_server; - - struct { - struct sockaddr_in address; - char *hostname; - char *export_path; - unsigned int program; - int protocol; - } nfs_server; -}; - /* client.c */ extern struct rpc_program nfs_program; extern void nfs_put_client(struct nfs_client *); extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); -extern struct nfs_server *nfs_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); -extern struct nfs_server *nfs4_create_server( - const struct nfs_parsed_mount_data *, - struct nfs_fh *); +extern struct nfs_server *nfs_create_server(const struct nfs_mount_data *, + struct nfs_fh *); +extern struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *, + const char *, + const struct sockaddr_in *, + const char *, + const char *, + rpc_authflavor_t, + struct nfs_fh *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern void nfs_free_server(struct nfs_server *server); diff --git a/trunk/fs/nfs/nfs2xdr.c b/trunk/fs/nfs/nfs2xdr.c index 668ab96c7b59..c5fce7567200 100644 --- a/trunk/fs/nfs/nfs2xdr.c +++ b/trunk/fs/nfs/nfs2xdr.c @@ -251,7 +251,6 @@ nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) replen = 
(RPC_REPHDRSIZE + auth->au_rslack + NFS_readres_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, count); - req->rq_rcv_buf.flags |= XDRBUF_READ; return 0; } @@ -272,7 +271,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) res->eof = 0; hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - dprintk("NFS: READ reply header overflowed:" + printk(KERN_WARNING "NFS: READ reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -282,7 +281,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { - dprintk("NFS: server cheating in read reply: " + printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; } @@ -314,7 +313,6 @@ nfs_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) /* Copy the page array */ xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); - sndbuf->flags |= XDRBUF_WRITE; return 0; } @@ -433,7 +431,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - dprintk("NFS: READDIR reply header overflowed:" + printk(KERN_WARNING "NFS: READDIR reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -456,7 +454,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) len = ntohl(*p++); p += XDR_QUADLEN(len) + 1; /* name plus cookie */ if (len > NFS2_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len 0x%x)!\n", + printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)!\n", len); goto err_unmap; } @@ -473,7 +471,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) entry[0] = entry[1] = 0; /* truncate listing ? 
*/ if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); entry[1] = 1; } goto out; @@ -585,12 +583,12 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) /* Convert length of symlink */ len = ntohl(*p++); if (len >= rcvbuf->page_len || len <= 0) { - dprintk("nfs: server returned giant symlink!\n"); + dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - dprintk("NFS: READLINK reply header overflowed:" + printk(KERN_WARNING "NFS: READLINK reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -599,7 +597,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) } recvd = req->rq_rcv_buf.len - hdrlen; if (recvd < len) { - dprintk("NFS: server cheating in readlink reply: " + printk(KERN_WARNING "NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } @@ -697,7 +695,7 @@ nfs_stat_to_errno(int stat) if (nfs_errtbl[i].stat == stat) return nfs_errtbl[i].errno; } - dprintk("nfs_stat_to_errno: bad nfs status return value: %d\n", stat); + printk(KERN_ERR "nfs_stat_to_errno: bad nfs status return value: %d\n", stat); return nfs_errtbl[i].errno; } diff --git a/trunk/fs/nfs/nfs3acl.c b/trunk/fs/nfs/nfs3acl.c index 9b7362565c0c..7322da4d2055 100644 --- a/trunk/fs/nfs/nfs3acl.c +++ b/trunk/fs/nfs/nfs3acl.c @@ -317,11 +317,13 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, } dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; status = rpc_call_sync(server->client_acl, &msg, 0); spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS; spin_unlock(&inode->i_lock); + nfs_end_data_update(inode); dprintk("NFS reply setacl: %d\n", status); /* pages 
may have been allocated at the xdr layer. */ diff --git a/trunk/fs/nfs/nfs3proc.c b/trunk/fs/nfs/nfs3proc.c index 4cdc2361a669..c7ca5d70870b 100644 --- a/trunk/fs/nfs/nfs3proc.c +++ b/trunk/fs/nfs/nfs3proc.c @@ -166,7 +166,6 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, nfs_fattr_init(&dir_attr); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_refresh_inode(dir, &dir_attr); if (status >= 0 && !(fattr->valid & NFS_ATTR_FATTR)) { msg.rpc_proc = &nfs3_procedures[NFS3PROC_GETATTR]; msg.rpc_argp = fhandle; @@ -174,6 +173,8 @@ nfs3_proc_lookup(struct inode *dir, struct qstr *name, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); } dprintk("NFS reply lookup: %d\n", status); + if (status >= 0) + status = nfs_refresh_inode(dir, &dir_attr); return status; } @@ -606,9 +607,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, nfs_fattr_init(&dir_attr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - - nfs_invalidate_atime(dir); - nfs_refresh_inode(dir, &dir_attr); dprintk("NFS reply readdir: %d\n", status); return status; @@ -726,9 +724,9 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; - - nfs_invalidate_atime(data->inode); - nfs_refresh_inode(data->inode, &data->fattr); + /* Call back common NFS readpage processing */ + if (task->tk_status >= 0) + nfs_refresh_inode(data->inode, &data->fattr); return 0; } @@ -749,7 +747,7 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data) if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); return 0; } @@ -777,7 +775,8 @@ static int nfs3_commit_done(struct rpc_task *task, struct nfs_write_data *data) { if (nfs3_async_handle_jukebox(task, data->inode)) return -EAGAIN; - 
nfs_refresh_inode(data->inode, data->res.fattr); + if (task->tk_status >= 0) + nfs_post_op_update_inode(data->inode, data->res.fattr); return 0; } diff --git a/trunk/fs/nfs/nfs3xdr.c b/trunk/fs/nfs/nfs3xdr.c index 616d3267b7e7..d9e08f0cf2a0 100644 --- a/trunk/fs/nfs/nfs3xdr.c +++ b/trunk/fs/nfs/nfs3xdr.c @@ -346,7 +346,6 @@ nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args) replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS3_readres_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, count); - req->rq_rcv_buf.flags |= XDRBUF_READ; return 0; } @@ -368,7 +367,6 @@ nfs3_xdr_writeargs(struct rpc_rqst *req, __be32 *p, struct nfs_writeargs *args) /* Copy the page array */ xdr_encode_pages(sndbuf, args->pages, args->pgbase, count); - sndbuf->flags |= XDRBUF_WRITE; return 0; } @@ -526,7 +524,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - dprintk("NFS: READDIR reply header overflowed:" + printk(KERN_WARNING "NFS: READDIR reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -549,7 +547,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res len = ntohl(*p++); /* string length */ p += XDR_QUADLEN(len) + 2; /* name + cookie */ if (len > NFS3_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len %x)!\n", + printk(KERN_WARNING "NFS: giant filename in readdir (len %x)!\n", len); goto err_unmap; } @@ -569,7 +567,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res goto short_pkt; len = ntohl(*p++); if (len > NFS3_FHSIZE) { - dprintk("NFS: giant filehandle in " + printk(KERN_WARNING "NFS: giant filehandle in " "readdir (len %x)!\n", len); goto err_unmap; } @@ -590,7 +588,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res entry[0] = 
entry[1] = 0; /* truncate listing ? */ if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); entry[1] = 1; } goto out; @@ -828,23 +826,22 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) /* Convert length of symlink */ len = ntohl(*p++); if (len >= rcvbuf->page_len || len <= 0) { - dprintk("nfs: server returned giant symlink!\n"); + dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - dprintk("NFS: READLINK reply header overflowed:" + printk(KERN_WARNING "NFS: READLINK reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { - dprintk("NFS: READLINK header is short. " - "iovec will be shifted.\n"); + dprintk("NFS: READLINK header is short. iovec will be shifted.\n"); xdr_shift_buf(rcvbuf, iov->iov_len - hdrlen); } recvd = req->rq_rcv_buf.len - hdrlen; if (recvd < len) { - dprintk("NFS: server cheating in readlink reply: " + printk(KERN_WARNING "NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } @@ -879,13 +876,13 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) ocount = ntohl(*p++); if (ocount != count) { - dprintk("NFS: READ count doesn't match RPC opaque count.\n"); + printk(KERN_WARNING "NFS: READ count doesn't match RPC opaque count.\n"); return -errno_NFSERR_IO; } hdrlen = (u8 *) p - (u8 *) iov->iov_base; if (iov->iov_len < hdrlen) { - dprintk("NFS: READ reply header overflowed:" + printk(KERN_WARNING "NFS: READ reply header overflowed:" "length %d > %Zu\n", hdrlen, iov->iov_len); return -errno_NFSERR_IO; } else if (iov->iov_len != hdrlen) { @@ -895,7 +892,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res) recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { - dprintk("NFS: server 
cheating in read reply: " + printk(KERN_WARNING "NFS: server cheating in read reply: " "count %d > recvd %d\n", count, recvd); count = recvd; res->eof = 0; diff --git a/trunk/fs/nfs/nfs4proc.c b/trunk/fs/nfs/nfs4proc.c index cb99fd90a9ac..4b90e17555a9 100644 --- a/trunk/fs/nfs/nfs4proc.c +++ b/trunk/fs/nfs/nfs4proc.c @@ -62,8 +62,10 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -175,7 +177,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); + p = xdr_encode_hyper(p, dentry->d_inode->i_ino); } *p++ = xdr_one; /* next */ @@ -187,7 +189,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); + p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; @@ 
-209,9 +211,8 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) spin_lock(&dir->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA; - if (!cinfo->atomic || cinfo->before != nfsi->change_attr) - nfsi->cache_change_attribute = jiffies; - nfsi->change_attr = cinfo->after; + if (cinfo->before == nfsi->change_attr && cinfo->atomic) + nfsi->change_attr = cinfo->after; spin_unlock(&dir->i_lock); } @@ -453,7 +454,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); lock_kernel(); - ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); + ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode); unlock_kernel(); if (ret != 0) goto out; @@ -947,6 +948,36 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + struct nfs_access_entry cache; + int mask = 0; + int status; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = _nfs4_proc_access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; @@ -1350,7 +1381,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct /* If the open_intent is for execute, we have an extra check to make */ if 
(nd->intent.open.flags & FMODE_EXEC) { - ret = nfs_may_open(state->inode, + ret = _nfs4_do_access(state->inode, state->owner->so_cred, nd->intent.open.flags); if (ret < 0) @@ -1359,7 +1390,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; - ctx = nfs_file_open_context(filp); + ctx = (struct nfs_open_context *)filp->private_data; ctx->state = state; return 0; } @@ -1397,16 +1428,13 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { - if (PTR_ERR(state) == -ENOENT) { + if (PTR_ERR(state) == -ENOENT) d_add(dentry, NULL); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - } return (struct dentry *)state; } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) path.dentry = res; - nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); nfs4_intent_set_file(nd, &path, state); return res; } @@ -1440,7 +1468,6 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st } } if (state->inode == dentry->d_inode) { - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs4_intent_set_file(nd, &path, state); return 1; } @@ -1730,16 +1757,10 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { - struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr; struct nfs4_accessargs args = { .fh = NFS_FH(inode), - .bitmask = server->attr_bitmask, - }; - struct nfs4_accessres res = { - .server = server, - .fattr = &fattr, }; + struct nfs4_accessres res = { 0 }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, @@ -1765,7 +1786,6 @@ static int _nfs4_proc_access(struct inode 
*inode, struct nfs_access_entry *entry if (mode & MAY_EXEC) args.access |= NFS4_ACCESS_EXECUTE; } - nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { entry->mask = 0; @@ -1775,7 +1795,6 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry entry->mask |= MAY_WRITE; if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) entry->mask |= MAY_EXEC; - nfs_refresh_inode(inode, &fattr); } return status; } @@ -1881,13 +1900,11 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, } state = nfs4_do_open(dir, &path, flags, sattr, cred); put_rpccred(cred); - d_drop(dentry); if (IS_ERR(state)) { status = PTR_ERR(state); goto out; } - d_add(dentry, igrab(state->inode)); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_instantiate(dentry, igrab(state->inode)); if (flags & O_EXCL) { struct nfs_fattr fattr; status = nfs4_do_setattr(state->inode, &fattr, sattr, state); @@ -2201,9 +2218,6 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); if (status == 0) memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); - - nfs_invalidate_atime(dir); - dprintk("%s: returns %d\n", __FUNCTION__, status); return status; } @@ -2400,8 +2414,6 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) rpc_restart_call(task); return -EAGAIN; } - - nfs_invalidate_atime(data->inode); if (task->tk_status > 0) renew_lease(server, data->timestamp); return 0; @@ -2431,7 +2443,7 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) } if (task->tk_status >= 0) { renew_lease(NFS_SERVER(inode), data->timestamp); - nfs_post_op_update_inode_force_wcc(inode, data->res.fattr); + nfs_post_op_update_inode(inode, data->res.fattr); } return 0; } @@ -2473,7 +2485,8 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 
rpc_restart_call(task); return -EAGAIN; } - nfs_refresh_inode(inode, data->res.fattr); + if (task->tk_status >= 0) + nfs_post_op_update_inode(inode, data->res.fattr); return 0; } @@ -3043,7 +3056,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co if (status == 0) { status = data->rpc_status; if (status == 0) - nfs_refresh_inode(inode, &data->fattr); + nfs_post_op_update_inode(inode, &data->fattr); } rpc_put_task(task); return status; @@ -3290,7 +3303,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = -ENOMEM; if (seqid == NULL) goto out; - task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); + task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; @@ -3434,7 +3447,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f int ret; dprintk("%s: begin!\n", __FUNCTION__); - data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), + data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, fl->fl_u.nfs4_fl.owner); if (data == NULL) return -ENOMEM; @@ -3560,7 +3573,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) int status; /* verify open state */ - ctx = nfs_file_open_context(filp); + ctx = (struct nfs_open_context *)filp->private_data; state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) diff --git a/trunk/fs/nfs/nfs4state.c b/trunk/fs/nfs/nfs4state.c index bfb36261cecb..3e4adf8c8312 100644 --- a/trunk/fs/nfs/nfs4state.c +++ b/trunk/fs/nfs/nfs4state.c @@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if (nfs_file_open_context(fl->fl_file)->state != state) + if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) continue; status = 
ops->recover_lock(state, fl); if (status >= 0) diff --git a/trunk/fs/nfs/nfs4xdr.c b/trunk/fs/nfs/nfs4xdr.c index 51dd3804866f..badd73b7ca12 100644 --- a/trunk/fs/nfs/nfs4xdr.c +++ b/trunk/fs/nfs/nfs4xdr.c @@ -376,12 +376,10 @@ static int nfs4_stat_to_errno(int); decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_access_maxsz + \ - encode_getattr_maxsz) + encode_access_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - decode_access_maxsz + \ - decode_getattr_maxsz) + decode_access_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -564,6 +562,7 @@ struct compound_hdr { #define RESERVE_SPACE(nbytes) do { \ p = xdr_reserve_space(xdr, nbytes); \ + if (!p) printk("RESERVE_SPACE(%d) failed in function %s\n", (int) (nbytes), __FUNCTION__); \ BUG_ON(!p); \ } while (0) @@ -629,8 +628,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s if (iap->ia_valid & ATTR_UID) { owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); if (owner_namelen < 0) { - dprintk("nfs: couldn't resolve uid %d to string\n", - iap->ia_uid); + printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", + iap->ia_uid); /* XXX */ strcpy(owner_name, "nobody"); owner_namelen = sizeof("nobody") - 1; @@ -641,8 +640,8 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s if (iap->ia_valid & ATTR_GID) { owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); if (owner_grouplen < 0) { - dprintk("nfs: couldn't resolve gid %d to string\n", - iap->ia_gid); + printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", + iap->ia_gid); strcpy(owner_group, "nobody"); owner_grouplen = sizeof("nobody") - 1; /* goto out; */ @@ -712,7 +711,7 @@ static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const s * Now we 
backfill the bitmap and the attribute buffer length. */ if (len != ((char *)p - (char *)q) + 4) { - printk(KERN_ERR "nfs: Attr length error, %u != %Zu\n", + printk ("encode_attr: Attr length calculation error! %u != %Zu\n", len, ((char *)p - (char *)q) + 4); BUG(); } @@ -1377,20 +1376,14 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 2, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, p); encode_compound_hdr(&xdr, &hdr); - status = encode_putfh(&xdr, args->fh); - if (status != 0) - goto out; - status = encode_access(&xdr, args->access); - if (status != 0) - goto out; - status = encode_getfattr(&xdr, args->bitmask); -out: + if ((status = encode_putfh(&xdr, args->fh)) == 0) + status = encode_access(&xdr, args->access); return status; } @@ -1864,7 +1857,6 @@ static int nfs4_xdr_enc_read(struct rpc_rqst *req, __be32 *p, struct nfs_readarg replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_read_sz) << 2; xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, args->pgbase, args->count); - req->rq_rcv_buf.flags |= XDRBUF_READ; out: return status; } @@ -1941,7 +1933,6 @@ static int nfs4_xdr_enc_write(struct rpc_rqst *req, __be32 *p, struct nfs_writea status = encode_write(&xdr, args); if (status) goto out; - req->rq_snd_buf.flags |= XDRBUF_WRITE; status = encode_getfattr(&xdr, args->bitmask); out: return status; @@ -2189,9 +2180,9 @@ static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, __be32 *p, struct nfs #define READ_BUF(nbytes) do { \ p = xdr_inline_decode(xdr, nbytes); \ if (unlikely(!p)) { \ - dprintk("nfs: %s: prematurely hit end of receive" \ + printk(KERN_INFO "%s: prematurely hit end of receive" \ " buffer\n", __FUNCTION__); \ - dprintk("nfs: %s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ + printk(KERN_INFO "%s: xdr->p=%p, bytes=%u, xdr->end=%p\n", \ __FUNCTION__, xdr->p, nbytes, xdr->end); \ return -EIO; \ } \ @@ -2232,8 +2223,9 @@ static 
int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) READ_BUF(8); READ32(opnum); if (opnum != expected) { - dprintk("nfs: Server returned operation" - " %d but we issued a request for %d\n", + printk(KERN_NOTICE + "nfs4_decode_op_hdr: Server returned operation" + " %d but we issued a request for %d\n", opnum, expected); return -EIO; } @@ -2766,7 +2758,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, struct nf dprintk("%s: nfs_map_name_to_uid failed!\n", __FUNCTION__); } else - dprintk("%s: name too long (%u)!\n", + printk(KERN_WARNING "%s: name too long (%u)!\n", __FUNCTION__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER; } @@ -2791,7 +2783,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, struct nf dprintk("%s: nfs_map_group_to_gid failed!\n", __FUNCTION__); } else - dprintk("%s: name too long (%u)!\n", + printk(KERN_WARNING "%s: name too long (%u)!\n", __FUNCTION__, len); bitmap[1] &= ~FATTR4_WORD1_OWNER_GROUP; } @@ -2958,8 +2950,7 @@ static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrl unsigned int nwords = xdr->p - savep; if (unlikely(attrwords != nwords)) { - dprintk("%s: server returned incorrect attribute length: " - "%u %c %u\n", + printk(KERN_WARNING "%s: server returned incorrect attribute length: %u %c %u\n", __FUNCTION__, attrwords << 2, (attrwords < nwords) ? 
'<' : '>', @@ -3460,7 +3451,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_ hdrlen = (u8 *) p - (u8 *) iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (count > recvd) { - dprintk("NFS: server cheating in read reply: " + printk(KERN_WARNING "NFS: server cheating in read reply: " "count %u > recvd %u\n", count, recvd); count = recvd; eof = 0; @@ -3509,8 +3500,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n p += 2; /* cookie */ len = ntohl(*p++); /* filename length */ if (len > NFS4_MAXNAMLEN) { - dprintk("NFS: giant filename in readdir (len 0x%x)\n", - len); + printk(KERN_WARNING "NFS: giant filename in readdir (len 0x%x)\n", len); goto err_unmap; } xlen = XDR_QUADLEN(len); @@ -3538,7 +3528,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n entry[0] = entry[1] = 0; /* truncate listing ? */ if (!nr) { - dprintk("NFS: readdir reply truncated!\n"); + printk(KERN_NOTICE "NFS: readdir reply truncated!\n"); entry[1] = 1; } goto out; @@ -3564,13 +3554,13 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) READ_BUF(4); READ32(len); if (len >= rcvbuf->page_len || len <= 0) { - dprintk("nfs: server returned giant symlink!\n"); + dprintk(KERN_WARNING "nfs: server returned giant symlink!\n"); return -ENAMETOOLONG; } hdrlen = (char *) xdr->p - (char *) iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (recvd < len) { - dprintk("NFS: server cheating in readlink reply: " + printk(KERN_WARNING "NFS: server cheating in readlink reply: " "count %u > recvd %u\n", len, recvd); return -EIO; } @@ -3653,7 +3643,7 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; recvd = req->rq_rcv_buf.len - hdrlen; if (attrlen > recvd) { - dprintk("NFS: server cheating in getattr" + printk(KERN_WARNING "NFS: server cheating in getattr" " acl reply: attrlen %u > recvd %u\n", attrlen, recvd); 
return -EINVAL; @@ -3698,7 +3688,8 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp) READ_BUF(8); READ32(opnum); if (opnum != OP_SETCLIENTID) { - dprintk("nfs: decode_setclientid: Server returned operation" + printk(KERN_NOTICE + "nfs4_decode_setclientid: Server returned operation" " %d\n", opnum); return -EIO; } @@ -3792,13 +3783,8 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; - status = decode_putfh(&xdr); - if (status != 0) - goto out; - status = decode_access(&xdr, res); - if (status != 0) - goto out; - decode_getfattr(&xdr, res->fattr, res->server); + if ((status = decode_putfh(&xdr)) == 0) + status = decode_access(&xdr, res); out: return status; } diff --git a/trunk/fs/nfs/nfsroot.c b/trunk/fs/nfs/nfsroot.c index e87b44ee9ac9..3490322d1145 100644 --- a/trunk/fs/nfs/nfsroot.c +++ b/trunk/fs/nfs/nfsroot.c @@ -76,7 +76,6 @@ #include #include #include -#include #include #include #include @@ -492,7 +491,7 @@ static int __init root_nfs_get_handle(void) struct sockaddr_in sin; int status; int protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? - XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP; + IPPROTO_TCP : IPPROTO_UDP; int version = (nfs_data.flags & NFS_MOUNT_VER3) ? 
NFS_MNT3_VERSION : NFS_MNT_VERSION; diff --git a/trunk/fs/nfs/proc.c b/trunk/fs/nfs/proc.c index 97669ed05500..845cdde1d8b7 100644 --- a/trunk/fs/nfs/proc.c +++ b/trunk/fs/nfs/proc.c @@ -476,8 +476,6 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, dprintk("NFS call readdir %d\n", (unsigned int)cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - nfs_invalidate_atime(dir); - dprintk("NFS reply readdir: %d\n", status); return status; } @@ -552,7 +550,6 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data) { - nfs_invalidate_atime(data->inode); if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -579,7 +576,7 @@ static void nfs_proc_read_setup(struct nfs_read_data *data) static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data) { if (task->tk_status >= 0) - nfs_post_op_update_inode_force_wcc(data->inode, data->res.fattr); + nfs_post_op_update_inode(data->inode, data->res.fattr); return 0; } diff --git a/trunk/fs/nfs/read.c b/trunk/fs/nfs/read.c index 4587a86adaac..19e05633f4e3 100644 --- a/trunk/fs/nfs/read.c +++ b/trunk/fs/nfs/read.c @@ -341,6 +341,9 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); nfs_mark_for_revalidate(data->inode); } + spin_lock(&data->inode->i_lock); + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); return 0; } @@ -494,7 +497,8 @@ int nfs_readpage(struct file *file, struct page *page) if (ctx == NULL) goto out_unlock; } else - ctx = get_nfs_open_context(nfs_file_open_context(file)); + ctx = get_nfs_open_context((struct nfs_open_context *) + file->private_data); error = nfs_readpage_async(ctx, inode, page); @@ -572,7 +576,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, 
if (desc.ctx == NULL) return -EBADF; } else - desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); + desc.ctx = get_nfs_open_context((struct nfs_open_context *) + filp->private_data); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/trunk/fs/nfs/super.c b/trunk/fs/nfs/super.c index fa517ae9207f..b878528b64c1 100644 --- a/trunk/fs/nfs/super.c +++ b/trunk/fs/nfs/super.c @@ -33,8 +33,6 @@ #include #include #include -#include -#include #include #include #include @@ -60,6 +58,36 @@ #define NFSDBG_FACILITY NFSDBG_VFS + +struct nfs_parsed_mount_data { + int flags; + int rsize, wsize; + int timeo, retrans; + int acregmin, acregmax, + acdirmin, acdirmax; + int namlen; + unsigned int bsize; + unsigned int auth_flavor_len; + rpc_authflavor_t auth_flavors[1]; + char *client_address; + + struct { + struct sockaddr_in address; + unsigned int program; + unsigned int version; + unsigned short port; + int protocol; + } mount_server; + + struct { + struct sockaddr_in address; + char *hostname; + char *export_path; + unsigned int program; + int protocol; + } nfs_server; +}; + enum { /* Mount options that take no arguments */ Opt_soft, Opt_hard, @@ -69,7 +97,7 @@ enum { Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, - Opt_udp, Opt_tcp, Opt_rdma, + Opt_udp, Opt_tcp, Opt_acl, Opt_noacl, Opt_rdirplus, Opt_nordirplus, Opt_sharecache, Opt_nosharecache, @@ -88,7 +116,7 @@ enum { /* Mount options that take string arguments */ Opt_sec, Opt_proto, Opt_mountproto, - Opt_addr, Opt_mountaddr, Opt_clientaddr, + Opt_addr, Opt_mounthost, Opt_clientaddr, /* Mount options that are ignored */ Opt_userspace, Opt_deprecated, @@ -115,7 +143,6 @@ static match_table_t nfs_mount_option_tokens = { { Opt_v3, "v3" }, { Opt_udp, "udp" }, { Opt_tcp, "tcp" }, - { Opt_rdma, "rdma" }, { Opt_acl, "acl" }, { Opt_noacl, "noacl" }, { Opt_rdirplus, "rdirplus" }, @@ -148,14 +175,13 @@ static match_table_t nfs_mount_option_tokens = { { 
Opt_mountproto, "mountproto=%s" }, { Opt_addr, "addr=%s" }, { Opt_clientaddr, "clientaddr=%s" }, - { Opt_userspace, "mounthost=%s" }, - { Opt_mountaddr, "mountaddr=%s" }, + { Opt_mounthost, "mounthost=%s" }, { Opt_err, NULL } }; enum { - Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_rdma, + Opt_xprt_udp, Opt_xprt_tcp, Opt_xprt_err }; @@ -163,7 +189,6 @@ enum { static match_table_t nfs_xprt_protocol_tokens = { { Opt_xprt_udp, "udp" }, { Opt_xprt_tcp, "tcp" }, - { Opt_xprt_rdma, "rdma" }, { Opt_xprt_err, NULL } }; @@ -424,7 +449,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, const char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", ",nointr" }, + { NFS_MOUNT_INTR, ",intr", "" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", "" }, @@ -435,6 +460,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, }; const struct proc_nfs_info *nfs_infop; struct nfs_client *clp = nfss->nfs_client; + char buf[12]; + const char *proto; seq_printf(m, ",vers=%d", clp->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); @@ -453,8 +480,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, else seq_puts(m, nfs_infop->nostr); } - seq_printf(m, ",proto=%s", - rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO)); + switch (nfss->client->cl_xprt->prot) { + case IPPROTO_TCP: + proto = "tcp"; + break; + case IPPROTO_UDP: + proto = "udp"; + break; + default: + snprintf(buf, sizeof(buf), "%u", nfss->client->cl_xprt->prot); + proto = buf; + } + seq_printf(m, ",proto=%s", proto); seq_printf(m, ",timeo=%lu", 10U * clp->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", clp->retrans_count); seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); @@ -469,8 +506,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) nfs_show_mount_options(m, nfss, 0); - 
seq_printf(m, ",addr="NIPQUAD_FMT, - NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); + seq_puts(m, ",addr="); + seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); return 0; } @@ -661,19 +698,13 @@ static int nfs_parse_mount_options(char *raw, break; case Opt_udp: mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + mnt->nfs_server.protocol = IPPROTO_UDP; mnt->timeo = 7; mnt->retrans = 5; break; case Opt_tcp: mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - mnt->timeo = 600; - mnt->retrans = 2; - break; - case Opt_rdma: - mnt->flags |= NFS_MOUNT_TCP; /* for side protocols */ - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + mnt->nfs_server.protocol = IPPROTO_TCP; mnt->timeo = 600; mnt->retrans = 2; break; @@ -882,20 +913,13 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; + mnt->nfs_server.protocol = IPPROTO_UDP; mnt->timeo = 7; mnt->retrans = 5; break; case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; - mnt->timeo = 600; - mnt->retrans = 2; - break; - case Opt_xprt_rdma: - /* vector side protocols to TCP */ - mnt->flags |= NFS_MOUNT_TCP; - mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; + mnt->nfs_server.protocol = IPPROTO_TCP; mnt->timeo = 600; mnt->retrans = 2; break; @@ -913,12 +937,11 @@ static int nfs_parse_mount_options(char *raw, switch (token) { case Opt_xprt_udp: - mnt->mount_server.protocol = XPRT_TRANSPORT_UDP; + mnt->mount_server.protocol = IPPROTO_UDP; break; case Opt_xprt_tcp: - mnt->mount_server.protocol = XPRT_TRANSPORT_TCP; + mnt->mount_server.protocol = IPPROTO_TCP; break; - case Opt_xprt_rdma: /* not used for side protocols */ default: goto out_unrec_xprt; } @@ -938,7 +961,7 @@ static int nfs_parse_mount_options(char *raw, goto out_nomem; mnt->client_address = string; break; - case Opt_mountaddr: + case Opt_mounthost: 
string = match_strdup(args); if (string == NULL) goto out_nomem; @@ -1004,10 +1027,16 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, sin = args->mount_server.address; else sin = args->nfs_server.address; - /* - * autobind will be used if mount_server.port == 0 - */ - sin.sin_port = htons(args->mount_server.port); + if (args->mount_server.port == 0) { + status = rpcb_getport_sync(&sin, + args->mount_server.program, + args->mount_server.version, + args->mount_server.protocol); + if (status < 0) + goto out_err; + sin.sin_port = htons(status); + } else + sin.sin_port = htons(args->mount_server.port); /* * Now ask the mount server to map our export path @@ -1020,11 +1049,14 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, args->mount_server.version, args->mount_server.protocol, root_fh); - if (status == 0) - return 0; + if (status < 0) + goto out_err; - dfprintk(MOUNT, "NFS: unable to mount server " NIPQUAD_FMT - ", error %d\n", NIPQUAD(sin.sin_addr.s_addr), status); + return status; + +out_err: + dfprintk(MOUNT, "NFS: unable to contact server on host " + NIPQUAD_FMT "\n", NIPQUAD(sin.sin_addr.s_addr)); return status; } @@ -1047,31 +1079,15 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args, * XXX: as far as I can tell, changing the NFS program number is not * supported in the NFS client. 
*/ -static int nfs_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, +static int nfs_validate_mount_data(struct nfs_mount_data **options, struct nfs_fh *mntfh, const char *dev_name) { - struct nfs_mount_data *data = (struct nfs_mount_data *)options; + struct nfs_mount_data *data = *options; if (data == NULL) goto out_no_data; - memset(args, 0, sizeof(*args)); - args->flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP); - args->rsize = NFS_MAX_FILE_IO_SIZE; - args->wsize = NFS_MAX_FILE_IO_SIZE; - args->timeo = 600; - args->retrans = 2; - args->acregmin = 3; - args->acregmax = 60; - args->acdirmin = 30; - args->acdirmax = 60; - args->mount_server.protocol = XPRT_TRANSPORT_UDP; - args->mount_server.program = NFS_MNT_PROGRAM; - args->nfs_server.protocol = XPRT_TRANSPORT_TCP; - args->nfs_server.program = NFS_PROGRAM; - switch (data->version) { case 1: data->namlen = 0; @@ -1100,73 +1116,92 @@ static int nfs_validate_mount_data(void *options, if (mntfh->size < sizeof(mntfh->data)) memset(mntfh->data + mntfh->size, 0, sizeof(mntfh->data) - mntfh->size); - - if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) - goto out_no_address; - - /* - * Translate to nfs_parsed_mount_data, which nfs_fill_super - * can deal with. - */ - args->flags = data->flags; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->flags = data->flags; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->nfs_server.address = data->addr; - if (!(data->flags & NFS_MOUNT_TCP)) - args->nfs_server.protocol = XPRT_TRANSPORT_UDP; - /* N.B. 
caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrdup(data->hostname, GFP_KERNEL); - args->namlen = data->namlen; - args->bsize = data->bsize; - args->auth_flavors[0] = data->pseudoflavor; break; default: { unsigned int len; char *c; int status; - - if (nfs_parse_mount_options((char *)options, args) == 0) + struct nfs_parsed_mount_data args = { + .flags = (NFS_MOUNT_VER3 | NFS_MOUNT_TCP), + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .mount_server.protocol = IPPROTO_UDP, + .mount_server.program = NFS_MNT_PROGRAM, + .nfs_server.protocol = IPPROTO_TCP, + .nfs_server.program = NFS_PROGRAM, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) return -EINVAL; - if (!nfs_verify_server_address((struct sockaddr *) - &args->nfs_server.address)) - goto out_no_address; + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + + /* + * NB: after this point, caller will free "data" + * if we return an error + */ + *options = data; c = strchr(dev_name, ':'); if (c == NULL) return -EINVAL; len = c - dev_name; - /* N.B. caller will free nfs_server.hostname in all cases */ - args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL); + if (len > sizeof(data->hostname)) + return -ENAMETOOLONG; + strncpy(data->hostname, dev_name, len); + args.nfs_server.hostname = data->hostname; c++; if (strlen(c) > NFS_MAXPATHLEN) return -ENAMETOOLONG; - args->nfs_server.export_path = c; + args.nfs_server.export_path = c; - status = nfs_try_mount(args, mntfh); + status = nfs_try_mount(&args, mntfh); if (status) return status; + /* + * Translate to nfs_mount_data, which nfs_fill_super + * can deal with. 
+ */ + data->version = 6; + data->flags = args.flags; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->addr = args.nfs_server.address; + data->namlen = args.namlen; + data->bsize = args.bsize; + data->pseudoflavor = args.auth_flavors[0]; + break; } } - if (!(args->flags & NFS_MOUNT_SECFLAVOUR)) - args->auth_flavors[0] = RPC_AUTH_UNIX; + if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) + data->pseudoflavor = RPC_AUTH_UNIX; #ifndef CONFIG_NFS_V3 - if (args->flags & NFS_MOUNT_VER3) + if (data->flags & NFS_MOUNT_VER3) goto out_v3_not_compiled; #endif /* !CONFIG_NFS_V3 */ + if (!nfs_verify_server_address((struct sockaddr *) &data->addr)) + goto out_no_address; + return 0; out_no_data: @@ -1223,8 +1258,7 @@ static inline void nfs_initialise_sb(struct super_block *sb) /* * Finish setting up an NFS2/3 superblock */ -static void nfs_fill_super(struct super_block *sb, - struct nfs_parsed_mount_data *data) +static void nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data) { struct nfs_server *server = NFS_SB(sb); @@ -1345,7 +1379,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh mntfh; - struct nfs_parsed_mount_data data; + struct nfs_mount_data *data = raw_data; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { @@ -1354,12 +1388,12 @@ static int nfs_get_sb(struct file_system_type *fs_type, int error; /* Validate the mount data */ - error = nfs_validate_mount_data(raw_data, &data, &mntfh, dev_name); + error = nfs_validate_mount_data(&data, &mntfh, dev_name); if (error < 0) goto out; /* Get a volume representation */ - server = nfs_create_server(&data, &mntfh); + server = nfs_create_server(data, 
&mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1383,7 +1417,7 @@ static int nfs_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - nfs_fill_super(s, &data); + nfs_fill_super(s, data); } mntroot = nfs_get_root(s, &mntfh); @@ -1398,7 +1432,8 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.nfs_server.hostname); + if (data != raw_data) + kfree(data); return error; out_err_nosb: @@ -1524,49 +1559,38 @@ static void nfs4_fill_super(struct super_block *sb) /* * Validate NFSv4 mount options */ -static int nfs4_validate_mount_data(void *options, - struct nfs_parsed_mount_data *args, - const char *dev_name) +static int nfs4_validate_mount_data(struct nfs4_mount_data **options, + const char *dev_name, + struct sockaddr_in *addr, + rpc_authflavor_t *authflavour, + char **hostname, + char **mntpath, + char **ip_addr) { - struct nfs4_mount_data *data = (struct nfs4_mount_data *)options; + struct nfs4_mount_data *data = *options; char *c; if (data == NULL) goto out_no_data; - memset(args, 0, sizeof(*args)); - args->rsize = NFS_MAX_FILE_IO_SIZE; - args->wsize = NFS_MAX_FILE_IO_SIZE; - args->timeo = 600; - args->retrans = 2; - args->acregmin = 3; - args->acregmax = 60; - args->acdirmin = 30; - args->acdirmax = 60; - args->nfs_server.protocol = XPRT_TRANSPORT_TCP; - switch (data->version) { case 1: - if (data->host_addrlen != sizeof(args->nfs_server.address)) + if (data->host_addrlen != sizeof(*addr)) goto out_no_address; - if (copy_from_user(&args->nfs_server.address, - data->host_addr, - sizeof(args->nfs_server.address))) + if (copy_from_user(addr, data->host_addr, sizeof(*addr))) return -EFAULT; - if (args->nfs_server.address.sin_port == 0) - args->nfs_server.address.sin_port = htons(NFS_PORT); - if (!nfs_verify_server_address((struct sockaddr *) - &args->nfs_server.address)) + if (addr->sin_port == 0) + addr->sin_port = htons(NFS_PORT); + if 
(!nfs_verify_server_address((struct sockaddr *) addr)) goto out_no_address; switch (data->auth_flavourlen) { case 0: - args->auth_flavors[0] = RPC_AUTH_UNIX; + *authflavour = RPC_AUTH_UNIX; break; case 1: - if (copy_from_user(&args->auth_flavors[0], - data->auth_flavours, - sizeof(args->auth_flavors[0]))) + if (copy_from_user(authflavour, data->auth_flavours, + sizeof(*authflavour))) return -EFAULT; break; default: @@ -1576,56 +1600,74 @@ static int nfs4_validate_mount_data(void *options, c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) return PTR_ERR(c); - args->nfs_server.hostname = c; + *hostname = c; c = strndup_user(data->mnt_path.data, NFS4_MAXPATHLEN); if (IS_ERR(c)) return PTR_ERR(c); - args->nfs_server.export_path = c; - dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", c); + *mntpath = c; + dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *mntpath); c = strndup_user(data->client_addr.data, 16); if (IS_ERR(c)) return PTR_ERR(c); - args->client_address = c; - - /* - * Translate to nfs_parsed_mount_data, which nfs4_fill_super - * can deal with. 
- */ - - args->flags = data->flags & NFS4_MOUNT_FLAGMASK; - args->rsize = data->rsize; - args->wsize = data->wsize; - args->timeo = data->timeo; - args->retrans = data->retrans; - args->acregmin = data->acregmin; - args->acregmax = data->acregmax; - args->acdirmin = data->acdirmin; - args->acdirmax = data->acdirmax; - args->nfs_server.protocol = data->proto; + *ip_addr = c; break; default: { unsigned int len; - - if (nfs_parse_mount_options((char *)options, args) == 0) + struct nfs_parsed_mount_data args = { + .rsize = NFS_MAX_FILE_IO_SIZE, + .wsize = NFS_MAX_FILE_IO_SIZE, + .timeo = 600, + .retrans = 2, + .acregmin = 3, + .acregmax = 60, + .acdirmin = 30, + .acdirmax = 60, + .nfs_server.protocol = IPPROTO_TCP, + }; + + if (nfs_parse_mount_options((char *) *options, &args) == 0) return -EINVAL; if (!nfs_verify_server_address((struct sockaddr *) - &args->nfs_server.address)) + &args.nfs_server.address)) return -EINVAL; + *addr = args.nfs_server.address; - switch (args->auth_flavor_len) { + switch (args.auth_flavor_len) { case 0: - args->auth_flavors[0] = RPC_AUTH_UNIX; + *authflavour = RPC_AUTH_UNIX; break; case 1: + *authflavour = (rpc_authflavor_t) args.auth_flavors[0]; break; default: goto out_inval_auth; } + /* + * Translate to nfs4_mount_data, which nfs4_fill_super + * can deal with. + */ + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (data == NULL) + return -ENOMEM; + *options = data; + + data->version = 1; + data->flags = args.flags & NFS4_MOUNT_FLAGMASK; + data->rsize = args.rsize; + data->wsize = args.wsize; + data->timeo = args.timeo; + data->retrans = args.retrans; + data->acregmin = args.acregmin; + data->acregmax = args.acregmax; + data->acdirmin = args.acdirmin; + data->acdirmax = args.acdirmax; + data->proto = args.nfs_server.protocol; + /* * Split "dev_name" into "hostname:mntpath". 
*/ @@ -1636,25 +1678,27 @@ static int nfs4_validate_mount_data(void *options, len = c - dev_name; if (len > NFS4_MAXNAMLEN) return -ENAMETOOLONG; - args->nfs_server.hostname = kzalloc(len, GFP_KERNEL); - if (args->nfs_server.hostname == NULL) + *hostname = kzalloc(len, GFP_KERNEL); + if (*hostname == NULL) return -ENOMEM; - strncpy(args->nfs_server.hostname, dev_name, len - 1); + strncpy(*hostname, dev_name, len - 1); c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) return -ENAMETOOLONG; - args->nfs_server.export_path = kzalloc(len + 1, GFP_KERNEL); - if (args->nfs_server.export_path == NULL) + *mntpath = kzalloc(len + 1, GFP_KERNEL); + if (*mntpath == NULL) return -ENOMEM; - strncpy(args->nfs_server.export_path, c, len); + strncpy(*mntpath, c, len); - dprintk("MNTPATH: %s\n", args->nfs_server.export_path); + dprintk("MNTPATH: %s\n", *mntpath); - if (args->client_address == NULL) + if (args.client_address == NULL) goto out_no_client_address; + *ip_addr = args.client_address; + break; } } @@ -1685,11 +1729,14 @@ static int nfs4_validate_mount_data(void *options, static int nfs4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { - struct nfs_parsed_mount_data data; + struct nfs4_mount_data *data = raw_data; struct super_block *s; struct nfs_server *server; + struct sockaddr_in addr; + rpc_authflavor_t authflavour; struct nfs_fh mntfh; struct dentry *mntroot; + char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; struct nfs_sb_mountdata sb_mntdata = { .mntflags = flags, @@ -1697,12 +1744,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type, int error; /* Validate the mount data */ - error = nfs4_validate_mount_data(raw_data, &data, dev_name); + error = nfs4_validate_mount_data(&data, dev_name, &addr, &authflavour, + &hostname, &mntpath, &ip_addr); if (error < 0) goto out; /* Get a volume 
representation */ - server = nfs4_create_server(&data, &mntfh); + server = nfs4_create_server(data, hostname, &addr, mntpath, ip_addr, + authflavour, &mntfh); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1741,9 +1790,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = 0; out: - kfree(data.client_address); - kfree(data.nfs_server.export_path); - kfree(data.nfs_server.hostname); + kfree(ip_addr); + kfree(mntpath); + kfree(hostname); return error; out_free: diff --git a/trunk/fs/nfs/unlink.c b/trunk/fs/nfs/unlink.c index 1aed850d18f2..045ab805c17f 100644 --- a/trunk/fs/nfs/unlink.c +++ b/trunk/fs/nfs/unlink.c @@ -66,6 +66,7 @@ static void nfs_async_unlink_init(struct rpc_task *task, void *calldata) .rpc_cred = data->cred, }; + nfs_begin_data_update(dir); NFS_PROTO(dir)->unlink_setup(&msg, dir); rpc_call_setup(task, &msg, 0); } @@ -83,6 +84,8 @@ static void nfs_async_unlink_done(struct rpc_task *task, void *calldata) if (!NFS_PROTO(dir)->unlink_done(task, dir)) rpc_restart_call(task); + else + nfs_end_data_update(dir); } /** diff --git a/trunk/fs/nfs/write.c b/trunk/fs/nfs/write.c index e2bb66c34406..0d7a77cc394b 100644 --- a/trunk/fs/nfs/write.c +++ b/trunk/fs/nfs/write.c @@ -110,13 +110,6 @@ void nfs_writedata_release(void *wdata) nfs_writedata_free(wdata); } -static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) -{ - ctx->error = error; - smp_wmb(); - set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); -} - static struct nfs_page *nfs_page_find_request_locked(struct page *page) { struct nfs_page *req = NULL; @@ -250,7 +243,10 @@ static void nfs_end_page_writeback(struct page *page) /* * Find an associated nfs write request, and prepare to flush it out - * May return an error if the user signalled nfs_wait_on_request(). + * Returns 1 if there was no write request, or if the request was + * already tagged by nfs_set_page_dirty.Returns 0 if the request + * was not tagged. 
+ * May also return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) @@ -265,7 +261,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, req = nfs_page_find_request_locked(page); if (req == NULL) { spin_unlock(&inode->i_lock); - return 0; + return 1; } if (nfs_lock_request_dontget(req)) break; @@ -286,7 +282,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); - return 0; + return 1; } if (nfs_set_page_writeback(page) != 0) { spin_unlock(&inode->i_lock); @@ -294,56 +290,70 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); + ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); - return 0; + return ret; } -static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) +/* + * Write an mmapped page to the server. + */ +static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) { + struct nfs_pageio_descriptor mypgio, *pgio; + struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; + unsigned offset; + int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - nfs_pageio_cond_complete(pgio, page->index); - return nfs_page_async_flush(pgio, page); -} + if (wbc->for_writepages) + pgio = wbc->fs_private; + else { + nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); + pgio = &mypgio; + } -/* - * Write an mmapped page to the server. 
- */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) -{ - struct nfs_pageio_descriptor pgio; - int err; + nfs_pageio_cond_complete(pgio, page->index); - nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); - err = nfs_do_writepage(page, wbc, &pgio); - nfs_pageio_complete(&pgio); - if (err < 0) - return err; - if (pgio.pg_error < 0) - return pgio.pg_error; - return 0; -} + err = nfs_page_async_flush(pgio, page); + if (err <= 0) + goto out; + err = 0; + offset = nfs_page_length(page); + if (!offset) + goto out; -int nfs_writepage(struct page *page, struct writeback_control *wbc) -{ - int ret; + nfs_pageio_cond_complete(pgio, page->index); - ret = nfs_writepage_locked(page, wbc); - unlock_page(page); - return ret; + ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); + if (ctx == NULL) { + err = -EBADF; + goto out; + } + err = nfs_writepage_setup(ctx, page, 0, offset); + put_nfs_open_context(ctx); + if (err != 0) + goto out; + err = nfs_page_async_flush(pgio, page); + if (err > 0) + err = 0; +out: + if (!wbc->for_writepages) + nfs_pageio_complete(pgio); + return err; } -static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +int nfs_writepage(struct page *page, struct writeback_control *wbc) { - int ret; + int err; - ret = nfs_do_writepage(page, wbc, data); + err = nfs_writepage_locked(page, wbc); unlock_page(page); - return ret; + return err; } int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -355,11 +365,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); - err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); + wbc->fs_private = &pgio; + err = generic_writepages(mapping, wbc); nfs_pageio_complete(&pgio); - if (err < 0) + if (err) return err; - if (pgio.pg_error < 0) + if (pgio.pg_error) 
return pgio.pg_error; return 0; } @@ -378,11 +389,14 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) return error; if (!nfsi->npages) { igrab(inode); + nfs_begin_data_update(inode); if (nfs_have_delegation(inode, FMODE_WRITE)) nfsi->change_attr++; } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); + if (PageDirty(req->wb_page)) + set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; kref_get(&req->wb_kref); return 0; @@ -402,9 +416,12 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); + if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) + __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&inode->i_lock); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&inode->i_lock); @@ -665,7 +682,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, int nfs_flush_incompatible(struct file *file, struct page *page) { - struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct nfs_page *req; int do_flush, status; /* @@ -699,7 +716,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { - struct nfs_open_context *ctx = nfs_file_open_context(file); + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = page->mapping->host; int status = 0; @@ -950,7 +967,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, task->tk_status); + req->wb_context->error = task->tk_status; dprintk(", error = %d\n", task->tk_status); goto out; } @@ -1013,7 
+1030,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - nfs_context_set_write_error(req->wb_context, task->tk_status); + req->wb_context->error = task->tk_status; dprintk(", error = %d\n", task->tk_status); goto remove_request; } @@ -1227,7 +1244,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - nfs_context_set_write_error(req->wb_context, task->tk_status); + req->wb_context->error = task->tk_status; nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; @@ -1330,52 +1347,53 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) +/* + * flush the inode to disk. + */ +int nfs_wb_all(struct inode *inode) { + struct address_space *mapping = inode->i_mapping; + struct writeback_control wbc = { + .bdi = mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .for_writepages = 1, + .range_cyclic = 1, + }; int ret; - ret = nfs_writepages(mapping, wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, wbc, how); + ret = nfs_writepages(mapping, &wbc); if (ret < 0) goto out; - return 0; + ret = nfs_sync_mapping_wait(mapping, &wbc, 0); + if (ret >= 0) + return 0; out: __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return ret; } -/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ -static int nfs_write_mapping(struct address_space *mapping, int how) +int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) { struct writeback_control wbc = { .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_NONE, + .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, + .range_start = range_start, + .range_end = range_end, 
.for_writepages = 1, - .range_cyclic = 1, }; int ret; - ret = __nfs_write_mapping(mapping, &wbc, how); + ret = nfs_writepages(mapping, &wbc); if (ret < 0) - return ret; - wbc.sync_mode = WB_SYNC_ALL; - return __nfs_write_mapping(mapping, &wbc, how); -} - -/* - * flush the inode to disk. - */ -int nfs_wb_all(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, 0); -} - -int nfs_wb_nocommit(struct inode *inode) -{ - return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); + goto out; + ret = nfs_sync_mapping_wait(mapping, &wbc, how); + if (ret >= 0) + return 0; +out: + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return ret; } int nfs_wb_page_cancel(struct inode *inode, struct page *page) @@ -1459,6 +1477,35 @@ int nfs_wb_page(struct inode *inode, struct page* page) return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } +int nfs_set_page_dirty(struct page *page) +{ + struct address_space *mapping = page->mapping; + struct inode *inode; + struct nfs_page *req; + int ret; + + if (!mapping) + goto out_raced; + inode = mapping->host; + if (!inode) + goto out_raced; + spin_lock(&inode->i_lock); + req = nfs_page_find_request_locked(page); + if (req != NULL) { + /* Mark any existing write requests for flushing */ + ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); + spin_unlock(&inode->i_lock); + nfs_release_request(req); + return ret; + } + ret = __set_page_dirty_nobuffers(page); + spin_unlock(&inode->i_lock); + return ret; +out_raced: + return !TestSetPageDirty(page); +} + + int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff --git a/trunk/fs/nfsd/nfs4xdr.c b/trunk/fs/nfsd/nfs4xdr.c index 57333944af7f..e15f2cf8ac15 100644 --- a/trunk/fs/nfsd/nfs4xdr.c +++ b/trunk/fs/nfsd/nfs4xdr.c @@ -102,8 +102,7 @@ check_filename(char *str, int len, __be32 err) out: \ return status; \ xdr_error: \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ + printk(KERN_NOTICE "xdr error! 
(%s:%d)\n", __FILE__, __LINE__); \ status = nfserr_bad_xdr; \ goto out @@ -125,8 +124,7 @@ xdr_error: \ if (!(x = (p==argp->tmp || p == argp->tmpp) ? \ savemem(argp, p, nbytes) : \ (char *)p)) { \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ goto xdr_error; \ } \ p += XDR_QUADLEN(nbytes); \ @@ -142,8 +140,7 @@ xdr_error: \ p = argp->p; \ argp->p += XDR_QUADLEN(nbytes); \ } else if (!(p = read_buf(argp, nbytes))) { \ - dprintk("NFSD: xdr error (%s:%d)\n", \ - __FILE__, __LINE__); \ + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); \ goto xdr_error; \ } \ } while (0) @@ -951,8 +948,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) */ avail = (char*)argp->end - (char*)argp->p; if (avail + argp->pagelen < write->wr_buflen) { - dprintk("NFSD: xdr error (%s:%d)\n", - __FILE__, __LINE__); + printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__); goto xdr_error; } argp->rqstp->rq_vec[0].iov_base = p; @@ -1023,7 +1019,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->ops = kmalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL); if (!argp->ops) { argp->ops = argp->iops; - dprintk("nfsd: couldn't allocate room for COMPOUND\n"); + printk(KERN_INFO "nfsd: couldn't allocate room for COMPOUND\n"); goto xdr_error; } } @@ -1330,7 +1326,7 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 * path = exp->ex_path; if (strncmp(path, rootpath, strlen(rootpath))) { - dprintk("nfsd: fs_locations failed;" + printk("nfsd: fs_locations failed;" "%s is not contained in %s\n", path, rootpath); *stat = nfserr_notsupp; return NULL; diff --git a/trunk/include/asm-blackfin/mach-bf548/bf54x_keys.h b/trunk/include/asm-blackfin/mach-bf548/bf54x_keys.h deleted file mode 100644 index 1fb4ec77cc25..000000000000 --- a/trunk/include/asm-blackfin/mach-bf548/bf54x_keys.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef 
_BFIN_KPAD_H -#define _BFIN_KPAD_H - -struct bfin_kpad_platform_data { - int rows; - int cols; - const unsigned int *keymap; - unsigned short keymapsize; - unsigned short repeat; - u32 debounce_time; /* in ns */ - u32 coldrive_time; /* in ns */ - u32 keyup_test_interval; /* in ms */ -}; - -#define KEYVAL(col, row, val) (((1 << col) << 24) | ((1 << row) << 16) | (val)) - -#endif diff --git a/trunk/include/asm-ia64/dma-mapping.h b/trunk/include/asm-ia64/dma-mapping.h index 6299b51575bb..3ca6d5c14b2e 100644 --- a/trunk/include/asm-ia64/dma-mapping.h +++ b/trunk/include/asm-ia64/dma-mapping.h @@ -6,6 +6,7 @@ * David Mosberger-Tang */ #include +#include #define dma_alloc_coherent platform_dma_alloc_coherent /* coherent mem. is cheap */ diff --git a/trunk/include/asm-mips/mach-au1x00/prom.h b/trunk/include/asm-mips/mach-au1x00/prom.h deleted file mode 100644 index e38715577c51..000000000000 --- a/trunk/include/asm-mips/mach-au1x00/prom.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __AU1X00_PROM_H -#define __AU1X00_PROM_H - -extern int prom_argc; -extern char **prom_argv; -extern char **prom_envp; - -extern void prom_init_cmdline(void); -extern char *prom_getcmdline(void); -extern char *prom_getenv(char *envname); -extern int prom_get_ethernet_addr(char *ethernet_addr); - -#endif diff --git a/trunk/include/asm-powerpc/dcr-mmio.h b/trunk/include/asm-powerpc/dcr-mmio.h index 08532ff1899c..6b82c3ba495a 100644 --- a/trunk/include/asm-powerpc/dcr-mmio.h +++ b/trunk/include/asm-powerpc/dcr-mmio.h @@ -33,16 +33,16 @@ typedef struct { extern dcr_host_t dcr_map(struct device_node *dev, unsigned int dcr_n, unsigned int dcr_c); -extern void dcr_unmap(dcr_host_t host, unsigned int dcr_c); +extern void dcr_unmap(dcr_host_t host, unsigned int dcr_n, unsigned int dcr_c); static inline u32 dcr_read(dcr_host_t host, unsigned int dcr_n) { - return in_be32(host.token + ((host.base + dcr_n) * host.stride)); + return in_be32(host.token + dcr_n * host.stride); } static inline void 
dcr_write(dcr_host_t host, unsigned int dcr_n, u32 value) { - out_be32(host.token + ((host.base + dcr_n) * host.stride), value); + out_be32(host.token + dcr_n * host.stride, value); } extern u64 of_translate_dcr_address(struct device_node *dev, diff --git a/trunk/include/asm-powerpc/dcr-native.h b/trunk/include/asm-powerpc/dcr-native.h index 8dbb1ab0aa04..f41058c0f6cb 100644 --- a/trunk/include/asm-powerpc/dcr-native.h +++ b/trunk/include/asm-powerpc/dcr-native.h @@ -29,9 +29,9 @@ typedef struct { #define DCR_MAP_OK(host) (1) #define dcr_map(dev, dcr_n, dcr_c) ((dcr_host_t){ .base = (dcr_n) }) -#define dcr_unmap(host, dcr_c) do {} while (0) -#define dcr_read(host, dcr_n) mfdcr(dcr_n + host.base) -#define dcr_write(host, dcr_n, value) mtdcr(dcr_n + host.base, value) +#define dcr_unmap(host, dcr_n, dcr_c) do {} while (0) +#define dcr_read(host, dcr_n) mfdcr(dcr_n) +#define dcr_write(host, dcr_n, value) mtdcr(dcr_n, value) /* Device Control Registers */ void __mtdcr(int reg, unsigned int val); diff --git a/trunk/include/asm-x86/irqflags_32.h b/trunk/include/asm-x86/irqflags_32.h index d058b04e0083..eff8585cb741 100644 --- a/trunk/include/asm-x86/irqflags_32.h +++ b/trunk/include/asm-x86/irqflags_32.h @@ -160,17 +160,4 @@ static inline int raw_irqs_disabled(void) # define TRACE_IRQS_OFF #endif -#ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT \ - pushl %eax; \ - pushl %ecx; \ - pushl %edx; \ - call lockdep_sys_exit; \ - popl %edx; \ - popl %ecx; \ - popl %eax; -#else -# define LOCKDEP_SYS_EXIT -#endif - #endif diff --git a/trunk/include/asm-x86/irqflags_64.h b/trunk/include/asm-x86/irqflags_64.h index 5341ea1f815a..86e70fe23659 100644 --- a/trunk/include/asm-x86/irqflags_64.h +++ b/trunk/include/asm-x86/irqflags_64.h @@ -137,20 +137,6 @@ static inline void halt(void) # define TRACE_IRQS_ON # define TRACE_IRQS_OFF # endif -# ifdef CONFIG_DEBUG_LOCK_ALLOC -# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk -# define LOCKDEP_SYS_EXIT_IRQ \ - TRACE_IRQS_ON; \ - 
sti; \ - SAVE_REST; \ - LOCKDEP_SYS_EXIT; \ - RESTORE_REST; \ - cli; \ - TRACE_IRQS_OFF; -# else -# define LOCKDEP_SYS_EXIT -# define LOCKDEP_SYS_EXIT_IRQ -# endif #endif #endif diff --git a/trunk/include/linux/fs.h b/trunk/include/linux/fs.h index 6d760f1ad875..16421f662a7a 100644 --- a/trunk/include/linux/fs.h +++ b/trunk/include/linux/fs.h @@ -1302,14 +1302,8 @@ struct file_system_type { struct module *owner; struct file_system_type * next; struct list_head fs_supers; - struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; - - struct lock_class_key i_lock_key; - struct lock_class_key i_mutex_key; - struct lock_class_key i_mutex_dir_key; - struct lock_class_key i_alloc_sem_key; }; extern int get_sb_bdev(struct file_system_type *fs_type, diff --git a/trunk/include/linux/gpio_keys.h b/trunk/include/linux/gpio_keys.h index c6d3a9de5634..265d17830a0f 100644 --- a/trunk/include/linux/gpio_keys.h +++ b/trunk/include/linux/gpio_keys.h @@ -8,7 +8,6 @@ struct gpio_keys_button { int active_low; char *desc; int type; /* input event type (EV_KEY, EV_SW) */ - int wakeup; /* configure the button as a wake-up source */ }; struct gpio_keys_platform_data { diff --git a/trunk/include/linux/if_bridge.h b/trunk/include/linux/if_bridge.h index 58e43e566457..99e3a1a00099 100644 --- a/trunk/include/linux/if_bridge.h +++ b/trunk/include/linux/if_bridge.h @@ -107,7 +107,7 @@ struct __fdb_entry extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); extern struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff *skb); -extern int (*br_should_route_hook)(struct sk_buff *skb); +extern int (*br_should_route_hook)(struct sk_buff **pskb); #endif diff --git a/trunk/include/linux/input.h b/trunk/include/linux/input.h index f30da6fc08e3..52d1bd434a50 100644 --- a/trunk/include/linux/input.h +++ b/trunk/include/linux/input.h @@ -856,7 +856,7 @@ struct ff_rumble_effect { * defining effect parameters * * This structure is sent 
through ioctl from the application to the driver. - * To create a new effect application should set its @id to -1; the kernel + * To create a new effect aplication should set its @id to -1; the kernel * will return assigned @id which can later be used to update or delete * this effect. * @@ -936,82 +936,9 @@ struct ff_effect { #define BIT(x) (1UL<<((x)%BITS_PER_LONG)) #define LONG(x) ((x)/BITS_PER_LONG) -/** - * struct input_dev - represents an input device - * @name: name of the device - * @phys: physical path to the device in the system hierarchy - * @uniq: unique identification code for the device (if device has it) - * @id: id of the device (struct input_id) - * @evbit: bitmap of types of events supported by the device (EV_KEY, - * EV_REL, etc.) - * @keybit: bitmap of keys/buttons this device has - * @relbit: bitmap of relative axes for the device - * @absbit: bitmap of absolute axes for the device - * @mscbit: bitmap of miscellaneous events supported by the device - * @ledbit: bitmap of leds present on the device - * @sndbit: bitmap of sound effects supported by the device - * @ffbit: bitmap of force feedback effects supported by the device - * @swbit: bitmap of switches present on the device - * @keycodemax: size of keycode table - * @keycodesize: size of elements in keycode table - * @keycode: map of scancodes to keycodes for this device - * @setkeycode: optional method to alter current keymap, used to implement - * sparse keymaps. If not supplied default mechanism will be used - * @getkeycode: optional method to retrieve current keymap. 
If not supplied - * default mechanism will be used - * @ff: force feedback structure associated with the device if device - * supports force feedback effects - * @repeat_key: stores key code of the last key pressed; used to implement - * software autorepeat - * @timer: timer for software autorepeat - * @sync: set to 1 when there were no new events since last EV_SYNC - * @abs: current values for reports from absolute axes - * @rep: current values for autorepeat parameters (delay, rate) - * @key: reflects current state of device's keys/buttons - * @led: reflects current state of device's LEDs - * @snd: reflects current state of sound effects - * @sw: reflects current state of device's switches - * @absmax: maximum values for events coming from absolute axes - * @absmin: minimum values for events coming from absolute axes - * @absfuzz: describes noisiness for axes - * @absflat: size of the center flat position (used by joydev) - * @open: this method is called when the very first user calls - * input_open_device(). The driver must prepare the device - * to start generating events (start polling thread, - * request an IRQ, submit URB, etc.) - * @close: this method is called when the very last user calls - * input_close_device(). - * @flush: purges the device. Most commonly used to get rid of force - * feedback effects loaded into the device when disconnecting - * from it - * @event: event handler for events sent _to_ the device, like EV_LED - * or EV_SND. The device is expected to carry out the requested - * action (turn on a LED, play sound, etc.) The call is protected - * by @event_lock and must not sleep - * @grab: input handle that currently has the device grabbed (via - * EVIOCGRAB ioctl). When a handle grabs a device it becomes sole - * recipient for all input events coming from the device - * @event_lock: this spinlock is is taken when input core receives - * and processes a new event for the device (in input_event()). 
- * Code that accesses and/or modifies parameters of a device - * (such as keymap or absmin, absmax, absfuzz, etc.) after device - * has been registered with input core must take this lock. - * @mutex: serializes calls to open(), close() and flush() methods - * @users: stores number of users (input handlers) that opened this - * device. It is used by input_open_device() and input_close_device() - * to make sure that dev->open() is only called when the first - * user opens device and dev->close() is called when the very - * last user closes the device - * @going_away: marks devices that are in a middle of unregistering and - * causes input_open_device*() fail with -ENODEV. - * @dev: driver model's view of this device - * @h_list: list of input handles associated with the device. When - * accessing the list dev->mutex must be held - * @node: used to place the device onto input_dev_list - */ struct input_dev { - void *private; /* do not use */ + void *private; const char *name; const char *phys; @@ -1039,6 +966,8 @@ struct input_dev { unsigned int repeat_key; struct timer_list timer; + int state; + int sync; int abs[ABS_MAX + 1]; @@ -1061,11 +990,8 @@ struct input_dev { struct input_handle *grab; - spinlock_t event_lock; - struct mutex mutex; - + struct mutex mutex; /* serializes open and close operations */ unsigned int users; - int going_away; struct device dev; union { /* temporarily so while we switching to struct device */ @@ -1131,9 +1057,7 @@ struct input_handle; /** * struct input_handler - implements one of interfaces for input devices * @private: driver-specific data - * @event: event handler. This method is being called by input core with - * interrupts disabled and dev->event_lock spinlock held and so - * it may not sleep + * @event: event handler * @connect: called when attaching a handler to an input device * @disconnect: disconnects a handler from input device * @start: starts handler for given handle. 
This function is called by @@ -1145,18 +1069,10 @@ struct input_handle; * @name: name of the handler, to be shown in /proc/bus/input/handlers * @id_table: pointer to a table of input_device_ids this driver can * handle - * @blacklist: pointer to a table of input_device_ids this driver should + * @blacklist: prointer to a table of input_device_ids this driver should * ignore even if they match @id_table * @h_list: list of input handles associated with the handler * @node: for placing the driver onto input_handler_list - * - * Input handlers attach to input devices and create input handles. There - * are likely several handlers attached to any given input device at the - * same time. All of them will get their copy of input event generated by - * the device. - * - * Note that input core serializes calls to connect() and disconnect() - * methods. */ struct input_handler { @@ -1178,18 +1094,6 @@ struct input_handler { struct list_head node; }; -/** - * struct input_handle - links input device with an input handler - * @private: handler-specific data - * @open: counter showing whether the handle is 'open', i.e. 
should deliver - * events from its device - * @name: name given to the handle by handler that created it - * @dev: input device the handle is attached to - * @handler: handler that works with the device through this handle - * @d_node: used to put the handle on device's list of attached handles - * @h_node: used to put the handle on handler's list of handles from which - * it gets events - */ struct input_handle { void *private; @@ -1232,10 +1136,10 @@ static inline void input_set_drvdata(struct input_dev *dev, void *data) dev->private = data; } -int __must_check input_register_device(struct input_dev *); +int input_register_device(struct input_dev *); void input_unregister_device(struct input_dev *); -int __must_check input_register_handler(struct input_handler *); +int input_register_handler(struct input_handler *); void input_unregister_handler(struct input_handler *); int input_register_handle(struct input_handle *); @@ -1312,7 +1216,7 @@ extern struct class input_class; * @max_effects: maximum number of effects supported by device * @effects: pointer to an array of effects currently loaded into device * @effect_owners: array of effect owners; when file handle owning - * an effect gets closed the effect is automatically erased + * an effect gets closed the effcet is automatically erased * * Every force-feedback device must implement upload() and playback() * methods; erase() is optional. 
set_gain() and set_autocenter() need diff --git a/trunk/include/linux/isdn.h b/trunk/include/linux/isdn.h index d5dda4b643ac..ad09506554a3 100644 --- a/trunk/include/linux/isdn.h +++ b/trunk/include/linux/isdn.h @@ -286,6 +286,7 @@ typedef struct { /* Local interface-data */ typedef struct isdn_net_local_s { ulong magic; + char name[10]; /* Name of device */ struct net_device_stats stats; /* Ethernet Statistics */ int isdn_device; /* Index to isdn-device */ int isdn_channel; /* Index to isdn-channel */ diff --git a/trunk/include/linux/jbd.h b/trunk/include/linux/jbd.h index 700a93b79189..452737551260 100644 --- a/trunk/include/linux/jbd.h +++ b/trunk/include/linux/jbd.h @@ -30,7 +30,6 @@ #include #include #include -#include #include #endif @@ -397,10 +396,6 @@ struct handle_s unsigned int h_sync: 1; /* sync-on-close */ unsigned int h_jdata: 1; /* force data journaling */ unsigned int h_aborted: 1; /* fatal error on handle */ - -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map h_lockdep_map; -#endif }; diff --git a/trunk/include/linux/jiffies.h b/trunk/include/linux/jiffies.h index e757a74b9d17..d7a5e034c3a2 100644 --- a/trunk/include/linux/jiffies.h +++ b/trunk/include/linux/jiffies.h @@ -109,10 +109,6 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) -#define time_in_range(a,b,c) \ - (time_after_eq(a,b) && \ - time_before_eq(a,c)) - /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. 
return value of * get_jiffies_64() */ diff --git a/trunk/include/linux/keyboard.h b/trunk/include/linux/keyboard.h index 33b5c2e325b9..7ddbc30aa8e7 100644 --- a/trunk/include/linux/keyboard.h +++ b/trunk/include/linux/keyboard.h @@ -416,7 +416,6 @@ extern unsigned short plain_map[NR_KEYS]; #define K_SHIFTRLOCK K(KT_LOCK,KG_SHIFTR) #define K_CTRLLLOCK K(KT_LOCK,KG_CTRLL) #define K_CTRLRLOCK K(KT_LOCK,KG_CTRLR) -#define K_CAPSSHIFTLOCK K(KT_LOCK,KG_CAPSSHIFT) #define K_SHIFT_SLOCK K(KT_SLOCK,KG_SHIFT) #define K_CTRL_SLOCK K(KT_SLOCK,KG_CTRL) @@ -426,9 +425,8 @@ extern unsigned short plain_map[NR_KEYS]; #define K_SHIFTR_SLOCK K(KT_SLOCK,KG_SHIFTR) #define K_CTRLL_SLOCK K(KT_SLOCK,KG_CTRLL) #define K_CTRLR_SLOCK K(KT_SLOCK,KG_CTRLR) -#define K_CAPSSHIFT_SLOCK K(KT_SLOCK,KG_CAPSSHIFT) -#define NR_LOCK 9 +#define NR_LOCK 8 #define K_BRL_BLANK K(KT_BRL, 0) #define K_BRL_DOT1 K(KT_BRL, 1) diff --git a/trunk/include/linux/lockdep.h b/trunk/include/linux/lockdep.h index f6279f68a827..0e843bf65877 100644 --- a/trunk/include/linux/lockdep.h +++ b/trunk/include/linux/lockdep.h @@ -238,7 +238,6 @@ extern void lockdep_info(void); extern void lockdep_reset(void); extern void lockdep_reset_lock(struct lockdep_map *lock); extern void lockdep_free_key_range(void *start, unsigned long size); -extern void lockdep_sys_exit(void); extern void lockdep_off(void); extern void lockdep_on(void); @@ -252,13 +251,6 @@ extern void lockdep_on(void); extern void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass); -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. 
- */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - /* * Reinitialize a lock key - for cases where there is special locking or * special initialization of locks so that the validator gets the scope @@ -325,7 +317,6 @@ static inline void lockdep_on(void) # define INIT_LOCKDEP # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) -# define lockdep_sys_exit() do { } while (0) /* * The class key takes no space if lockdep is disabled: */ diff --git a/trunk/include/linux/mutex.h b/trunk/include/linux/mutex.h index 6a735c72f23f..0d50ea3df689 100644 --- a/trunk/include/linux/mutex.h +++ b/trunk/include/linux/mutex.h @@ -120,17 +120,14 @@ static inline int fastcall mutex_is_locked(struct mutex *lock) * See kernel/mutex.c for detailed documentation of these APIs. * Also see Documentation/mutex-design.txt. */ +extern void fastcall mutex_lock(struct mutex *lock); +extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock); + #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); extern int __must_check mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); - -#define mutex_lock(lock) mutex_lock_nested(lock, 0) -#define mutex_lock_interruptible(lock) mutex_lock_interruptible_nested(lock, 0) #else -extern void fastcall mutex_lock(struct mutex *lock); -extern int __must_check fastcall mutex_lock_interruptible(struct mutex *lock); - # define mutex_lock_nested(lock, subclass) mutex_lock(lock) # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) #endif diff --git a/trunk/include/linux/netfilter.h b/trunk/include/linux/netfilter.h index 16adac688af5..1dd075eda595 100644 --- a/trunk/include/linux/netfilter.h +++ b/trunk/include/linux/netfilter.h @@ -51,7 +51,7 @@ struct sk_buff; struct net_device; typedef unsigned int nf_hookfn(unsigned int 
hooknum, - struct sk_buff *skb, + struct sk_buff **skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)); @@ -183,7 +183,7 @@ void nf_log_packet(int pf, struct nf_loginfo *li, const char *fmt, ...); -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh); @@ -195,7 +195,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(int pf, unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh, @@ -207,14 +207,14 @@ static inline int nf_hook_thresh(int pf, unsigned int hook, if (list_empty(&nf_hooks[pf][hook])) return 1; #endif - return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh); + return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { - return nf_hook_thresh(pf, hook, skb, indev, outdev, okfn, INT_MIN, 1); + return nf_hook_thresh(pf, hook, pskb, indev, outdev, okfn, INT_MIN, 1); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -241,13 +241,13 @@ static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, #define NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, thresh) \ ({int __ret; \ -if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, thresh, 1)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, thresh, 1)) == 1)\ __ret = (okfn)(skb); \ __ret;}) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) \ ({int 
__ret; \ -if ((__ret=nf_hook_thresh(pf, hook, (skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ +if ((__ret=nf_hook_thresh(pf, hook, &(skb), indev, outdev, okfn, INT_MIN, cond)) == 1)\ __ret = (okfn)(skb); \ __ret;}) @@ -287,7 +287,7 @@ extern void nf_invalidate_cache(int pf); /* Call this before modifying an existing packet: ensures it is modifiable and linear to the point you care about (writable_len). Returns true or false. */ -extern int skb_make_writable(struct sk_buff *skb, unsigned int writable_len); +extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); static inline void nf_csum_replace4(__sum16 *sum, __be32 from, __be32 to) { @@ -317,7 +317,7 @@ struct nf_afinfo { unsigned int dataoff, u_int8_t protocol); void (*saveroute)(const struct sk_buff *skb, struct nf_info *info); - int (*reroute)(struct sk_buff *skb, + int (*reroute)(struct sk_buff **skb, const struct nf_info *info); int route_key_size; }; @@ -371,15 +371,15 @@ extern struct proc_dir_entry *proc_net_netfilter; #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) (okfn)(skb) #define NF_HOOK_COND(pf, hook, skb, indev, outdev, okfn, cond) (okfn)(skb) static inline int nf_hook_thresh(int pf, unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), int thresh, int cond) { - return okfn(skb); + return okfn(*pskb); } -static inline int nf_hook(int pf, unsigned int hook, struct sk_buff *skb, +static inline int nf_hook(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *)) { diff --git a/trunk/include/linux/netfilter/nf_conntrack_amanda.h b/trunk/include/linux/netfilter/nf_conntrack_amanda.h index 0bb5a6976bf3..26c223544ae8 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_amanda.h +++ b/trunk/include/linux/netfilter/nf_conntrack_amanda.h @@ -2,7 +2,7 @@ #define _NF_CONNTRACK_AMANDA_H /* AMANDA tracking. 
*/ -extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, +extern unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, diff --git a/trunk/include/linux/netfilter/nf_conntrack_ftp.h b/trunk/include/linux/netfilter/nf_conntrack_ftp.h index 47727d7546ea..b7c360ffd0d0 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_ftp.h +++ b/trunk/include/linux/netfilter/nf_conntrack_ftp.h @@ -32,7 +32,7 @@ struct nf_conntrack_expect; /* For NAT to hook in when we find a packet which describes what other * connection we should expect. */ -extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, +extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, diff --git a/trunk/include/linux/netfilter/nf_conntrack_h323.h b/trunk/include/linux/netfilter/nf_conntrack_h323.h index aabd24ac7631..08e2f4977c2e 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_h323.h +++ b/trunk/include/linux/netfilter/nf_conntrack_h323.h @@ -36,27 +36,27 @@ extern void nf_conntrack_h245_expect(struct nf_conn *new, struct nf_conntrack_expect *this); extern void nf_conntrack_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this); -extern int (*set_h245_addr_hook) (struct sk_buff *skb, +extern int (*set_h245_addr_hook) (struct sk_buff **pskb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port); -extern int (*set_h225_addr_hook) (struct sk_buff *skb, +extern int (*set_h225_addr_hook) (struct sk_buff **pskb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port); -extern int (*set_sig_addr_hook) (struct sk_buff *skb, +extern int (*set_sig_addr_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count); -extern int 
(*set_ras_addr_hook) (struct sk_buff *skb, +extern int (*set_ras_addr_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count); -extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, +extern int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -64,24 +64,24 @@ extern int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp); -extern int (*nat_t120_hook) (struct sk_buff *skb, struct nf_conn *ct, +extern int (*nat_t120_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp); -extern int (*nat_h245_hook) (struct sk_buff *skb, struct nf_conn *ct, +extern int (*nat_h245_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp); -extern int (*nat_callforwarding_hook) (struct sk_buff *skb, +extern int (*nat_callforwarding_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp); -extern int (*nat_q931_hook) (struct sk_buff *skb, struct nf_conn *ct, +extern int (*nat_q931_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, diff --git a/trunk/include/linux/netfilter/nf_conntrack_irc.h b/trunk/include/linux/netfilter/nf_conntrack_irc.h index 36282bf71b63..2ab6b8255911 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_irc.h +++ b/trunk/include/linux/netfilter/nf_conntrack_irc.h @@ -5,7 +5,7 @@ #define IRC_PORT 6667 -extern 
unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, +extern unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, diff --git a/trunk/include/linux/netfilter/nf_conntrack_pptp.h b/trunk/include/linux/netfilter/nf_conntrack_pptp.h index 23435496d24a..c93061f33144 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_pptp.h +++ b/trunk/include/linux/netfilter/nf_conntrack_pptp.h @@ -301,13 +301,13 @@ struct nf_conn; struct nf_conntrack_expect; extern int -(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, +(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); extern int -(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, +(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); diff --git a/trunk/include/linux/netfilter/nf_conntrack_sip.h b/trunk/include/linux/netfilter/nf_conntrack_sip.h index 9fff19779bd5..bb7f2041db74 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_sip.h +++ b/trunk/include/linux/netfilter/nf_conntrack_sip.h @@ -21,11 +21,11 @@ enum sip_header_pos { POS_SDP_HEADER, }; -extern unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, +extern unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr); -extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, +extern unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr); diff --git a/trunk/include/linux/netfilter/nf_conntrack_tftp.h b/trunk/include/linux/netfilter/nf_conntrack_tftp.h index c78d38fdb050..0d79b7ae051f 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_tftp.h +++ b/trunk/include/linux/netfilter/nf_conntrack_tftp.h @@ -13,7 +13,7 @@ 
struct tftphdr { #define TFTP_OPCODE_ACK 4 #define TFTP_OPCODE_ERROR 5 -extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, +extern unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp); diff --git a/trunk/include/linux/netfilter/x_tables.h b/trunk/include/linux/netfilter/x_tables.h index 03e6ce979eaa..64f425a855bb 100644 --- a/trunk/include/linux/netfilter/x_tables.h +++ b/trunk/include/linux/netfilter/x_tables.h @@ -191,7 +191,7 @@ struct xt_target /* Returns verdict. Argument order changed since 2.6.9, as this must now handle non-linear skbs, using skb_copy_bits and skb_ip_make_writable. */ - unsigned int (*target)(struct sk_buff *skb, + unsigned int (*target)(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, diff --git a/trunk/include/linux/netfilter_arp/arp_tables.h b/trunk/include/linux/netfilter_arp/arp_tables.h index 2fc73fa8e37f..584cd1b18f12 100644 --- a/trunk/include/linux/netfilter_arp/arp_tables.h +++ b/trunk/include/linux/netfilter_arp/arp_tables.h @@ -287,7 +287,7 @@ struct arpt_error extern int arpt_register_table(struct arpt_table *table, const struct arpt_replace *repl); extern void arpt_unregister_table(struct arpt_table *table); -extern unsigned int arpt_do_table(struct sk_buff *skb, +extern unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, diff --git a/trunk/include/linux/netfilter_bridge/ebtables.h b/trunk/include/linux/netfilter_bridge/ebtables.h index 892f5b7771c7..94e0a7dc0cb2 100644 --- a/trunk/include/linux/netfilter_bridge/ebtables.h +++ b/trunk/include/linux/netfilter_bridge/ebtables.h @@ -237,7 +237,7 @@ struct ebt_target struct list_head list; const char name[EBT_FUNCTION_MAXNAMELEN]; /* returns one of the standard verdicts */ - int (*target)(struct sk_buff *skb, unsigned int hooknr, + int (*target)(struct sk_buff **pskb, unsigned 
int hooknr, const struct net_device *in, const struct net_device *out, const void *targetdata, unsigned int datalen); /* 0 == let it in */ @@ -294,7 +294,7 @@ extern int ebt_register_watcher(struct ebt_watcher *watcher); extern void ebt_unregister_watcher(struct ebt_watcher *watcher); extern int ebt_register_target(struct ebt_target *target); extern void ebt_unregister_target(struct ebt_target *target); -extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, +extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, struct ebt_table *table); diff --git a/trunk/include/linux/netfilter_ipv4.h b/trunk/include/linux/netfilter_ipv4.h index 1a63adf5c4c1..ceae87a4c891 100644 --- a/trunk/include/linux/netfilter_ipv4.h +++ b/trunk/include/linux/netfilter_ipv4.h @@ -75,8 +75,8 @@ enum nf_ip_hook_priorities { #define SO_ORIGINAL_DST 80 #ifdef __KERNEL__ -extern int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); -extern int ip_xfrm_me_harder(struct sk_buff *skb); +extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type); +extern int ip_xfrm_me_harder(struct sk_buff **pskb); extern __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); #endif /*__KERNEL__*/ diff --git a/trunk/include/linux/netfilter_ipv4/ip_tables.h b/trunk/include/linux/netfilter_ipv4/ip_tables.h index d79ed69cbc1f..e992cd6b28f5 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_tables.h +++ b/trunk/include/linux/netfilter_ipv4/ip_tables.h @@ -337,7 +337,7 @@ struct ipt_error .target.errorname = "ERROR", \ } -extern unsigned int ipt_do_table(struct sk_buff *skb, +extern unsigned int ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, diff --git a/trunk/include/linux/netfilter_ipv6/ip6_tables.h b/trunk/include/linux/netfilter_ipv6/ip6_tables.h index 7dc481ce7cba..9a720f05888f 
100644 --- a/trunk/include/linux/netfilter_ipv6/ip6_tables.h +++ b/trunk/include/linux/netfilter_ipv6/ip6_tables.h @@ -336,7 +336,7 @@ extern void ip6t_init(void) __init; extern int ip6t_register_table(struct xt_table *table, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct xt_table *table); -extern unsigned int ip6t_do_table(struct sk_buff *skb, +extern unsigned int ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, diff --git a/trunk/include/linux/nfs_fs.h b/trunk/include/linux/nfs_fs.h index c5164c257f71..7250eeadd7b5 100644 --- a/trunk/include/linux/nfs_fs.h +++ b/trunk/include/linux/nfs_fs.h @@ -47,8 +47,10 @@ #include #include #include + #include +#include #include /* @@ -75,9 +77,6 @@ struct nfs_open_context { struct nfs4_state *state; fl_owner_t lockowner; int mode; - - unsigned long flags; -#define NFS_CONTEXT_ERROR_WRITE (0) int error; struct list_head list; @@ -134,6 +133,11 @@ struct nfs_inode { * server. */ unsigned long cache_change_attribute; + /* + * Counter indicating the number of outstanding requests that + * will cause a file data update. + */ + atomic_t data_updates; struct rb_root access_cache; struct list_head access_cache_entry_lru; @@ -201,18 +205,27 @@ static inline struct nfs_inode *NFS_I(struct inode *inode) #define NFS_CLIENT(inode) (NFS_SERVER(inode)->client) #define NFS_PROTO(inode) (NFS_SERVER(inode)->nfs_client->rpc_ops) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) +#define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) +#define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) +#define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) #define NFS_MINATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \ : NFS_SERVER(inode)->acregmin) #define NFS_MAXATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? 
NFS_SERVER(inode)->acdirmax \ : NFS_SERVER(inode)->acregmax) +#define NFS_ATTRTIMEO_UPDATE(inode) (NFS_I(inode)->attrtimeo_timestamp) #define NFS_FLAGS(inode) (NFS_I(inode)->flags) #define NFS_STALE(inode) (test_bit(NFS_INO_STALE, &NFS_FLAGS(inode))) #define NFS_FILEID(inode) (NFS_I(inode)->fileid) +static inline int nfs_caches_unstable(struct inode *inode) +{ + return atomic_read(&NFS_I(inode)->data_updates) != 0; +} + static inline void nfs_mark_for_revalidate(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); @@ -224,6 +237,12 @@ static inline void nfs_mark_for_revalidate(struct inode *inode) spin_unlock(&inode->i_lock); } +static inline void NFS_CACHEINV(struct inode *inode) +{ + if (!nfs_caches_unstable(inode)) + nfs_mark_for_revalidate(inode); +} + static inline int nfs_server_capable(struct inode *inode, int cap) { return NFS_SERVER(inode)->caps & cap; @@ -234,33 +253,28 @@ static inline int NFS_USE_READDIRPLUS(struct inode *inode) return test_bit(NFS_INO_ADVISE_RDPLUS, &NFS_FLAGS(inode)); } -static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) -{ - dentry->d_time = verf; -} - /** * nfs_save_change_attribute - Returns the inode attribute change cookie - * @dir - pointer to parent directory inode + * @inode - pointer to inode * The "change attribute" is updated every time we finish an operation * that will result in a metadata change on the server. */ -static inline unsigned long nfs_save_change_attribute(struct inode *dir) +static inline long nfs_save_change_attribute(struct inode *inode) { - return NFS_I(dir)->cache_change_attribute; + return NFS_I(inode)->cache_change_attribute; } /** - * nfs_verify_change_attribute - Detects NFS remote directory changes - * @dir - pointer to parent directory inode + * nfs_verify_change_attribute - Detects NFS inode cache updates + * @inode - pointer to inode * @chattr - previously saved change attribute - * Return "false" if the verifiers doesn't match the change attribute. 
- * This would usually indicate that the directory contents have changed on - * the server, and that any dentries need revalidating. + * Return "false" if metadata has been updated (or is in the process of + * being updated) since the change attribute was saved. */ -static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long chattr) +static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) { - return chattr == NFS_I(dir)->cache_change_attribute; + return !nfs_caches_unstable(inode) + && time_after_eq(chattr, NFS_I(inode)->cache_change_attribute); } /* @@ -269,14 +283,15 @@ static inline int nfs_verify_change_attribute(struct inode *dir, unsigned long c extern int nfs_sync_mapping(struct address_space *mapping); extern void nfs_zap_mapping(struct inode *inode, struct address_space *mapping); extern void nfs_zap_caches(struct inode *); -extern void nfs_invalidate_atime(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); +extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); +extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); @@ -286,10 +301,13 @@ extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *map extern int nfs_revalidate_mapping_nolock(struct inode *inode, struct 
address_space *mapping); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr); +extern void nfs_begin_attr_update(struct inode *); +extern void nfs_end_attr_update(struct inode *); +extern void nfs_begin_data_update(struct inode *); +extern void nfs_end_data_update(struct inode *); extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); extern void put_nfs_open_context(struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_cred *cred, int mode); -extern u64 nfs_compat_user_ino64(u64 fileid); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */ extern __be32 root_nfs_parse_addr(char *name); /*__init*/ @@ -310,15 +328,14 @@ extern const struct inode_operations nfs3_file_inode_operations; extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; -static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) -{ - return filp->private_data; -} - static inline struct rpc_cred *nfs_file_cred(struct file *file) { - if (file != NULL) - return nfs_file_open_context(file)->cred; + if (file != NULL) { + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context*)file->private_data; + return ctx->cred; + } return NULL; } @@ -361,8 +378,6 @@ extern const struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); -extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags); -extern void nfs_access_zap_cache(struct inode *inode); /* * linux/fs/nfs/symlink.c @@ -405,14 +420,15 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int 
nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); +extern int nfs_set_page_dirty(struct page *); /* * Try to write back everything synchronously (but check the * return value!) */ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); +extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); diff --git a/trunk/include/linux/nfs_page.h b/trunk/include/linux/nfs_page.h index 30dbcc185e69..78e60798d10e 100644 --- a/trunk/include/linux/nfs_page.h +++ b/trunk/include/linux/nfs_page.h @@ -30,6 +30,7 @@ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 +#define PG_NEED_FLUSH 3 struct nfs_inode; struct nfs_page { diff --git a/trunk/include/linux/nfs_xdr.h b/trunk/include/linux/nfs_xdr.h index daab252f2e5c..cf74a4db84a5 100644 --- a/trunk/include/linux/nfs_xdr.h +++ b/trunk/include/linux/nfs_xdr.h @@ -62,8 +62,7 @@ struct nfs_fattr { #define NFS_ATTR_FATTR 0x0002 /* post-op attributes */ #define NFS_ATTR_FATTR_V3 0x0004 /* NFSv3 attributes */ #define NFS_ATTR_FATTR_V4 0x0008 /* NFSv4 change attribute */ -#define NFS_ATTR_WCC_V4 0x0010 /* pre-op change attribute */ -#define NFS_ATTR_FATTR_V4_REFERRAL 0x0020 /* NFSv4 referral */ +#define NFS_ATTR_FATTR_V4_REFERRAL 0x0010 /* NFSv4 referral */ /* * Info on the file system @@ -539,13 +538,10 @@ typedef u64 clientid4; struct nfs4_accessargs { const struct nfs_fh * fh; - const u32 * bitmask; u32 access; }; struct nfs4_accessres { - const struct nfs_server * server; - struct nfs_fattr * fattr; u32 supported; u32 access; }; diff --git a/trunk/include/linux/rcupdate.h 
b/trunk/include/linux/rcupdate.h index 76c1a530edc5..fe17d7d750c2 100644 --- a/trunk/include/linux/rcupdate.h +++ b/trunk/include/linux/rcupdate.h @@ -41,7 +41,6 @@ #include #include #include -#include /** * struct rcu_head - callback structure for use with RCU @@ -134,15 +133,6 @@ static inline void rcu_bh_qsctr_inc(int cpu) extern int rcu_pending(int cpu); extern int rcu_needs_cpu(int cpu); -#ifdef CONFIG_DEBUG_LOCK_ALLOC -extern struct lockdep_map rcu_lock_map; -# define rcu_read_acquire() lock_acquire(&rcu_lock_map, 0, 0, 2, 1, _THIS_IP_) -# define rcu_read_release() lock_release(&rcu_lock_map, 1, _THIS_IP_) -#else -# define rcu_read_acquire() do { } while (0) -# define rcu_read_release() do { } while (0) -#endif - /** * rcu_read_lock - mark the beginning of an RCU read-side critical section. * @@ -176,7 +166,6 @@ extern struct lockdep_map rcu_lock_map; do { \ preempt_disable(); \ __acquire(RCU); \ - rcu_read_acquire(); \ } while(0) /** @@ -186,7 +175,6 @@ extern struct lockdep_map rcu_lock_map; */ #define rcu_read_unlock() \ do { \ - rcu_read_release(); \ __release(RCU); \ preempt_enable(); \ } while(0) @@ -216,7 +204,6 @@ extern struct lockdep_map rcu_lock_map; do { \ local_bh_disable(); \ __acquire(RCU_BH); \ - rcu_read_acquire(); \ } while(0) /* @@ -226,7 +213,6 @@ extern struct lockdep_map rcu_lock_map; */ #define rcu_read_unlock_bh() \ do { \ - rcu_read_release(); \ __release(RCU_BH); \ local_bh_enable(); \ } while(0) diff --git a/trunk/include/linux/skbuff.h b/trunk/include/linux/skbuff.h index f93f22b3d2ff..a656cecd373c 100644 --- a/trunk/include/linux/skbuff.h +++ b/trunk/include/linux/skbuff.h @@ -357,7 +357,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, } extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); extern struct sk_buff *skb_copy(const struct sk_buff *skb, @@ -1782,11 +1781,6 @@ 
static inline int skb_is_gso(const struct sk_buff *skb) return skb_shinfo(skb)->gso_size; } -static inline int skb_is_gso_v6(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; -} - static inline void skb_forward_csum(struct sk_buff *skb) { /* Unfortunately we don't support this one. Any brave souls? */ diff --git a/trunk/include/linux/sunrpc/clnt.h b/trunk/include/linux/sunrpc/clnt.h index d9d5c5ad826c..c0d9d14983b3 100644 --- a/trunk/include/linux/sunrpc/clnt.h +++ b/trunk/include/linux/sunrpc/clnt.h @@ -117,7 +117,7 @@ struct rpc_create_args { struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, - struct rpc_program *, u32); + struct rpc_program *, int); struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); void rpc_shutdown_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff --git a/trunk/include/linux/sunrpc/debug.h b/trunk/include/linux/sunrpc/debug.h index 3347c72b848a..3912cf16361e 100644 --- a/trunk/include/linux/sunrpc/debug.h +++ b/trunk/include/linux/sunrpc/debug.h @@ -88,11 +88,6 @@ enum { CTL_SLOTTABLE_TCP, CTL_MIN_RESVPORT, CTL_MAX_RESVPORT, - CTL_SLOTTABLE_RDMA, - CTL_RDMA_MAXINLINEREAD, - CTL_RDMA_MAXINLINEWRITE, - CTL_RDMA_WRITEPADDING, - CTL_RDMA_MEMREG, }; #endif /* _LINUX_SUNRPC_DEBUG_H_ */ diff --git a/trunk/include/linux/sunrpc/msg_prot.h b/trunk/include/linux/sunrpc/msg_prot.h index c4beb5775111..784d4c3ef651 100644 --- a/trunk/include/linux/sunrpc/msg_prot.h +++ b/trunk/include/linux/sunrpc/msg_prot.h @@ -138,19 +138,6 @@ typedef __be32 rpc_fraghdr; #define RPC_MAX_HEADER_WITH_AUTH \ (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4)) -/* - * RFC1833/RFC3530 rpcbind (v3+) well-known netid's. 
- */ -#define RPCBIND_NETID_UDP "udp" -#define RPCBIND_NETID_TCP "tcp" -#define RPCBIND_NETID_UDP6 "udp6" -#define RPCBIND_NETID_TCP6 "tcp6" - -/* - * Note that RFC 1833 does not put any size restrictions on the - * netid string, but all currently defined netid's fit in 4 bytes. - */ -#define RPCBIND_MAXNETIDLEN (4u) #endif /* __KERNEL__ */ #endif /* _LINUX_SUNRPC_MSGPROT_H_ */ diff --git a/trunk/include/linux/sunrpc/rpc_rdma.h b/trunk/include/linux/sunrpc/rpc_rdma.h deleted file mode 100644 index 0013a0d8dc6b..000000000000 --- a/trunk/include/linux/sunrpc/rpc_rdma.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the BSD-type - * license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * Neither the name of the Network Appliance, Inc. nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_SUNRPC_RPC_RDMA_H -#define _LINUX_SUNRPC_RPC_RDMA_H - -struct rpcrdma_segment { - uint32_t rs_handle; /* Registered memory handle */ - uint32_t rs_length; /* Length of the chunk in bytes */ - uint64_t rs_offset; /* Chunk virtual address or offset */ -}; - -/* - * read chunk(s), encoded as a linked list. - */ -struct rpcrdma_read_chunk { - uint32_t rc_discrim; /* 1 indicates presence */ - uint32_t rc_position; /* Position in XDR stream */ - struct rpcrdma_segment rc_target; -}; - -/* - * write chunk, and reply chunk. - */ -struct rpcrdma_write_chunk { - struct rpcrdma_segment wc_target; -}; - -/* - * write chunk(s), encoded as a counted array. 
- */ -struct rpcrdma_write_array { - uint32_t wc_discrim; /* 1 indicates presence */ - uint32_t wc_nchunks; /* Array count */ - struct rpcrdma_write_chunk wc_array[0]; -}; - -struct rpcrdma_msg { - uint32_t rm_xid; /* Mirrors the RPC header xid */ - uint32_t rm_vers; /* Version of this protocol */ - uint32_t rm_credit; /* Buffers requested/granted */ - uint32_t rm_type; /* Type of message (enum rpcrdma_proc) */ - union { - - struct { /* no chunks */ - uint32_t rm_empty[3]; /* 3 empty chunk lists */ - } rm_nochunks; - - struct { /* no chunks and padded */ - uint32_t rm_align; /* Padding alignment */ - uint32_t rm_thresh; /* Padding threshold */ - uint32_t rm_pempty[3]; /* 3 empty chunk lists */ - } rm_padded; - - uint32_t rm_chunks[0]; /* read, write and reply chunks */ - - } rm_body; -}; - -#define RPCRDMA_HDRLEN_MIN 28 - -enum rpcrdma_errcode { - ERR_VERS = 1, - ERR_CHUNK = 2 -}; - -struct rpcrdma_err_vers { - uint32_t rdma_vers_low; /* Version range supported by peer */ - uint32_t rdma_vers_high; -}; - -enum rpcrdma_proc { - RDMA_MSG = 0, /* An RPC call or reply msg */ - RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */ - RDMA_MSGP = 2, /* An RPC call or reply msg with padding */ - RDMA_DONE = 3, /* Client signals reply completion */ - RDMA_ERROR = 4 /* An RPC RDMA encoding error */ -}; - -#endif /* _LINUX_SUNRPC_RPC_RDMA_H */ diff --git a/trunk/include/linux/sunrpc/xdr.h b/trunk/include/linux/sunrpc/xdr.h index 0751c9464d0f..c6b53d181bfa 100644 --- a/trunk/include/linux/sunrpc/xdr.h +++ b/trunk/include/linux/sunrpc/xdr.h @@ -70,10 +70,7 @@ struct xdr_buf { struct page ** pages; /* Array of contiguous pages */ unsigned int page_base, /* Start of page data */ - page_len, /* Length of page data */ - flags; /* Flags for data disposition */ -#define XDRBUF_READ 0x01 /* target of file read */ -#define XDRBUF_WRITE 0x02 /* source of file write */ + page_len; /* Length of page data */ unsigned int buflen, /* Total length of storage buffer */ len; /* Length 
of XDR encoded message */ diff --git a/trunk/include/linux/sunrpc/xprt.h b/trunk/include/linux/sunrpc/xprt.h index 30b17b3bc1a9..d11cedd14f0f 100644 --- a/trunk/include/linux/sunrpc/xprt.h +++ b/trunk/include/linux/sunrpc/xprt.h @@ -19,10 +19,24 @@ #ifdef __KERNEL__ +extern unsigned int xprt_udp_slot_table_entries; +extern unsigned int xprt_tcp_slot_table_entries; + #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) +/* + * Parameters for choosing a free port + */ +extern unsigned int xprt_min_resvport; +extern unsigned int xprt_max_resvport; + +#define RPC_MIN_RESVPORT (1U) +#define RPC_MAX_RESVPORT (65535U) +#define RPC_DEF_MIN_RESVPORT (665U) +#define RPC_DEF_MAX_RESVPORT (1023U) + /* * This describes a timeout strategy */ @@ -39,10 +53,6 @@ enum rpc_display_format_t { RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, RPC_DISPLAY_ALL, - RPC_DISPLAY_HEX_ADDR, - RPC_DISPLAY_HEX_PORT, - RPC_DISPLAY_UNIVERSAL_ADDR, - RPC_DISPLAY_NETID, RPC_DISPLAY_MAX, }; @@ -186,22 +196,14 @@ struct rpc_xprt { char * address_strings[RPC_DISPLAY_MAX]; }; -struct xprt_create { - int ident; /* XPRT_TRANSPORT identifier */ +struct rpc_xprtsock_create { + int proto; /* IPPROTO_UDP or IPPROTO_TCP */ struct sockaddr * srcaddr; /* optional local address */ struct sockaddr * dstaddr; /* remote peer address */ size_t addrlen; struct rpc_timeout * timeout; /* optional timeout parameters */ }; -struct xprt_class { - struct list_head list; - int ident; /* XPRT_TRANSPORT identifier */ - struct rpc_xprt * (*setup)(struct xprt_create *); - struct module *owner; - char name[32]; -}; - /* * Transport operations used by ULPs */ @@ -210,7 +212,7 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long /* * Generic internal transport functions */ -struct rpc_xprt *xprt_create_transport(struct xprt_create *args); +struct rpc_xprt * xprt_create_transport(struct rpc_xprtsock_create *args); void xprt_connect(struct rpc_task *task); void 
xprt_reserve(struct rpc_task *task); int xprt_reserve_xprt(struct rpc_task *task); @@ -233,8 +235,6 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * /* * Transport switch helper functions */ -int xprt_register_transport(struct xprt_class *type); -int xprt_unregister_transport(struct xprt_class *type); void xprt_set_retrans_timeout_def(struct rpc_task *task); void xprt_set_retrans_timeout_rtt(struct rpc_task *task); void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status); @@ -247,6 +247,14 @@ void xprt_complete_rqst(struct rpc_task *task, int copied); void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect(struct rpc_xprt *xprt); +/* + * Socket transport setup operations + */ +struct rpc_xprt * xs_setup_udp(struct rpc_xprtsock_create *args); +struct rpc_xprt * xs_setup_tcp(struct rpc_xprtsock_create *args); +int init_socket_xprt(void); +void cleanup_socket_xprt(void); + /* * Reserved bit positions in xprt->state */ diff --git a/trunk/include/linux/sunrpc/xprtrdma.h b/trunk/include/linux/sunrpc/xprtrdma.h deleted file mode 100644 index 4de56b1d372b..000000000000 --- a/trunk/include/linux/sunrpc/xprtrdma.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the BSD-type - * license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * - * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * Neither the name of the Network Appliance, Inc. nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_SUNRPC_XPRTRDMA_H -#define _LINUX_SUNRPC_XPRTRDMA_H - -/* - * RPC transport identifier for RDMA - */ -#define XPRT_TRANSPORT_RDMA 256 - -/* - * rpcbind (v3+) RDMA netid. - */ -#define RPCBIND_NETID_RDMA "rdma" - -/* - * Constants. Max RPC/NFS header is big enough to account for - * additional marshaling buffers passed down by Linux client. - * - * RDMA header is currently fixed max size, and is big enough for a - * fully-chunked NFS message (read chunks are the largest). Note only - * a single chunk type per message is supported currently. 
- */ -#define RPCRDMA_MIN_SLOT_TABLE (2U) -#define RPCRDMA_DEF_SLOT_TABLE (32U) -#define RPCRDMA_MAX_SLOT_TABLE (256U) - -#define RPCRDMA_DEF_INLINE (1024) /* default inline max */ - -#define RPCRDMA_INLINE_PAD_THRESH (512)/* payload threshold to pad (bytes) */ - -#define RDMA_RESOLVE_TIMEOUT (5*HZ) /* TBD 5 seconds */ -#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ - -/* memory registration strategies */ -#define RPCRDMA_PERSISTENT_REGISTRATION (1) - -enum rpcrdma_memreg { - RPCRDMA_BOUNCEBUFFERS = 0, - RPCRDMA_REGISTER, - RPCRDMA_MEMWINDOWS, - RPCRDMA_MEMWINDOWS_ASYNC, - RPCRDMA_MTHCAFMR, - RPCRDMA_ALLPHYSICAL, - RPCRDMA_LAST -}; - -#endif /* _LINUX_SUNRPC_XPRTRDMA_H */ diff --git a/trunk/include/linux/sunrpc/xprtsock.h b/trunk/include/linux/sunrpc/xprtsock.h deleted file mode 100644 index 2c6c2c2783d8..000000000000 --- a/trunk/include/linux/sunrpc/xprtsock.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - * linux/include/linux/sunrpc/xprtsock.h - * - * Declarations for the RPC transport socket provider. - */ - -#ifndef _LINUX_SUNRPC_XPRTSOCK_H -#define _LINUX_SUNRPC_XPRTSOCK_H - -#ifdef __KERNEL__ - -/* - * Socket transport setup operations - */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args); -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); - -int init_socket_xprt(void); -void cleanup_socket_xprt(void); - -/* - * RPC transport identifiers for UDP, TCP - * - * To preserve compatibility with the historical use of raw IP protocol - * id's for transport selection, these are specified with the previous - * values. No such restriction exists for new transports, except that - * they may not collide with these values (17 and 6, respectively). 
- */ -#define XPRT_TRANSPORT_UDP IPPROTO_UDP -#define XPRT_TRANSPORT_TCP IPPROTO_TCP - -/* - * RPC slot table sizes for UDP, TCP transports - */ -extern unsigned int xprt_udp_slot_table_entries; -extern unsigned int xprt_tcp_slot_table_entries; - -/* - * Parameters for choosing a free port - */ -extern unsigned int xprt_min_resvport; -extern unsigned int xprt_max_resvport; - -#define RPC_MIN_RESVPORT (1U) -#define RPC_MAX_RESVPORT (65535U) -#define RPC_DEF_MIN_RESVPORT (665U) -#define RPC_DEF_MAX_RESVPORT (1023U) - -#endif /* __KERNEL__ */ - -#endif /* _LINUX_SUNRPC_XPRTSOCK_H */ diff --git a/trunk/include/linux/tcp.h b/trunk/include/linux/tcp.h index bac17c59b24e..c5b94c1a5ee2 100644 --- a/trunk/include/linux/tcp.h +++ b/trunk/include/linux/tcp.h @@ -315,7 +315,7 @@ struct tcp_sock { */ u32 snd_ssthresh; /* Slow start size threshold */ u32 snd_cwnd; /* Sending congestion window */ - u32 snd_cwnd_cnt; /* Linear increase counter */ + u16 snd_cwnd_cnt; /* Linear increase counter */ u32 snd_cwnd_clamp; /* Do not allow snd_cwnd to grow above this */ u32 snd_cwnd_used; u32 snd_cwnd_stamp; diff --git a/trunk/include/linux/writeback.h b/trunk/include/linux/writeback.h index d1321a81c9c4..c7c3337c3a88 100644 --- a/trunk/include/linux/writeback.h +++ b/trunk/include/linux/writeback.h @@ -62,6 +62,8 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ + + void *fs_private; /* For use by ->writepages() */ }; /* diff --git a/trunk/include/net/inet_frag.h b/trunk/include/net/inet_frag.h deleted file mode 100644 index 911c2cd02941..000000000000 --- a/trunk/include/net/inet_frag.h +++ /dev/null @@ -1,60 +0,0 @@ -#ifndef __NET_FRAG_H__ -#define __NET_FRAG_H__ - -struct inet_frag_queue { - struct hlist_node list; - struct list_head lru_list; /* lru list member */ - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* 
when will this queue expire? */ - struct sk_buff *fragments; /* list of received fragments */ - ktime_t stamp; - int len; /* total length of orig datagram */ - int meat; - __u8 last_in; /* first/last segment arrived? */ - -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 -}; - -#define INETFRAGS_HASHSZ 64 - -struct inet_frags_ctl { - int high_thresh; - int low_thresh; - int timeout; - int secret_interval; -}; - -struct inet_frags { - struct list_head lru_list; - struct hlist_head hash[INETFRAGS_HASHSZ]; - rwlock_t lock; - u32 rnd; - int nqueues; - int qsize; - atomic_t mem; - struct timer_list secret_timer; - struct inet_frags_ctl *ctl; - - unsigned int (*hashfn)(struct inet_frag_queue *); - void (*destructor)(struct inet_frag_queue *); - void (*skb_free)(struct sk_buff *); -}; - -void inet_frags_init(struct inet_frags *); -void inet_frags_fini(struct inet_frags *); - -void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); -void inet_frag_destroy(struct inet_frag_queue *q, - struct inet_frags *f, int *work); -int inet_frag_evictor(struct inet_frags *f); - -static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f) -{ - if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f, NULL); -} - -#endif diff --git a/trunk/include/net/ip.h b/trunk/include/net/ip.h index 840dd91b513b..3af3ed9d320b 100644 --- a/trunk/include/net/ip.h +++ b/trunk/include/net/ip.h @@ -160,7 +160,6 @@ DECLARE_SNMP_STAT(struct ipstats_mib, ip_statistics); #define IP_INC_STATS(field) SNMP_INC_STATS(ip_statistics, field) #define IP_INC_STATS_BH(field) SNMP_INC_STATS_BH(ip_statistics, field) #define IP_INC_STATS_USER(field) SNMP_INC_STATS_USER(ip_statistics, field) -#define IP_ADD_STATS_BH(field, val) SNMP_ADD_STATS_BH(ip_statistics, field, val) DECLARE_SNMP_STAT(struct linux_mib, net_statistics); #define NET_INC_STATS(field) SNMP_INC_STATS(net_statistics, field) #define NET_INC_STATS_BH(field) SNMP_INC_STATS_BH(net_statistics, field) @@ -178,8 
+177,10 @@ extern int sysctl_ip_default_ttl; extern int sysctl_ip_nonlocal_bind; /* From ip_fragment.c */ -struct inet_frags_ctl; -extern struct inet_frags_ctl ip4_frags_ctl; +extern int sysctl_ipfrag_high_thresh; +extern int sysctl_ipfrag_low_thresh; +extern int sysctl_ipfrag_time; +extern int sysctl_ipfrag_secret_interval; extern int sysctl_ipfrag_max_dist; /* From inetpeer.c */ @@ -331,9 +332,9 @@ enum ip_defrag_users IP_DEFRAG_VS_FWD }; -int ip_defrag(struct sk_buff *skb, u32 user); -int ip_frag_mem(void); -int ip_frag_nqueues(void); +struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user); +extern int ip_frag_nqueues; +extern atomic_t ip_frag_mem; /* * Functions provided by ip_forward.c diff --git a/trunk/include/net/ip_vs.h b/trunk/include/net/ip_vs.h index 41870564df8e..672564e5a81d 100644 --- a/trunk/include/net/ip_vs.h +++ b/trunk/include/net/ip_vs.h @@ -464,10 +464,10 @@ struct ip_vs_protocol { unsigned int proto_off, int inverse); - int (*snat_handler)(struct sk_buff *skb, + int (*snat_handler)(struct sk_buff **pskb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); - int (*dnat_handler)(struct sk_buff *skb, + int (*dnat_handler)(struct sk_buff **pskb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp); int (*csum_check)(struct sk_buff *skb, struct ip_vs_protocol *pp); @@ -654,11 +654,11 @@ struct ip_vs_app /* output hook: return false if can't linearize. diff set for TCP. */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, - struct sk_buff *, int *diff); + struct sk_buff **, int *diff); /* input hook: return false if can't linearize. diff set for TCP. 
*/ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, - struct sk_buff *, int *diff); + struct sk_buff **, int *diff); /* ip_vs_app initializer */ int (*init_conn)(struct ip_vs_app *, struct ip_vs_conn *); @@ -832,8 +832,8 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port); extern int ip_vs_app_inc_get(struct ip_vs_app *inc); extern void ip_vs_app_inc_put(struct ip_vs_app *inc); -extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff *skb); -extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); +extern int ip_vs_app_pkt_out(struct ip_vs_conn *, struct sk_buff **pskb); +extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff **pskb); extern int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri, char *o_buf, int o_len, char *n_buf, int n_len); extern int ip_vs_app_init(void); @@ -984,6 +984,7 @@ static inline char ip_vs_fwd_tag(struct ip_vs_conn *cp) return fwd; } +extern int ip_vs_make_skb_writable(struct sk_buff **pskb, int len); extern void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int dir); diff --git a/trunk/include/net/ipv6.h b/trunk/include/net/ipv6.h index cc796cbc1b26..31b3f1b45a2b 100644 --- a/trunk/include/net/ipv6.h +++ b/trunk/include/net/ipv6.h @@ -120,21 +120,12 @@ extern int sysctl_mld_max_msf; SNMP_INC_STATS##modifier(statname##_statistics, (field)); \ }) -#define _DEVADD(statname, modifier, idev, field, val) \ -({ \ - struct inet6_dev *_idev = (idev); \ - if (likely(_idev != NULL)) \ - SNMP_ADD_STATS##modifier((_idev)->stats.statname, (field), (val)); \ - SNMP_ADD_STATS##modifier(statname##_statistics, (field), (val));\ -}) - /* MIBs */ DECLARE_SNMP_STAT(struct ipstats_mib, ipv6_statistics); #define IP6_INC_STATS(idev,field) _DEVINC(ipv6, , idev, field) #define IP6_INC_STATS_BH(idev,field) _DEVINC(ipv6, _BH, idev, field) #define IP6_INC_STATS_USER(idev,field) _DEVINC(ipv6, _USER, idev, field) -#define IP6_ADD_STATS_BH(idev,field,val) 
_DEVADD(ipv6, _BH, idev, field, val) DECLARE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics); DECLARE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics); @@ -249,7 +240,7 @@ extern int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)); -extern int ipv6_parse_hopopts(struct sk_buff *skb); +extern int ipv6_parse_hopopts(struct sk_buff **skbp); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, @@ -261,8 +252,8 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb); -int ip6_frag_nqueues(void); -int ip6_frag_mem(void); +extern int ip6_frag_nqueues; +extern atomic_t ip6_frag_mem; #define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */ @@ -574,8 +565,10 @@ extern int inet6_hash_connect(struct inet_timewait_death_row *death_row, /* * reassembly.c */ -struct inet_frags_ctl; -extern struct inet_frags_ctl ip6_frags_ctl; +extern int sysctl_ip6frag_high_thresh; +extern int sysctl_ip6frag_low_thresh; +extern int sysctl_ip6frag_time; +extern int sysctl_ip6frag_secret_interval; extern const struct proto_ops inet6_stream_ops; extern const struct proto_ops inet6_dgram_ops; diff --git a/trunk/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/trunk/include/net/netfilter/ipv6/nf_conntrack_ipv6.h index f703533fb4db..070d12cb4634 100644 --- a/trunk/include/net/netfilter/ipv6/nf_conntrack_ipv6.h +++ b/trunk/include/net/netfilter/ipv6/nf_conntrack_ipv6.h @@ -15,7 +15,8 @@ extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, struct net_device *out, int (*okfn)(struct sk_buff *)); -struct inet_frags_ctl; -extern struct inet_frags_ctl nf_frags_ctl; +extern unsigned int nf_ct_frag6_timeout; +extern unsigned int nf_ct_frag6_low_thresh; +extern unsigned int nf_ct_frag6_high_thresh; #endif /* _NF_CONNTRACK_IPV6_H*/ diff --git 
a/trunk/include/net/netfilter/nf_conntrack_core.h b/trunk/include/net/netfilter/nf_conntrack_core.h index a532e7b5ed6a..4056f5f08da1 100644 --- a/trunk/include/net/netfilter/nf_conntrack_core.h +++ b/trunk/include/net/netfilter/nf_conntrack_core.h @@ -22,7 +22,7 @@ of connection tracking. */ extern unsigned int nf_conntrack_in(int pf, unsigned int hooknum, - struct sk_buff *skb); + struct sk_buff **pskb); extern int nf_conntrack_init(void); extern void nf_conntrack_cleanup(void); @@ -60,17 +60,17 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, extern struct nf_conntrack_tuple_hash * nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple); -extern int __nf_conntrack_confirm(struct sk_buff *skb); +extern int __nf_conntrack_confirm(struct sk_buff **pskb); /* Confirm a connection: returns NF_DROP if packet must be dropped. */ -static inline int nf_conntrack_confirm(struct sk_buff *skb) +static inline int nf_conntrack_confirm(struct sk_buff **pskb) { - struct nf_conn *ct = (struct nf_conn *)skb->nfct; + struct nf_conn *ct = (struct nf_conn *)(*pskb)->nfct; int ret = NF_ACCEPT; if (ct) { if (!nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) - ret = __nf_conntrack_confirm(skb); + ret = __nf_conntrack_confirm(pskb); nf_ct_deliver_cached_events(ct); } return ret; diff --git a/trunk/include/net/netfilter/nf_conntrack_helper.h b/trunk/include/net/netfilter/nf_conntrack_helper.h index d7b2d5483a71..0dcc4c828ce9 100644 --- a/trunk/include/net/netfilter/nf_conntrack_helper.h +++ b/trunk/include/net/netfilter/nf_conntrack_helper.h @@ -29,7 +29,7 @@ struct nf_conntrack_helper /* Function to call when data passes; return verdict, or -1 to invalidate. 
*/ - int (*help)(struct sk_buff *skb, + int (*help)(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info conntrackinfo); diff --git a/trunk/include/net/netfilter/nf_nat_core.h b/trunk/include/net/netfilter/nf_nat_core.h index f29eeb9777e0..c3cd127ba4bb 100644 --- a/trunk/include/net/netfilter/nf_nat_core.h +++ b/trunk/include/net/netfilter/nf_nat_core.h @@ -10,12 +10,12 @@ extern unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff *skb); + struct sk_buff **pskb); extern int nf_nat_icmp_reply_translation(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff *skb); + struct sk_buff **pskb); static inline int nf_nat_initialized(struct nf_conn *ct, enum nf_nat_manip_type manip) diff --git a/trunk/include/net/netfilter/nf_nat_helper.h b/trunk/include/net/netfilter/nf_nat_helper.h index 58dd22687949..ec98ecf95fc8 100644 --- a/trunk/include/net/netfilter/nf_nat_helper.h +++ b/trunk/include/net/netfilter/nf_nat_helper.h @@ -7,21 +7,21 @@ struct sk_buff; /* These return true or false. 
*/ -extern int nf_nat_mangle_tcp_packet(struct sk_buff *skb, +extern int nf_nat_mangle_tcp_packet(struct sk_buff **skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); -extern int nf_nat_mangle_udp_packet(struct sk_buff *skb, +extern int nf_nat_mangle_udp_packet(struct sk_buff **skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, unsigned int match_len, const char *rep_buffer, unsigned int rep_len); -extern int nf_nat_seq_adjust(struct sk_buff *skb, +extern int nf_nat_seq_adjust(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo); diff --git a/trunk/include/net/netfilter/nf_nat_protocol.h b/trunk/include/net/netfilter/nf_nat_protocol.h index 04578bfe23e1..14c7b2d7263c 100644 --- a/trunk/include/net/netfilter/nf_nat_protocol.h +++ b/trunk/include/net/netfilter/nf_nat_protocol.h @@ -18,7 +18,7 @@ struct nf_nat_protocol /* Translate a packet to the target according to manip type. Return true if succeeded. 
*/ - int (*manip_pkt)(struct sk_buff *skb, + int (*manip_pkt)(struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype); diff --git a/trunk/include/net/netfilter/nf_nat_rule.h b/trunk/include/net/netfilter/nf_nat_rule.h index 75d1825031d7..f9743187d57f 100644 --- a/trunk/include/net/netfilter/nf_nat_rule.h +++ b/trunk/include/net/netfilter/nf_nat_rule.h @@ -6,7 +6,7 @@ extern int nf_nat_rule_init(void) __init; extern void nf_nat_rule_cleanup(void); -extern int nf_nat_rule_find(struct sk_buff *skb, +extern int nf_nat_rule_find(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, diff --git a/trunk/include/net/protocol.h b/trunk/include/net/protocol.h index 1166ffb4b3ec..105bf12b0c79 100644 --- a/trunk/include/net/protocol.h +++ b/trunk/include/net/protocol.h @@ -45,7 +45,7 @@ struct net_protocol { #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) struct inet6_protocol { - int (*handler)(struct sk_buff *skb); + int (*handler)(struct sk_buff **skb); void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt, diff --git a/trunk/include/net/xfrm.h b/trunk/include/net/xfrm.h index 0e844845f3f4..77be396ca633 100644 --- a/trunk/include/net/xfrm.h +++ b/trunk/include/net/xfrm.h @@ -1051,7 +1051,7 @@ extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family); extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi); -extern int xfrm6_rcv(struct sk_buff *skb); +extern int xfrm6_rcv(struct sk_buff **pskb); extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); diff --git a/trunk/kernel/auditsc.c b/trunk/kernel/auditsc.c index 0ae703c157ba..04f3ffb8d9d4 
100644 --- a/trunk/kernel/auditsc.c +++ b/trunk/kernel/auditsc.c @@ -1525,7 +1525,6 @@ void __audit_inode_child(const char *dname, const struct inode *inode, context->names[idx].ino = (unsigned long)-1; } } -EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values diff --git a/trunk/kernel/lockdep.c b/trunk/kernel/lockdep.c index a6f1ee9c92d9..734da579ad13 100644 --- a/trunk/kernel/lockdep.c +++ b/trunk/kernel/lockdep.c @@ -1521,7 +1521,7 @@ static inline int lookup_chain_cache(u64 chain_key, struct lock_class *class) } static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, - struct held_lock *hlock, int chain_head, u64 chain_key) + struct held_lock *hlock, int chain_head) { /* * Trylock needs to maintain the stack of held locks, but it @@ -1534,7 +1534,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, * graph_lock for us) */ if (!hlock->trylock && (hlock->check == 2) && - lookup_chain_cache(chain_key, hlock->class)) { + lookup_chain_cache(curr->curr_chain_key, hlock->class)) { /* * Check whether last held lock: * @@ -1576,7 +1576,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, #else static inline int validate_chain(struct task_struct *curr, struct lockdep_map *lock, struct held_lock *hlock, - int chain_head, u64 chain_key) + int chain_head) { return 1; } @@ -2450,11 +2450,11 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, chain_head = 1; } chain_key = iterate_chain_key(chain_key, id); + curr->curr_chain_key = chain_key; - if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) + if (!validate_chain(curr, lock, hlock, chain_head)) return 0; - curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); #ifdef CONFIG_DEBUG_LOCKDEP @@ -3199,19 +3199,3 @@ void debug_show_held_locks(struct task_struct *task) } EXPORT_SYMBOL_GPL(debug_show_held_locks); - -void 
lockdep_sys_exit(void) -{ - struct task_struct *curr = current; - - if (unlikely(curr->lockdep_depth)) { - if (!debug_locks_off()) - return; - printk("\n================================================\n"); - printk( "[ BUG: lock held when returning to user space! ]\n"); - printk( "------------------------------------------------\n"); - printk("%s/%d is leaving the kernel with locks still held!\n", - curr->comm, curr->pid); - lockdep_print_held_locks(curr); - } -} diff --git a/trunk/kernel/lockdep_proc.c b/trunk/kernel/lockdep_proc.c index 8a135bd163c2..c851b2dcc685 100644 --- a/trunk/kernel/lockdep_proc.c +++ b/trunk/kernel/lockdep_proc.c @@ -25,38 +25,28 @@ static void *l_next(struct seq_file *m, void *v, loff_t *pos) { - struct lock_class *class; + struct lock_class *class = v; (*pos)++; - if (v == SEQ_START_TOKEN) - class = m->private; - else { - class = v; - - if (class->lock_entry.next != &all_lock_classes) - class = list_entry(class->lock_entry.next, - struct lock_class, lock_entry); - else - class = NULL; - } + if (class->lock_entry.next != &all_lock_classes) + class = list_entry(class->lock_entry.next, struct lock_class, + lock_entry); + else + class = NULL; + m->private = class; return class; } static void *l_start(struct seq_file *m, loff_t *pos) { - struct lock_class *class; - loff_t i = 0; + struct lock_class *class = m->private; - if (*pos == 0) - return SEQ_START_TOKEN; + if (&class->lock_entry == all_lock_classes.next) + seq_printf(m, "all lock classes:\n"); - list_for_each_entry(class, &all_lock_classes, lock_entry) { - if (++i == *pos) - return class; - } - return NULL; + return class; } static void l_stop(struct seq_file *m, void *v) @@ -111,15 +101,10 @@ static void print_name(struct seq_file *m, struct lock_class *class) static int l_show(struct seq_file *m, void *v) { unsigned long nr_forward_deps, nr_backward_deps; - struct lock_class *class = v; + struct lock_class *class = m->private; struct lock_list *entry; char c1, c2, c3, c4; - if (v == 
SEQ_START_TOKEN) { - seq_printf(m, "all lock classes:\n"); - return 0; - } - seq_printf(m, "%p", class->key); #ifdef CONFIG_DEBUG_LOCKDEP seq_printf(m, " OPS:%8ld", class->ops); @@ -538,11 +523,10 @@ static void *ls_start(struct seq_file *m, loff_t *pos) { struct lock_stat_seq *data = m->private; - if (*pos == 0) - return SEQ_START_TOKEN; + if (data->iter == data->stats) + seq_header(m); - data->iter = data->stats + *pos; - if (data->iter >= data->iter_end) + if (data->iter == data->iter_end) data->iter = NULL; return data->iter; @@ -554,13 +538,8 @@ static void *ls_next(struct seq_file *m, void *v, loff_t *pos) (*pos)++; - if (v == SEQ_START_TOKEN) - data->iter = data->stats; - else { - data->iter = v; - data->iter++; - } - + data->iter = v; + data->iter++; if (data->iter == data->iter_end) data->iter = NULL; @@ -573,11 +552,9 @@ static void ls_stop(struct seq_file *m, void *v) static int ls_show(struct seq_file *m, void *v) { - if (v == SEQ_START_TOKEN) - seq_header(m); - else - seq_stats(m, v); + struct lock_stat_seq *data = m->private; + seq_stats(m, data->iter); return 0; } diff --git a/trunk/kernel/mutex.c b/trunk/kernel/mutex.c index d7fe50cc556f..691b86564dd9 100644 --- a/trunk/kernel/mutex.c +++ b/trunk/kernel/mutex.c @@ -51,7 +51,6 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) EXPORT_SYMBOL(__mutex_init); -#ifndef CONFIG_DEBUG_LOCK_ALLOC /* * We split the mutex lock/unlock logic into separate fastpath and * slowpath functions, to reduce the register pressure on the fastpath. 
@@ -93,7 +92,6 @@ void inline fastcall __sched mutex_lock(struct mutex *lock) } EXPORT_SYMBOL(mutex_lock); -#endif static void fastcall noinline __sched __mutex_unlock_slowpath(atomic_t *lock_count); @@ -124,8 +122,7 @@ EXPORT_SYMBOL(mutex_unlock); * Lock a mutex (possibly interruptible), slowpath: */ static inline int __sched -__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, - unsigned long ip) +__mutex_lock_common(struct mutex *lock, long state, unsigned int subclass) { struct task_struct *task = current; struct mutex_waiter waiter; @@ -135,7 +132,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, spin_lock_mutex(&lock->wait_lock, flags); debug_mutex_lock_common(lock, &waiter); - mutex_acquire(&lock->dep_map, subclass, 0, ip); + mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); /* add waiting tasks to the end of the waitqueue (FIFO): */ @@ -146,7 +143,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (old_val == 1) goto done; - lock_contended(&lock->dep_map, ip); + lock_contended(&lock->dep_map, _RET_IP_); for (;;) { /* @@ -169,7 +166,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (unlikely(state == TASK_INTERRUPTIBLE && signal_pending(task))) { mutex_remove_waiter(lock, &waiter, task_thread_info(task)); - mutex_release(&lock->dep_map, 1, ip); + mutex_release(&lock->dep_map, 1, _RET_IP_); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); @@ -200,12 +197,20 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, return 0; } +static void fastcall noinline __sched +__mutex_lock_slowpath(atomic_t *lock_count) +{ + struct mutex *lock = container_of(lock_count, struct mutex, count); + + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0); +} + #ifdef CONFIG_DEBUG_LOCK_ALLOC void __sched mutex_lock_nested(struct mutex *lock, unsigned 
int subclass) { might_sleep(); - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass, _RET_IP_); + __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, subclass); } EXPORT_SYMBOL_GPL(mutex_lock_nested); @@ -214,7 +219,7 @@ int __sched mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) { might_sleep(); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass); } EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); @@ -266,7 +271,6 @@ __mutex_unlock_slowpath(atomic_t *lock_count) __mutex_unlock_common_slowpath(lock_count, 1); } -#ifndef CONFIG_DEBUG_LOCK_ALLOC /* * Here come the less common (and hence less performance-critical) APIs: * mutex_lock_interruptible() and mutex_trylock(). @@ -294,22 +298,13 @@ int fastcall __sched mutex_lock_interruptible(struct mutex *lock) EXPORT_SYMBOL(mutex_lock_interruptible); -static void fastcall noinline __sched -__mutex_lock_slowpath(atomic_t *lock_count) -{ - struct mutex *lock = container_of(lock_count, struct mutex, count); - - __mutex_lock_common(lock, TASK_UNINTERRUPTIBLE, 0, _RET_IP_); -} - static int fastcall noinline __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count) { struct mutex *lock = container_of(lock_count, struct mutex, count); - return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0, _RET_IP_); + return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, 0); } -#endif /* * Spinlock based trylock, we take the spinlock and check whether we diff --git a/trunk/kernel/rcupdate.c b/trunk/kernel/rcupdate.c index 130214f3d229..2c2dd8410dc4 100644 --- a/trunk/kernel/rcupdate.c +++ b/trunk/kernel/rcupdate.c @@ -49,14 +49,6 @@ #include #include -#ifdef CONFIG_DEBUG_LOCK_ALLOC -static struct lock_class_key rcu_lock_key; -struct lockdep_map rcu_lock_map = - STATIC_LOCKDEP_MAP_INIT("rcu_read_lock", &rcu_lock_key); - -EXPORT_SYMBOL_GPL(rcu_lock_map); -#endif - /* Definition for rcupdate control block. 
*/ static struct rcu_ctrlblk rcu_ctrlblk = { .cur = -300, diff --git a/trunk/net/bridge/br.c b/trunk/net/bridge/br.c index 93867bb6cc97..848b8fa8bedd 100644 --- a/trunk/net/bridge/br.c +++ b/trunk/net/bridge/br.c @@ -23,7 +23,7 @@ #include "br_private.h" -int (*br_should_route_hook)(struct sk_buff *skb); +int (*br_should_route_hook) (struct sk_buff **pskb) = NULL; static struct llc_sap *br_stp_sap; diff --git a/trunk/net/bridge/br_input.c b/trunk/net/bridge/br_input.c index 3cedd4eeeed6..3a8a015c92e0 100644 --- a/trunk/net/bridge/br_input.c +++ b/trunk/net/bridge/br_input.c @@ -126,10 +126,6 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) if (!is_valid_ether_addr(eth_hdr(skb)->h_source)) goto drop; - skb = skb_share_check(skb, GFP_ATOMIC); - if (!skb) - return NULL; - if (unlikely(is_link_local(dest))) { /* Pause frames shouldn't be passed up by driver anyway */ if (skb->protocol == htons(ETH_P_PAUSE)) @@ -149,7 +145,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb) case BR_STATE_FORWARDING: if (br_should_route_hook) { - if (br_should_route_hook(skb)) + if (br_should_route_hook(&skb)) return skb; dest = eth_hdr(skb)->h_dest; } diff --git a/trunk/net/bridge/br_netfilter.c b/trunk/net/bridge/br_netfilter.c index da22f900e89d..8245f051ccbb 100644 --- a/trunk/net/bridge/br_netfilter.c +++ b/trunk/net/bridge/br_netfilter.c @@ -503,14 +503,18 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook, * receiving device) to make netfilter happy, the REDIRECT * target in particular. Save the original destination IP * address to be able to detect DNAT afterwards. 
*/ -static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct iphdr *iph; + struct sk_buff *skb = *pskb; __u32 len = nf_bridge_encap_header_len(skb); + if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) + return NF_STOLEN; + if (unlikely(!pskb_may_pull(skb, len))) goto out; @@ -580,11 +584,13 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb, * took place when the packet entered the bridge), but we * register an IPv4 PRE_ROUTING 'sabotage' hook that will * prevent this from happening. */ -static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sk_buff *skb = *pskb; + if (skb->dst == (struct dst_entry *)&__fake_rtable) { dst_release(skb->dst); skb->dst = NULL; @@ -619,11 +625,12 @@ static int br_nf_forward_finish(struct sk_buff *skb) * but we are still able to filter on the 'real' indev/outdev * because of the physdev module. For ARP, indev and outdev are the * bridge ports. 
*/ -static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct net_device *parent; int pf; @@ -641,7 +648,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, else pf = PF_INET6; - nf_bridge_pull_encap_header(skb); + nf_bridge_pull_encap_header(*pskb); nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { @@ -659,11 +666,12 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, return NF_STOLEN; } -static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sk_buff *skb = *pskb; struct net_device **d = (struct net_device **)(skb->cb); #ifdef CONFIG_SYSCTL @@ -674,12 +682,12 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, if (skb->protocol != htons(ETH_P_ARP)) { if (!IS_VLAN_ARP(skb)) return NF_ACCEPT; - nf_bridge_pull_encap_header(skb); + nf_bridge_pull_encap_header(*pskb); } if (arp_hdr(skb)->ar_pln != 4) { if (IS_VLAN_ARP(skb)) - nf_bridge_push_encap_header(skb); + nf_bridge_push_encap_header(*pskb); return NF_ACCEPT; } *d = (struct net_device *)in; @@ -701,12 +709,13 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor * will be executed. 
*/ -static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct net_device *realindev; + struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; if (!skb->nf_bridge) @@ -743,12 +752,13 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) } /* PF_BRIDGE/POST_ROUTING ********************************************/ -static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, +static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); int pf; @@ -818,13 +828,13 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, /* IP/SABOTAGE *****************************************************/ /* Don't hand locally destined packets to PF_INET(6)/PRE_ROUTING * for the second time. 
*/ -static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb, +static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (skb->nf_bridge && - !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + if ((*pskb)->nf_bridge && + !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { return NF_STOP; } diff --git a/trunk/net/bridge/netfilter/ebt_arpreply.c b/trunk/net/bridge/netfilter/ebt_arpreply.c index 48a80e423287..ffe468a632e7 100644 --- a/trunk/net/bridge/netfilter/ebt_arpreply.c +++ b/trunk/net/bridge/netfilter/ebt_arpreply.c @@ -15,7 +15,7 @@ #include #include -static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, +static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -23,6 +23,7 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, __be32 _sip, *siptr, _dip, *diptr; struct arphdr _ah, *ap; unsigned char _sha[ETH_ALEN], *shp; + struct sk_buff *skb = *pskb; ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); if (ap == NULL) diff --git a/trunk/net/bridge/netfilter/ebt_dnat.c b/trunk/net/bridge/netfilter/ebt_dnat.c index 74262e9a566a..4582659dff0e 100644 --- a/trunk/net/bridge/netfilter/ebt_dnat.c +++ b/trunk/net/bridge/netfilter/ebt_dnat.c @@ -8,22 +8,29 @@ * */ -#include #include #include #include #include -static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, +static int ebt_target_dnat(struct sk_buff **pskb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *)data; - if (skb_make_writable(skb, 0)) - return NF_DROP; + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct sk_buff *nskb; - memcpy(eth_hdr(skb)->h_dest, info->mac, ETH_ALEN); 
+ nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + memcpy(eth_hdr(*pskb)->h_dest, info->mac, ETH_ALEN); return info->target; } diff --git a/trunk/net/bridge/netfilter/ebt_mark.c b/trunk/net/bridge/netfilter/ebt_mark.c index 6cba54309c09..62d23c7b25e6 100644 --- a/trunk/net/bridge/netfilter/ebt_mark.c +++ b/trunk/net/bridge/netfilter/ebt_mark.c @@ -17,7 +17,7 @@ #include #include -static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, +static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { @@ -25,13 +25,13 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, int action = info->target & -16; if (action == MARK_SET_VALUE) - skb->mark = info->mark; + (*pskb)->mark = info->mark; else if (action == MARK_OR_VALUE) - skb->mark |= info->mark; + (*pskb)->mark |= info->mark; else if (action == MARK_AND_VALUE) - skb->mark &= info->mark; + (*pskb)->mark &= info->mark; else - skb->mark ^= info->mark; + (*pskb)->mark ^= info->mark; return info->target | ~EBT_VERDICT_BITS; } diff --git a/trunk/net/bridge/netfilter/ebt_redirect.c b/trunk/net/bridge/netfilter/ebt_redirect.c index 422cb834cff9..9f378eab72d0 100644 --- a/trunk/net/bridge/netfilter/ebt_redirect.c +++ b/trunk/net/bridge/netfilter/ebt_redirect.c @@ -8,28 +8,35 @@ * */ -#include #include #include #include #include #include "../br_private.h" -static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, +static int ebt_target_redirect(struct sk_buff **pskb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_redirect_info *info = (struct ebt_redirect_info *)data; - if (skb_make_writable(skb, 0)) - return NF_DROP; + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct 
sk_buff *nskb; + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } if (hooknr != NF_BR_BROUTING) - memcpy(eth_hdr(skb)->h_dest, + memcpy(eth_hdr(*pskb)->h_dest, in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); - skb->pkt_type = PACKET_HOST; + memcpy(eth_hdr(*pskb)->h_dest, in->dev_addr, ETH_ALEN); + (*pskb)->pkt_type = PACKET_HOST; return info->target; } diff --git a/trunk/net/bridge/netfilter/ebt_snat.c b/trunk/net/bridge/netfilter/ebt_snat.c index 425ac920904d..a50722182bfe 100644 --- a/trunk/net/bridge/netfilter/ebt_snat.c +++ b/trunk/net/bridge/netfilter/ebt_snat.c @@ -8,7 +8,6 @@ * */ -#include #include #include #include @@ -16,26 +15,34 @@ #include #include -static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, +static int ebt_target_snat(struct sk_buff **pskb, unsigned int hooknr, const struct net_device *in, const struct net_device *out, const void *data, unsigned int datalen) { struct ebt_nat_info *info = (struct ebt_nat_info *) data; - if (skb_make_writable(skb, 0)) - return NF_DROP; + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct sk_buff *nskb; - memcpy(eth_hdr(skb)->h_source, info->mac, ETH_ALEN); + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + memcpy(eth_hdr(*pskb)->h_source, info->mac, ETH_ALEN); if (!(info->target & NAT_ARP_BIT) && - eth_hdr(skb)->h_proto == htons(ETH_P_ARP)) { + eth_hdr(*pskb)->h_proto == htons(ETH_P_ARP)) { struct arphdr _ah, *ap; - ap = skb_header_pointer(skb, 0, sizeof(_ah), &_ah); + ap = skb_header_pointer(*pskb, 0, sizeof(_ah), &_ah); if (ap == NULL) return EBT_DROP; if (ap->ar_hln != ETH_ALEN) goto out; - if (skb_store_bits(skb, sizeof(_ah), info->mac,ETH_ALEN)) + if (skb_store_bits(*pskb, sizeof(_ah), 
info->mac,ETH_ALEN)) return EBT_DROP; } out: diff --git a/trunk/net/bridge/netfilter/ebtable_broute.c b/trunk/net/bridge/netfilter/ebtable_broute.c index e44519ebf1d2..d37ce0478938 100644 --- a/trunk/net/bridge/netfilter/ebtable_broute.c +++ b/trunk/net/bridge/netfilter/ebtable_broute.c @@ -51,11 +51,11 @@ static struct ebt_table broute_table = .me = THIS_MODULE, }; -static int ebt_broute(struct sk_buff *skb) +static int ebt_broute(struct sk_buff **pskb) { int ret; - ret = ebt_do_table(NF_BR_BROUTING, skb, skb->dev, NULL, + ret = ebt_do_table(NF_BR_BROUTING, pskb, (*pskb)->dev, NULL, &broute_table); if (ret == NF_DROP) return 1; /* route it */ diff --git a/trunk/net/bridge/netfilter/ebtable_filter.c b/trunk/net/bridge/netfilter/ebtable_filter.c index 210493f99bc4..81d84145c417 100644 --- a/trunk/net/bridge/netfilter/ebtable_filter.c +++ b/trunk/net/bridge/netfilter/ebtable_filter.c @@ -61,10 +61,10 @@ static struct ebt_table frame_filter = }; static unsigned int -ebt_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, +ebt_hook (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_filter); + return ebt_do_table(hook, pskb, in, out, &frame_filter); } static struct nf_hook_ops ebt_ops_filter[] = { diff --git a/trunk/net/bridge/netfilter/ebtable_nat.c b/trunk/net/bridge/netfilter/ebtable_nat.c index 3e58c2e5ee21..9c50488b62eb 100644 --- a/trunk/net/bridge/netfilter/ebtable_nat.c +++ b/trunk/net/bridge/netfilter/ebtable_nat.c @@ -61,17 +61,17 @@ static struct ebt_table frame_nat = }; static unsigned int -ebt_nat_dst(unsigned int hook, struct sk_buff *skb, const struct net_device *in +ebt_nat_dst(unsigned int hook, struct sk_buff **pskb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_nat); + return ebt_do_table(hook, pskb, in, 
out, &frame_nat); } static unsigned int -ebt_nat_src(unsigned int hook, struct sk_buff *skb, const struct net_device *in +ebt_nat_src(unsigned int hook, struct sk_buff **pskb, const struct net_device *in , const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ebt_do_table(hook, skb, in, out, &frame_nat); + return ebt_do_table(hook, pskb, in, out, &frame_nat); } static struct nf_hook_ops ebt_ops_nat[] = { diff --git a/trunk/net/bridge/netfilter/ebtables.c b/trunk/net/bridge/netfilter/ebtables.c index d5a09eaef915..6018d0e51938 100644 --- a/trunk/net/bridge/netfilter/ebtables.c +++ b/trunk/net/bridge/netfilter/ebtables.c @@ -142,7 +142,7 @@ static inline int ebt_basic_match(struct ebt_entry *e, struct ethhdr *h, } /* Do some firewalling */ -unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, +unsigned int ebt_do_table (unsigned int hook, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, struct ebt_table *table) { @@ -172,19 +172,19 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, base = private->entries; i = 0; while (i < nentries) { - if (ebt_basic_match(point, eth_hdr(skb), in, out)) + if (ebt_basic_match(point, eth_hdr(*pskb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, *pskb, in, out) != 0) goto letscontinue; /* increase counter */ (*(counter_base + i)).pcnt++; - (*(counter_base + i)).bcnt += skb->len; + (*(counter_base + i)).bcnt+=(**pskb).len; /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, + EBT_WATCHER_ITERATE(point, ebt_do_watcher, *pskb, hook, in, out); t = (struct ebt_entry_target *) @@ -193,7 +193,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; else - verdict = 
t->u.target->target(skb, hook, + verdict = t->u.target->target(pskb, hook, in, out, t->data, t->target_size); if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c index 38b03da5c1ca..99b7bda37d10 100644 --- a/trunk/net/core/dev.c +++ b/trunk/net/core/dev.c @@ -1362,21 +1362,22 @@ int skb_checksum_help(struct sk_buff *skb) goto out_set_summed; } - offset = skb->csum_start - skb_headroom(skb); - BUG_ON(offset >= skb_headlen(skb)); - csum = skb_checksum(skb, offset, skb->len - offset, 0); - - offset += skb->csum_offset; - BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb)); - - if (skb_cloned(skb) && - !skb_clone_writable(skb, offset + sizeof(__sum16))) { + if (skb_cloned(skb)) { ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (ret) goto out; } - *(__sum16 *)(skb->data + offset) = csum_fold(csum); + offset = skb->csum_start - skb_headroom(skb); + BUG_ON(offset > (int)skb->len); + csum = skb_checksum(skb, offset, skb->len-offset, 0); + + offset = skb_headlen(skb) - offset; + BUG_ON(offset <= 0); + BUG_ON(skb->csum_offset + 2 > offset); + + *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) = + csum_fold(csum); out_set_summed: skb->ip_summed = CHECKSUM_NONE; out: @@ -1948,51 +1949,27 @@ static int ing_filter(struct sk_buff *skb) struct Qdisc *q; struct net_device *dev = skb->dev; int result = TC_ACT_OK; - u32 ttl = G_TC_RTTL(skb->tc_verd); - - if (MAX_RED_LOOP < ttl++) { - printk(KERN_WARNING - "Redir loop detected Dropping packet (%d->%d)\n", - skb->iif, dev->ifindex); - return TC_ACT_SHOT; - } - - skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); - skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - spin_lock(&dev->ingress_lock); - if ((q = dev->qdisc_ingress) != NULL) - result = q->enqueue(skb, q); - spin_unlock(&dev->ingress_lock); + if (dev->qdisc_ingress) { + __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); + if (MAX_RED_LOOP < ttl++) { + printk(KERN_WARNING "Redir loop detected Dropping packet 
(%d->%d)\n", + skb->iif, skb->dev->ifindex); + return TC_ACT_SHOT; + } - return result; -} + skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); -static inline struct sk_buff *handle_ing(struct sk_buff *skb, - struct packet_type **pt_prev, - int *ret, struct net_device *orig_dev) -{ - if (!skb->dev->qdisc_ingress) - goto out; + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); - if (*pt_prev) { - *ret = deliver_skb(skb, *pt_prev, orig_dev); - *pt_prev = NULL; - } else { - /* Huh? Why does turning on AF_PACKET affect this? */ - skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); - } + spin_lock(&dev->ingress_lock); + if ((q = dev->qdisc_ingress) != NULL) + result = q->enqueue(skb, q); + spin_unlock(&dev->ingress_lock); - switch (ing_filter(skb)) { - case TC_ACT_SHOT: - case TC_ACT_STOLEN: - kfree_skb(skb); - return NULL; } -out: - skb->tc_verd = 0; - return skb; + return result; } #endif @@ -2044,9 +2021,21 @@ int netif_receive_skb(struct sk_buff *skb) } #ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; /* noone else should process this after*/ + } else { + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); + } + + ret = ing_filter(skb); + + if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { + kfree_skb(skb); goto out; + } + + skb->tc_verd = 0; ncls: #endif diff --git a/trunk/net/core/neighbour.c b/trunk/net/core/neighbour.c index cd3af59b38a1..c52df858d0be 100644 --- a/trunk/net/core/neighbour.c +++ b/trunk/net/core/neighbour.c @@ -481,8 +481,6 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey, if (!creat) goto out; - ASSERT_RTNL(); - n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL); if (!n) goto out; diff --git a/trunk/net/core/skbuff.c b/trunk/net/core/skbuff.c index 70d9b5da96ae..944189d96323 100644 --- a/trunk/net/core/skbuff.c +++ b/trunk/net/core/skbuff.c @@ -362,97 +362,6 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); 
} -static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ - new->tstamp = old->tstamp; - new->dev = old->dev; - new->transport_header = old->transport_header; - new->network_header = old->network_header; - new->mac_header = old->mac_header; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET - new->sp = secpath_get(old->sp); -#endif - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->csum_start = old->csum_start; - new->csum_offset = old->csum_offset; - new->local_df = old->local_df; - new->pkt_type = old->pkt_type; - new->ip_summed = old->ip_summed; - skb_copy_queue_mapping(new, old); - new->priority = old->priority; -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) - new->ipvs_property = old->ipvs_property; -#endif - new->protocol = old->protocol; - new->mark = old->mark; - __nf_copy(new, old); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - new->nf_trace = old->nf_trace; -#endif -#ifdef CONFIG_NET_SCHED - new->tc_index = old->tc_index; -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif -#endif - skb_copy_secmark(new, old); -} - -static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) -{ -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->sk = NULL; - __copy_skb_header(n, skb); - - C(len); - C(data_len); - C(mac_len); - n->cloned = 1; - n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len; - n->nohdr = 0; - n->destructor = NULL; -#ifdef CONFIG_NET_CLS_ACT - /* FIXME What is this and why don't we do it in copy_skb_header? 
*/ - n->tc_verd = SET_TC_VERD(n->tc_verd,0); - n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); - n->tc_verd = CLR_TC_MUNGED(n->tc_verd); - C(iif); -#endif - C(truesize); - atomic_set(&n->users, 1); - C(head); - C(data); - C(tail); - C(end); - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; -#undef C -} - -/** - * skb_morph - morph one skb into another - * @dst: the skb to receive the contents - * @src: the skb to supply the contents - * - * This is identical to skb_clone except that the target skb is - * supplied by the user. - * - * The target skb is returned upon exit. - */ -struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) -{ - skb_release_data(dst); - return __skb_clone(dst, src); -} -EXPORT_SYMBOL_GPL(skb_morph); - /** * skb_clone - duplicate an sk_buff * @skb: buffer to clone @@ -484,7 +393,66 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) n->fclone = SKB_FCLONE_UNAVAILABLE; } - return __skb_clone(n, skb); +#define C(x) n->x = skb->x + + n->next = n->prev = NULL; + n->sk = NULL; + C(tstamp); + C(dev); + C(transport_header); + C(network_header); + C(mac_header); + C(dst); + dst_clone(skb->dst); + C(sp); +#ifdef CONFIG_INET + secpath_get(skb->sp); +#endif + memcpy(n->cb, skb->cb, sizeof(skb->cb)); + C(len); + C(data_len); + C(mac_len); + C(csum); + C(local_df); + n->cloned = 1; + n->hdr_len = skb->nohdr ? 
skb_headroom(skb) : skb->hdr_len; + n->nohdr = 0; + C(pkt_type); + C(ip_summed); + skb_copy_queue_mapping(n, skb); + C(priority); +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + C(ipvs_property); +#endif + C(protocol); + n->destructor = NULL; + C(mark); + __nf_copy(n, skb); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + C(nf_trace); +#endif +#ifdef CONFIG_NET_SCHED + C(tc_index); +#ifdef CONFIG_NET_CLS_ACT + n->tc_verd = SET_TC_VERD(skb->tc_verd,0); + n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd); + n->tc_verd = CLR_TC_MUNGED(n->tc_verd); + C(iif); +#endif +#endif + skb_copy_secmark(n, skb); + C(truesize); + atomic_set(&n->users, 1); + C(head); + C(data); + C(tail); + C(end); + + atomic_inc(&(skb_shinfo(skb)->dataref)); + skb->cloned = 1; + + return n; } static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) @@ -495,15 +463,50 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) */ unsigned long offset = new->data - old->data; #endif - - __copy_skb_header(new, old); - + new->sk = NULL; + new->dev = old->dev; + skb_copy_queue_mapping(new, old); + new->priority = old->priority; + new->protocol = old->protocol; + new->dst = dst_clone(old->dst); +#ifdef CONFIG_INET + new->sp = secpath_get(old->sp); +#endif + new->csum_start = old->csum_start; + new->csum_offset = old->csum_offset; + new->ip_summed = old->ip_summed; + new->transport_header = old->transport_header; + new->network_header = old->network_header; + new->mac_header = old->mac_header; #ifndef NET_SKBUFF_DATA_USES_OFFSET /* {transport,network,mac}_header are relative to skb->head */ new->transport_header += offset; new->network_header += offset; new->mac_header += offset; #endif + memcpy(new->cb, old->cb, sizeof(old->cb)); + new->local_df = old->local_df; + new->fclone = SKB_FCLONE_UNAVAILABLE; + new->pkt_type = old->pkt_type; + new->tstamp = old->tstamp; + new->destructor = NULL; + new->mark = 
old->mark; + __nf_copy(new, old); +#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ + defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) + new->nf_trace = old->nf_trace; +#endif +#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) + new->ipvs_property = old->ipvs_property; +#endif +#ifdef CONFIG_NET_SCHED +#ifdef CONFIG_NET_CLS_ACT + new->tc_verd = old->tc_verd; +#endif + new->tc_index = old->tc_index; +#endif + skb_copy_secmark(new, old); + atomic_set(&new->users, 1); skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; @@ -682,7 +685,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->transport_header += off; skb->network_header += off; skb->mac_header += off; - skb->csum_start += nhead; + skb->csum_start += off; skb->cloned = 0; skb->hdr_len = 0; skb->nohdr = 0; diff --git a/trunk/net/dccp/ipv6.c b/trunk/net/dccp/ipv6.c index cac53548c2d8..006a3834fbcd 100644 --- a/trunk/net/dccp/ipv6.c +++ b/trunk/net/dccp/ipv6.c @@ -767,9 +767,10 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } -static int dccp_v6_rcv(struct sk_buff *skb) +static int dccp_v6_rcv(struct sk_buff **pskb) { const struct dccp_hdr *dh; + struct sk_buff *skb = *pskb; struct sock *sk; int min_cov; diff --git a/trunk/net/decnet/netfilter/dn_rtmsg.c b/trunk/net/decnet/netfilter/dn_rtmsg.c index 43fcd29046d1..f7fba7721e63 100644 --- a/trunk/net/decnet/netfilter/dn_rtmsg.c +++ b/trunk/net/decnet/netfilter/dn_rtmsg.c @@ -88,12 +88,12 @@ static void dnrmg_send_peer(struct sk_buff *skb) static unsigned int dnrmg_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - dnrmg_send_peer(skb); + dnrmg_send_peer(*pskb); return NF_ACCEPT; } diff --git a/trunk/net/ipv4/Makefile b/trunk/net/ipv4/Makefile index 93fe3966805d..a02c36d0a13e 
100644 --- a/trunk/net/ipv4/Makefile +++ b/trunk/net/ipv4/Makefile @@ -10,8 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp_minisocks.o tcp_cong.o \ datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ - sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \ - inet_fragment.o + sysctl_net_ipv4.o fib_frontend.o fib_semantics.o obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o diff --git a/trunk/net/ipv4/inet_fragment.c b/trunk/net/ipv4/inet_fragment.c deleted file mode 100644 index 484cf512858f..000000000000 --- a/trunk/net/ipv4/inet_fragment.c +++ /dev/null @@ -1,174 +0,0 @@ -/* - * inet fragments management - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Authors: Pavel Emelyanov - * Started as consolidation of ipv4/ip_fragment.c, - * ipv6/reassembly. and ipv6 nf conntrack reassembly - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -static void inet_frag_secret_rebuild(unsigned long dummy) -{ - struct inet_frags *f = (struct inet_frags *)dummy; - unsigned long now = jiffies; - int i; - - write_lock(&f->lock); - get_random_bytes(&f->rnd, sizeof(u32)); - for (i = 0; i < INETFRAGS_HASHSZ; i++) { - struct inet_frag_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &f->hash[i], list) { - unsigned int hval = f->hashfn(q); - - if (hval != i) { - hlist_del(&q->list); - - /* Relink to new hash chain. 
*/ - hlist_add_head(&q->list, &f->hash[hval]); - } - } - } - write_unlock(&f->lock); - - mod_timer(&f->secret_timer, now + f->ctl->secret_interval); -} - -void inet_frags_init(struct inet_frags *f) -{ - int i; - - for (i = 0; i < INETFRAGS_HASHSZ; i++) - INIT_HLIST_HEAD(&f->hash[i]); - - INIT_LIST_HEAD(&f->lru_list); - rwlock_init(&f->lock); - - f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); - - f->nqueues = 0; - atomic_set(&f->mem, 0); - - init_timer(&f->secret_timer); - f->secret_timer.function = inet_frag_secret_rebuild; - f->secret_timer.data = (unsigned long)f; - f->secret_timer.expires = jiffies + f->ctl->secret_interval; - add_timer(&f->secret_timer); -} -EXPORT_SYMBOL(inet_frags_init); - -void inet_frags_fini(struct inet_frags *f) -{ - del_timer(&f->secret_timer); -} -EXPORT_SYMBOL(inet_frags_fini); - -static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f) -{ - write_lock(&f->lock); - hlist_del(&fq->list); - list_del(&fq->lru_list); - f->nqueues--; - write_unlock(&f->lock); -} - -void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f) -{ - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); - - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq, f); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } -} - -EXPORT_SYMBOL(inet_frag_kill); - -static inline void frag_kfree_skb(struct inet_frags *f, struct sk_buff *skb, - int *work) -{ - if (work) - *work -= skb->truesize; - - atomic_sub(skb->truesize, &f->mem); - if (f->skb_free) - f->skb_free(skb); - kfree_skb(skb); -} - -void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, - int *work) -{ - struct sk_buff *fp; - - BUG_TRAP(q->last_in & COMPLETE); - BUG_TRAP(del_timer(&q->timer) == 0); - - /* Release all fragment data. 
*/ - fp = q->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(f, fp, work); - fp = xp; - } - - if (work) - *work -= f->qsize; - atomic_sub(f->qsize, &f->mem); - - f->destructor(q); - -} -EXPORT_SYMBOL(inet_frag_destroy); - -int inet_frag_evictor(struct inet_frags *f) -{ - struct inet_frag_queue *q; - int work, evicted = 0; - - work = atomic_read(&f->mem) - f->ctl->low_thresh; - while (work > 0) { - read_lock(&f->lock); - if (list_empty(&f->lru_list)) { - read_unlock(&f->lock); - break; - } - - q = list_first_entry(&f->lru_list, - struct inet_frag_queue, lru_list); - atomic_inc(&q->refcnt); - read_unlock(&f->lock); - - spin_lock(&q->lock); - if (!(q->last_in & COMPLETE)) - inet_frag_kill(q, f); - spin_unlock(&q->lock); - - if (atomic_dec_and_test(&q->refcnt)) - inet_frag_destroy(q, f, &work); - evicted++; - } - - return evicted; -} -EXPORT_SYMBOL(inet_frag_evictor); diff --git a/trunk/net/ipv4/ip_forward.c b/trunk/net/ipv4/ip_forward.c index 877da3ed52e2..afbf938836f5 100644 --- a/trunk/net/ipv4/ip_forward.c +++ b/trunk/net/ipv4/ip_forward.c @@ -40,7 +40,7 @@ #include #include -static int ip_forward_finish(struct sk_buff *skb) +static inline int ip_forward_finish(struct sk_buff *skb) { struct ip_options * opt = &(IPCB(skb)->opt); diff --git a/trunk/net/ipv4/ip_fragment.c b/trunk/net/ipv4/ip_fragment.c index 443b3f89192f..fabb86db763b 100644 --- a/trunk/net/ipv4/ip_fragment.c +++ b/trunk/net/ipv4/ip_fragment.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include #include @@ -50,8 +49,21 @@ * as well. Or notify me, at least. --ANK */ +/* Fragment cache limits. We will commit 256K at one time. Should we + * cross that limit we will prune down to 192K. This should cope with + * even the most extreme cases without allowing an attacker to measurably + * harm machine performance. 
+ */ +int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; +int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; + int sysctl_ipfrag_max_dist __read_mostly = 64; +/* Important NOTE! Fragment queue must be destroyed before MSL expires. + * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. + */ +int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; + struct ipfrag_skb_cb { struct inet_skb_parm h; @@ -62,102 +74,153 @@ struct ipfrag_skb_cb /* Describe an entry in the "incomplete datagrams" queue. */ struct ipq { - struct inet_frag_queue q; - + struct hlist_node list; + struct list_head lru_list; /* lru list member */ u32 user; __be32 saddr; __be32 daddr; __be16 id; u8 protocol; + u8 last_in; +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 + + struct sk_buff *fragments; /* linked list of received fragments */ + int len; /* total length of original datagram */ + int meat; + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* when will this queue expire? */ + ktime_t stamp; int iif; unsigned int rid; struct inet_peer *peer; }; -struct inet_frags_ctl ip4_frags_ctl __read_mostly = { - /* - * Fragment cache limits. We will commit 256K at one time. Should we - * cross that limit we will prune down to 192K. This should cope with - * even the most extreme cases without allowing an attacker to - * measurably harm machine performance. - */ - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, +/* Hash table. */ - /* - * Important NOTE! Fragment queue must be destroyed before MSL expires. - * RFC791 is wrong proposing to prolongate timer each fragment arrival - * by TTL. - */ - .timeout = IP_FRAG_TIME, - .secret_interval = 10 * 60 * HZ, -}; +#define IPQ_HASHSZ 64 -static struct inet_frags ip4_frags; +/* Per-bucket lock is easy to add now. 
*/ +static struct hlist_head ipq_hash[IPQ_HASHSZ]; +static DEFINE_RWLOCK(ipfrag_lock); +static u32 ipfrag_hash_rnd; +static LIST_HEAD(ipq_lru_list); +int ip_frag_nqueues = 0; -int ip_frag_nqueues(void) +static __inline__ void __ipq_unlink(struct ipq *qp) { - return ip4_frags.nqueues; + hlist_del(&qp->list); + list_del(&qp->lru_list); + ip_frag_nqueues--; } -int ip_frag_mem(void) +static __inline__ void ipq_unlink(struct ipq *ipq) { - return atomic_read(&ip4_frags.mem); + write_lock(&ipfrag_lock); + __ipq_unlink(ipq); + write_unlock(&ipfrag_lock); } -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, - struct net_device *dev); - static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot) { return jhash_3words((__force u32)id << 16 | prot, (__force u32)saddr, (__force u32)daddr, - ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1); + ipfrag_hash_rnd) & (IPQ_HASHSZ - 1); } -static unsigned int ip4_hashfn(struct inet_frag_queue *q) +static struct timer_list ipfrag_secret_timer; +int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; + +static void ipfrag_secret_rebuild(unsigned long dummy) { - struct ipq *ipq; + unsigned long now = jiffies; + int i; + + write_lock(&ipfrag_lock); + get_random_bytes(&ipfrag_hash_rnd, sizeof(u32)); + for (i = 0; i < IPQ_HASHSZ; i++) { + struct ipq *q; + struct hlist_node *p, *n; + + hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], list) { + unsigned int hval = ipqhashfn(q->id, q->saddr, + q->daddr, q->protocol); + + if (hval != i) { + hlist_del(&q->list); - ipq = container_of(q, struct ipq, q); - return ipqhashfn(ipq->id, ipq->saddr, ipq->daddr, ipq->protocol); + /* Relink to new hash chain. */ + hlist_add_head(&q->list, &ipq_hash[hval]); + } + } + } + write_unlock(&ipfrag_lock); + + mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval); } +atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */ + /* Memory Tracking Functions. 
*/ static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip4_frags.mem); + atomic_sub(skb->truesize, &ip_frag_mem); kfree_skb(skb); } -static __inline__ void ip4_frag_free(struct inet_frag_queue *q) +static __inline__ void frag_free_queue(struct ipq *qp, int *work) { - struct ipq *qp; - - qp = container_of(q, struct ipq, q); - if (qp->peer) - inet_putpeer(qp->peer); + if (work) + *work -= sizeof(struct ipq); + atomic_sub(sizeof(struct ipq), &ip_frag_mem); kfree(qp); } static __inline__ struct ipq *frag_alloc_queue(void) { - struct ipq *qp = kzalloc(sizeof(struct ipq), GFP_ATOMIC); + struct ipq *qp = kmalloc(sizeof(struct ipq), GFP_ATOMIC); if (!qp) return NULL; - atomic_add(sizeof(struct ipq), &ip4_frags.mem); + atomic_add(sizeof(struct ipq), &ip_frag_mem); return qp; } /* Destruction primitives. */ -static __inline__ void ipq_put(struct ipq *ipq) +/* Complete destruction of ipq. */ +static void ip_frag_destroy(struct ipq *qp, int *work) +{ + struct sk_buff *fp; + + BUG_TRAP(qp->last_in&COMPLETE); + BUG_TRAP(del_timer(&qp->timer) == 0); + + if (qp->peer) + inet_putpeer(qp->peer); + + /* Release all fragment data. */ + fp = qp->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(fp, work); + fp = xp; + } + + /* Finally, release the queue descriptor itself. */ + frag_free_queue(qp, work); +} + +static __inline__ void ipq_put(struct ipq *ipq, int *work) { - inet_frag_put(&ipq->q, &ip4_frags); + if (atomic_dec_and_test(&ipq->refcnt)) + ip_frag_destroy(ipq, work); } /* Kill ipq entry. 
It is not destroyed immediately, @@ -165,7 +228,14 @@ static __inline__ void ipq_put(struct ipq *ipq) */ static void ipq_kill(struct ipq *ipq) { - inet_frag_kill(&ipq->q, &ip4_frags); + if (del_timer(&ipq->timer)) + atomic_dec(&ipq->refcnt); + + if (!(ipq->last_in & COMPLETE)) { + ipq_unlink(ipq); + atomic_dec(&ipq->refcnt); + ipq->last_in |= COMPLETE; + } } /* Memory limiting on fragments. Evictor trashes the oldest @@ -173,11 +243,33 @@ static void ipq_kill(struct ipq *ipq) */ static void ip_evictor(void) { - int evicted; + struct ipq *qp; + struct list_head *tmp; + int work; + + work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh; + if (work <= 0) + return; + + while (work > 0) { + read_lock(&ipfrag_lock); + if (list_empty(&ipq_lru_list)) { + read_unlock(&ipfrag_lock); + return; + } + tmp = ipq_lru_list.next; + qp = list_entry(tmp, struct ipq, lru_list); + atomic_inc(&qp->refcnt); + read_unlock(&ipfrag_lock); - evicted = inet_frag_evictor(&ip4_frags); - if (evicted) - IP_ADD_STATS_BH(IPSTATS_MIB_REASMFAILS, evicted); + spin_lock(&qp->lock); + if (!(qp->last_in&COMPLETE)) + ipq_kill(qp); + spin_unlock(&qp->lock); + + ipq_put(qp, &work); + IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + } } /* @@ -187,9 +279,9 @@ static void ip_expire(unsigned long arg) { struct ipq *qp = (struct ipq *) arg; - spin_lock(&qp->q.lock); + spin_lock(&qp->lock); - if (qp->q.last_in & COMPLETE) + if (qp->last_in & COMPLETE) goto out; ipq_kill(qp); @@ -197,8 +289,8 @@ static void ip_expire(unsigned long arg) IP_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - if ((qp->q.last_in&FIRST_IN) && qp->q.fragments != NULL) { - struct sk_buff *head = qp->q.fragments; + if ((qp->last_in&FIRST_IN) && qp->fragments != NULL) { + struct sk_buff *head = qp->fragments; /* Send an ICMP "Fragment Reassembly Timeout" message. 
*/ if ((head->dev = dev_get_by_index(&init_net, qp->iif)) != NULL) { icmp_send(head, ICMP_TIME_EXCEEDED, ICMP_EXC_FRAGTIME, 0); @@ -206,8 +298,8 @@ static void ip_expire(unsigned long arg) } } out: - spin_unlock(&qp->q.lock); - ipq_put(qp); + spin_unlock(&qp->lock); + ipq_put(qp, NULL); } /* Creation primitives. */ @@ -220,7 +312,7 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in) #endif unsigned int hash; - write_lock(&ip4_frags.lock); + write_lock(&ipfrag_lock); hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr, qp_in->protocol); #ifdef CONFIG_SMP @@ -228,31 +320,31 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in) * such entry could be created on other cpu, while we * promoted read lock to write lock. */ - hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { + hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { if (qp->id == qp_in->id && qp->saddr == qp_in->saddr && qp->daddr == qp_in->daddr && qp->protocol == qp_in->protocol && qp->user == qp_in->user) { - atomic_inc(&qp->q.refcnt); - write_unlock(&ip4_frags.lock); - qp_in->q.last_in |= COMPLETE; - ipq_put(qp_in); + atomic_inc(&qp->refcnt); + write_unlock(&ipfrag_lock); + qp_in->last_in |= COMPLETE; + ipq_put(qp_in, NULL); return qp; } } #endif qp = qp_in; - if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) - atomic_inc(&qp->q.refcnt); + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) + atomic_inc(&qp->refcnt); - atomic_inc(&qp->q.refcnt); - hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]); - INIT_LIST_HEAD(&qp->q.lru_list); - list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list); - ip4_frags.nqueues++; - write_unlock(&ip4_frags.lock); + atomic_inc(&qp->refcnt); + hlist_add_head(&qp->list, &ipq_hash[hash]); + INIT_LIST_HEAD(&qp->lru_list); + list_add_tail(&qp->lru_list, &ipq_lru_list); + ip_frag_nqueues++; + write_unlock(&ipfrag_lock); return qp; } @@ -265,18 +357,23 @@ static struct ipq *ip_frag_create(struct iphdr *iph, u32 user) goto out_nomem; qp->protocol = 
iph->protocol; + qp->last_in = 0; qp->id = iph->id; qp->saddr = iph->saddr; qp->daddr = iph->daddr; qp->user = user; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; + qp->iif = 0; qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL; /* Initialize a timer for this entry. */ - init_timer(&qp->q.timer); - qp->q.timer.data = (unsigned long) qp; /* pointer to queue */ - qp->q.timer.function = ip_expire; /* expire function */ - spin_lock_init(&qp->q.lock); - atomic_set(&qp->q.refcnt, 1); + init_timer(&qp->timer); + qp->timer.data = (unsigned long) qp; /* pointer to queue */ + qp->timer.function = ip_expire; /* expire function */ + spin_lock_init(&qp->lock); + atomic_set(&qp->refcnt, 1); return ip_frag_intern(qp); @@ -298,20 +395,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user) struct ipq *qp; struct hlist_node *n; - read_lock(&ip4_frags.lock); + read_lock(&ipfrag_lock); hash = ipqhashfn(id, saddr, daddr, protocol); - hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) { + hlist_for_each_entry(qp, n, &ipq_hash[hash], list) { if (qp->id == id && qp->saddr == saddr && qp->daddr == daddr && qp->protocol == protocol && qp->user == user) { - atomic_inc(&qp->q.refcnt); - read_unlock(&ip4_frags.lock); + atomic_inc(&qp->refcnt); + read_unlock(&ipfrag_lock); return qp; } } - read_unlock(&ip4_frags.lock); + read_unlock(&ipfrag_lock); return ip_frag_create(iph, user); } @@ -332,7 +429,7 @@ static inline int ip_frag_too_far(struct ipq *qp) end = atomic_inc_return(&peer->rid); qp->rid = end; - rc = qp->q.fragments && (end - start) > max; + rc = qp->fragments && (end - start) > max; if (rc) { IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); @@ -345,42 +442,39 @@ static int ip_frag_reinit(struct ipq *qp) { struct sk_buff *fp; - if (!mod_timer(&qp->q.timer, jiffies + ip4_frags_ctl.timeout)) { - atomic_inc(&qp->q.refcnt); + if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) { + atomic_inc(&qp->refcnt); return -ETIMEDOUT; } - fp = 
qp->q.fragments; + fp = qp->fragments; do { struct sk_buff *xp = fp->next; frag_kfree_skb(fp, NULL); fp = xp; } while (fp); - qp->q.last_in = 0; - qp->q.len = 0; - qp->q.meat = 0; - qp->q.fragments = NULL; + qp->last_in = 0; + qp->len = 0; + qp->meat = 0; + qp->fragments = NULL; qp->iif = 0; return 0; } /* Add new segment to existing queue. */ -static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) +static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { struct sk_buff *prev, *next; - struct net_device *dev; int flags, offset; int ihl, end; - int err = -ENOENT; - if (qp->q.last_in & COMPLETE) + if (qp->last_in & COMPLETE) goto err; if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) && - unlikely(ip_frag_too_far(qp)) && - unlikely(err = ip_frag_reinit(qp))) { + unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) { ipq_kill(qp); goto err; } @@ -393,40 +487,36 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) /* Determine the position of this fragment. */ end = offset + skb->len - ihl; - err = -EINVAL; /* Is this the final fragment? */ if ((flags & IP_MF) == 0) { /* If we already have some bits beyond end * or have different end, the segment is corrrupted. */ - if (end < qp->q.len || - ((qp->q.last_in & LAST_IN) && end != qp->q.len)) + if (end < qp->len || + ((qp->last_in & LAST_IN) && end != qp->len)) goto err; - qp->q.last_in |= LAST_IN; - qp->q.len = end; + qp->last_in |= LAST_IN; + qp->len = end; } else { if (end&7) { end &= ~7; if (skb->ip_summed != CHECKSUM_UNNECESSARY) skb->ip_summed = CHECKSUM_NONE; } - if (end > qp->q.len) { + if (end > qp->len) { /* Some bits beyond end -> corruption. 
*/ - if (qp->q.last_in & LAST_IN) + if (qp->last_in & LAST_IN) goto err; - qp->q.len = end; + qp->len = end; } } if (end == offset) goto err; - err = -ENOMEM; if (pskb_pull(skb, ihl) == NULL) goto err; - - err = pskb_trim_rcsum(skb, end - offset); - if (err) + if (pskb_trim_rcsum(skb, end-offset)) goto err; /* Find out which fragments are in front and at the back of us @@ -434,7 +524,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * this fragment, right? */ prev = NULL; - for (next = qp->q.fragments; next != NULL; next = next->next) { + for (next = qp->fragments; next != NULL; next = next->next) { if (FRAG_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -449,10 +539,8 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (i > 0) { offset += i; - err = -EINVAL; if (end <= offset) goto err; - err = -ENOMEM; if (!pskb_pull(skb, i)) goto err; if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -460,8 +548,6 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) } } - err = -ENOMEM; - while (next && FRAG_CB(next)->offset < end) { int i = end - FRAG_CB(next)->offset; /* overlap is 'i' bytes */ @@ -472,7 +558,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (!pskb_pull(next, i)) goto err; FRAG_CB(next)->offset += i; - qp->q.meat -= i; + qp->meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -487,9 +573,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = next; else - qp->q.fragments = next; + qp->fragments = next; - qp->q.meat -= free_it->len; + qp->meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -501,77 +587,50 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) if (prev) prev->next = skb; else - qp->q.fragments = skb; - - dev = skb->dev; - if (dev) { - qp->iif = dev->ifindex; - skb->dev = NULL; - } - qp->q.stamp = skb->tstamp; - qp->q.meat += skb->len; - atomic_add(skb->truesize, 
&ip4_frags.mem); + qp->fragments = skb; + + if (skb->dev) + qp->iif = skb->dev->ifindex; + skb->dev = NULL; + qp->stamp = skb->tstamp; + qp->meat += skb->len; + atomic_add(skb->truesize, &ip_frag_mem); if (offset == 0) - qp->q.last_in |= FIRST_IN; + qp->last_in |= FIRST_IN; - if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len) - return ip_frag_reasm(qp, prev, dev); + write_lock(&ipfrag_lock); + list_move_tail(&qp->lru_list, &ipq_lru_list); + write_unlock(&ipfrag_lock); - write_lock(&ip4_frags.lock); - list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list); - write_unlock(&ip4_frags.lock); - return -EINPROGRESS; + return; err: kfree_skb(skb); - return err; } /* Build a new IP datagram from all its fragments. */ -static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, - struct net_device *dev) +static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) { struct iphdr *iph; - struct sk_buff *fp, *head = qp->q.fragments; + struct sk_buff *fp, *head = qp->fragments; int len; int ihlen; - int err; ipq_kill(qp); - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_nomem; - - fp->next = head->next; - prev->next = fp; - - skb_morph(head, qp->q.fragments); - head->next = qp->q.fragments->next; - - kfree_skb(qp->q.fragments); - qp->q.fragments = head; - } - BUG_TRAP(head != NULL); BUG_TRAP(FRAG_CB(head)->offset == 0); /* Allocate a new buffer for the datagram. */ ihlen = ip_hdrlen(head); - len = ihlen + qp->q.len; + len = ihlen + qp->len; - err = -E2BIG; if (len > 65535) goto out_oversize; /* Head of list must not be cloned. 
*/ - err = -ENOMEM; if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) goto out_nomem; @@ -595,12 +654,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip4_frags.mem); + atomic_add(clone->truesize, &ip_frag_mem); } skb_shinfo(head)->frag_list = head->next; skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip4_frags.mem); + atomic_sub(head->truesize, &ip_frag_mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -610,19 +669,19 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip4_frags.mem); + atomic_sub(fp->truesize, &ip_frag_mem); } head->next = NULL; head->dev = dev; - head->tstamp = qp->q.stamp; + head->tstamp = qp->stamp; iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); IP_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - qp->q.fragments = NULL; - return 0; + qp->fragments = NULL; + return head; out_nomem: LIMIT_NETDEBUG(KERN_ERR "IP: queue_glue: no memory for gluing " @@ -635,46 +694,54 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, NIPQUAD(qp->saddr)); out_fail: IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - return err; + return NULL; } /* Process an incoming IP datagram fragment. */ -int ip_defrag(struct sk_buff *skb, u32 user) +struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user) { struct ipq *qp; + struct net_device *dev; IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. 
*/ - if (atomic_read(&ip4_frags.mem) > ip4_frags_ctl.high_thresh) + if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh) ip_evictor(); + dev = skb->dev; + /* Lookup (or create) queue header */ if ((qp = ip_find(ip_hdr(skb), user)) != NULL) { - int ret; + struct sk_buff *ret = NULL; + + spin_lock(&qp->lock); - spin_lock(&qp->q.lock); + ip_frag_queue(qp, skb); - ret = ip_frag_queue(qp, skb); + if (qp->last_in == (FIRST_IN|LAST_IN) && + qp->meat == qp->len) + ret = ip_frag_reasm(qp, dev); - spin_unlock(&qp->q.lock); - ipq_put(qp); + spin_unlock(&qp->lock); + ipq_put(qp, NULL); return ret; } IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); kfree_skb(skb); - return -ENOMEM; + return NULL; } void __init ipfrag_init(void) { - ip4_frags.ctl = &ip4_frags_ctl; - ip4_frags.hashfn = ip4_hashfn; - ip4_frags.destructor = ip4_frag_free; - ip4_frags.skb_free = NULL; - ip4_frags.qsize = sizeof(struct ipq); - inet_frags_init(&ip4_frags); + ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + init_timer(&ipfrag_secret_timer); + ipfrag_secret_timer.function = ipfrag_secret_rebuild; + ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval; + add_timer(&ipfrag_secret_timer); } EXPORT_SYMBOL(ip_defrag); diff --git a/trunk/net/ipv4/ip_input.c b/trunk/net/ipv4/ip_input.c index 168c871fcd79..41d8964591e7 100644 --- a/trunk/net/ipv4/ip_input.c +++ b/trunk/net/ipv4/ip_input.c @@ -172,7 +172,8 @@ int ip_call_ra_chain(struct sk_buff *skb) (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == skb->dev->ifindex)) { if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { + skb = ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN); + if (skb == NULL) { read_unlock(&ip_ra_lock); return 1; } @@ -195,7 +196,7 @@ int ip_call_ra_chain(struct sk_buff *skb) return 0; } -static int ip_local_deliver_finish(struct sk_buff *skb) +static inline int ip_local_deliver_finish(struct sk_buff *skb) { __skb_pull(skb, 
ip_hdrlen(skb)); @@ -264,7 +265,8 @@ int ip_local_deliver(struct sk_buff *skb) */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) + skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER); + if (!skb) return 0; } @@ -324,7 +326,7 @@ static inline int ip_rcv_options(struct sk_buff *skb) return -1; } -static int ip_rcv_finish(struct sk_buff *skb) +static inline int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; diff --git a/trunk/net/ipv4/ip_output.c b/trunk/net/ipv4/ip_output.c index f508835ba713..699f06781fd8 100644 --- a/trunk/net/ipv4/ip_output.c +++ b/trunk/net/ipv4/ip_output.c @@ -202,7 +202,7 @@ static inline int ip_skb_dst_mtu(struct sk_buff *skb) skb->dst->dev->mtu : dst_mtu(skb->dst); } -static int ip_finish_output(struct sk_buff *skb) +static inline int ip_finish_output(struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ diff --git a/trunk/net/ipv4/ipvs/ip_vs_app.c b/trunk/net/ipv4/ipvs/ip_vs_app.c index 664cb8e97c1c..341474eefa55 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_app.c +++ b/trunk/net/ipv4/ipvs/ip_vs_app.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include @@ -329,18 +328,18 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq, spin_unlock(&cp->lock); } -static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, +static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(skb); + const unsigned int tcp_offset = ip_hdrlen(*pskb); struct tcphdr *th; __u32 seq; - if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) + if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(*pskb) + 
tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -361,7 +360,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, if (app->pkt_out == NULL) return 1; - if (!app->pkt_out(app, cp, skb, &diff)) + if (!app->pkt_out(app, cp, pskb, &diff)) return 0; /* @@ -379,7 +378,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb, * called by ipvs packet handler, assumes previously checked cp!=NULL * returns false if it can't handle packet (oom) */ -int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) +int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb) { struct ip_vs_app *app; @@ -392,7 +391,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_out(cp, skb, app); + return app_tcp_pkt_out(cp, pskb, app); /* * Call private output hook function @@ -400,22 +399,22 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb) if (app->pkt_out == NULL) return 1; - return app->pkt_out(app, cp, skb, NULL); + return app->pkt_out(app, cp, pskb, NULL); } -static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, +static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb, struct ip_vs_app *app) { int diff; - const unsigned int tcp_offset = ip_hdrlen(skb); + const unsigned int tcp_offset = ip_hdrlen(*pskb); struct tcphdr *th; __u32 seq; - if (!skb_make_writable(skb, tcp_offset + sizeof(*th))) + if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th))) return 0; - th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset); + th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset); /* * Remember seq number in case this pkt gets resized @@ -436,7 +435,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, if (app->pkt_in == NULL) return 1; - if (!app->pkt_in(app, cp, skb, &diff)) + if (!app->pkt_in(app, cp, 
pskb, &diff)) return 0; /* @@ -454,7 +453,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb, * called by ipvs packet handler, assumes previously checked cp!=NULL. * returns false if can't handle packet (oom). */ -int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) +int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb) { struct ip_vs_app *app; @@ -467,7 +466,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) /* TCP is complicated */ if (cp->protocol == IPPROTO_TCP) - return app_tcp_pkt_in(cp, skb, app); + return app_tcp_pkt_in(cp, pskb, app); /* * Call private input hook function @@ -475,7 +474,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb) if (app->pkt_in == NULL) return 1; - return app->pkt_in(app, cp, skb, NULL); + return app->pkt_in(app, cp, pskb, NULL); } diff --git a/trunk/net/ipv4/ipvs/ip_vs_core.c b/trunk/net/ipv4/ipvs/ip_vs_core.c index c6ed7654e839..fbca2a2ff29f 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_core.c +++ b/trunk/net/ipv4/ipvs/ip_vs_core.c @@ -58,6 +58,7 @@ EXPORT_SYMBOL(ip_vs_conn_put); #ifdef CONFIG_IP_VS_DEBUG EXPORT_SYMBOL(ip_vs_get_debug_level); #endif +EXPORT_SYMBOL(ip_vs_make_skb_writable); /* ID used in ICMP lookups */ @@ -162,6 +163,42 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, } +int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) +{ + struct sk_buff *skb = *pskb; + + /* skb is already used, better copy skb and its payload */ + if (unlikely(skb_shared(skb) || skb->sk)) + goto copy_skb; + + /* skb data is already used, copy it */ + if (unlikely(skb_cloned(skb))) + goto copy_data; + + return pskb_may_pull(skb, writable_len); + + copy_data: + if (unlikely(writable_len > skb->len)) + return 0; + return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + + copy_skb: + if (unlikely(writable_len > skb->len)) + return 0; + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) + return 0; + BUG_ON(skb_is_nonlinear(skb)); + + /* Rest of 
kernel will get very unhappy if we pass it a + suddenly-orphaned skbuff */ + if ((*pskb)->sk) + skb_set_owner_w(skb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = skb; + return 1; +} + /* * IPVS persistent scheduling function * It creates a connection entry according to its template if exists, @@ -488,12 +525,12 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * for VS/NAT. */ static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - if (!skb->ipvs_property) + if (!((*pskb)->ipvs_property)) return NF_ACCEPT; /* The packet was sent from IPVS, exit this chain */ return NF_STOP; @@ -504,14 +541,13 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); } -static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) +static inline struct sk_buff * +ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) { - int err = ip_defrag(skb, user); - - if (!err) + skb = ip_defrag(skb, user); + if (skb) ip_send_check(ip_hdr(skb)); - - return err; + return skb; } /* @@ -569,8 +605,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, * Currently handles error types - unreachable, quench, ttl exceeded. 
* (Only used in VS/NAT) */ -static int ip_vs_out_icmp(struct sk_buff *skb, int *related) +static int ip_vs_out_icmp(struct sk_buff **pskb, int *related) { + struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -582,8 +619,10 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) + skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); + if (!skb) return NF_STOLEN; + *pskb = skb; } iph = ip_hdr(skb); @@ -651,8 +690,9 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - if (!skb_make_writable(skb, offset)) + if (!ip_vs_make_skb_writable(pskb, offset)) goto out; + skb = *pskb; ip_vs_nat_icmp(skb, pp, cp, 1); @@ -684,10 +724,11 @@ static inline int is_tcp_reset(const struct sk_buff *skb) * rewrite addresses of the packet and send it on its way... 
*/ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff *skb, +ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -700,10 +741,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_out_icmp(skb, &related); + int related, verdict = ip_vs_out_icmp(pskb, &related); if (related) return verdict; + skb = *pskb; iph = ip_hdr(skb); } @@ -714,9 +756,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, /* reassemble IP fragments */ if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && !pp->dont_defrag)) { - if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) + skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT); + if (!skb) return NF_STOLEN; iph = ip_hdr(skb); + *pskb = skb; } ihl = iph->ihl << 2; @@ -758,12 +802,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - if (!skb_make_writable(skb, ihl)) + if (!ip_vs_make_skb_writable(pskb, ihl)) goto drop; /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) + if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp)) goto drop; + skb = *pskb; ip_hdr(skb)->saddr = cp->vaddr; ip_send_check(ip_hdr(skb)); @@ -773,8 +818,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, * if it came from this machine itself. So re-compute * the routing information. 
*/ - if (ip_route_me_harder(skb, RTN_LOCAL) != 0) + if (ip_route_me_harder(pskb, RTN_LOCAL) != 0) goto drop; + skb = *pskb; IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); @@ -789,7 +835,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, drop: ip_vs_conn_put(cp); - kfree_skb(skb); + kfree_skb(*pskb); return NF_STOLEN; } @@ -801,8 +847,9 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, * Currently handles error types - unreachable, quench, ttl exceeded. */ static int -ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) +ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum) { + struct sk_buff *skb = *pskb; struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ @@ -814,9 +861,12 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) /* reassemble IP fragments */ if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_vs_gather_frags(skb, hooknum == NF_IP_LOCAL_IN ? - IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD)) + skb = ip_vs_gather_frags(skb, + hooknum == NF_IP_LOCAL_IN ? + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD); + if (!skb) return NF_STOLEN; + *pskb = skb; } iph = ip_hdr(skb); @@ -895,10 +945,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) * and send it on its way... 
*/ static unsigned int -ip_vs_in(unsigned int hooknum, struct sk_buff *skb, +ip_vs_in(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct sk_buff *skb = *pskb; struct iphdr *iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; @@ -920,10 +971,11 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, iph = ip_hdr(skb); if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); + int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum); if (related) return verdict; + skb = *pskb; iph = ip_hdr(skb); } @@ -1004,16 +1056,16 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, * and send them to ip_vs_in_icmp. */ static unsigned int -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, +ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { int r; - if (ip_hdr(skb)->protocol != IPPROTO_ICMP) + if (ip_hdr(*pskb)->protocol != IPPROTO_ICMP) return NF_ACCEPT; - return ip_vs_in_icmp(skb, &r, hooknum); + return ip_vs_in_icmp(pskb, &r, hooknum); } diff --git a/trunk/net/ipv4/ipvs/ip_vs_ftp.c b/trunk/net/ipv4/ipvs/ip_vs_ftp.c index 59aa166b7678..344ddbbdc756 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_ftp.c +++ b/trunk/net/ipv4/ipvs/ip_vs_ftp.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -136,7 +135,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number. */ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff *skb, int *diff) + struct sk_buff **pskb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -156,14 +155,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. 
*/ - if (!skb_make_writable(skb, skb->len)) + if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) return 0; if (cp->app_data == &ip_vs_ftp_pasv) { - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); data = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(skb); + data_limit = skb_tail_pointer(*pskb); if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, @@ -214,7 +213,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, memcpy(start, buf, buf_len); ret = 1; } else { - ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start, + ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start, end-start, buf, buf_len); } @@ -239,7 +238,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * the client. */ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, - struct sk_buff *skb, int *diff) + struct sk_buff **pskb, int *diff) { struct iphdr *iph; struct tcphdr *th; @@ -257,20 +256,20 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, return 1; /* Linear packets are much easier to deal with. 
*/ - if (!skb_make_writable(skb, skb->len)) + if (!ip_vs_make_skb_writable(pskb, (*pskb)->len)) return 0; /* * Detecting whether it is passive */ - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]); /* Since there may be OPTIONS in the TCP packet and the HLEN is the length of the header in 32-bit multiples, it is accurate to calculate data address by th+HLEN*4 */ data = data_start = (char *)th + (th->doff << 2); - data_limit = skb_tail_pointer(skb); + data_limit = skb_tail_pointer(*pskb); while (data <= data_limit - 6) { if (strnicmp(data, "PASV\r\n", 6) == 0) { diff --git a/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c b/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c index 12dc0d640b6d..e65577a77006 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -20,7 +20,6 @@ #include /* for tcphdr */ #include #include /* for csum_tcpudp_magic */ -#include #include #include @@ -123,27 +122,27 @@ tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, static int -tcp_snat_handler(struct sk_buff *skb, +tcp_snat_handler(struct sk_buff **pskb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + const unsigned int tcphoff = ip_hdrlen(*pskb); /* csum_check requires unshared skb */ - if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) + if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(*pskb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!ip_vs_app_pkt_out(cp, pskb)) return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)ip_hdr(*pskb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ @@ -151,15 +150,17 @@ tcp_snat_handler(struct sk_buff *skb, /* Only port and addr are changed, do 
fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); + (*pskb)->csum = skb_checksum(*pskb, tcphoff, + (*pskb)->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - skb->len - tcphoff, - cp->protocol, skb->csum); + (*pskb)->len - tcphoff, + cp->protocol, + (*pskb)->csum); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -169,30 +170,30 @@ tcp_snat_handler(struct sk_buff *skb, static int -tcp_dnat_handler(struct sk_buff *skb, +tcp_dnat_handler(struct sk_buff **pskb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + const unsigned int tcphoff = ip_hdrlen(*pskb); /* csum_check requires unshared skb */ - if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) + if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(*pskb, pp)) return 0; /* * Attempt ip_vs_app call. 
* It will fix ip_vs_conn and iph ack_seq stuff */ - if (!ip_vs_app_pkt_in(cp, skb)) + if (!ip_vs_app_pkt_in(cp, pskb)) return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)ip_hdr(*pskb) + tcphoff; tcph->dest = cp->dport; /* @@ -202,16 +203,18 @@ tcp_dnat_handler(struct sk_buff *skb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; - skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); + (*pskb)->csum = skb_checksum(*pskb, tcphoff, + (*pskb)->len - tcphoff, 0); tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - skb->len - tcphoff, - cp->protocol, skb->csum); - skb->ip_summed = CHECKSUM_UNNECESSARY; + (*pskb)->len - tcphoff, + cp->protocol, + (*pskb)->csum); + (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c b/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c index 1fa7b330b9ac..8ee5fe6a101d 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -130,29 +129,29 @@ udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, } static int -udp_snat_handler(struct sk_buff *skb, +udp_snat_handler(struct sk_buff **pskb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(skb); + const unsigned int udphoff = ip_hdrlen(*pskb); /* csum_check requires unshared skb */ - if (!skb_make_writable(skb, udphoff+sizeof(*udph))) + if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if 
(pp->csum_check && !pp->csum_check(*pskb, pp)) return 0; /* * Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!ip_vs_app_pkt_out(cp, pskb)) return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)ip_hdr(*pskb) + udphoff; udph->source = cp->vport; /* @@ -162,15 +161,17 @@ udp_snat_handler(struct sk_buff *skb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); + (*pskb)->csum = skb_checksum(*pskb, udphoff, + (*pskb)->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - skb->len - udphoff, - cp->protocol, skb->csum); + (*pskb)->len - udphoff, + cp->protocol, + (*pskb)->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -182,30 +183,30 @@ udp_snat_handler(struct sk_buff *skb, static int -udp_dnat_handler(struct sk_buff *skb, +udp_dnat_handler(struct sk_buff **pskb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff = ip_hdrlen(*pskb); /* csum_check requires unshared skb */ - if (!skb_make_writable(skb, udphoff+sizeof(*udph))) + if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph))) return 0; if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(*pskb, pp)) return 0; /* * Attempt ip_vs_app call. 
* It will fix ip_vs_conn */ - if (!ip_vs_app_pkt_in(cp, skb)) + if (!ip_vs_app_pkt_in(cp, pskb)) return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)ip_hdr(*pskb) + udphoff; udph->dest = cp->dport; /* @@ -215,18 +216,20 @@ udp_dnat_handler(struct sk_buff *skb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; - skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); + (*pskb)->csum = skb_checksum(*pskb, udphoff, + (*pskb)->len - udphoff, 0); udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - skb->len - udphoff, - cp->protocol, skb->csum); + (*pskb)->len - udphoff, + cp->protocol, + (*pskb)->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_UNNECESSARY; + (*pskb)->ip_summed = CHECKSUM_UNNECESSARY; } return 1; } diff --git a/trunk/net/ipv4/ipvs/ip_vs_xmit.c b/trunk/net/ipv4/ipvs/ip_vs_xmit.c index d0a92dec1050..666e080a74a3 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_xmit.c +++ b/trunk/net/ipv4/ipvs/ip_vs_xmit.c @@ -253,7 +253,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } /* copy-on-write the packet before mangling it */ - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr))) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) @@ -264,7 +264,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dst = &rt->u.dst; /* mangle the packet */ - if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp)) goto tx_error; ip_hdr(skb)->daddr = cp->daddr; ip_send_check(ip_hdr(skb)); @@ -529,7 +529,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, } 
/* copy-on-write the packet before mangling it */ - if (!skb_make_writable(skb, offset)) + if (!ip_vs_make_skb_writable(&skb, offset)) goto tx_error_put; if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) diff --git a/trunk/net/ipv4/netfilter.c b/trunk/net/ipv4/netfilter.c index 5539debf4973..b44192924f95 100644 --- a/trunk/net/ipv4/netfilter.c +++ b/trunk/net/ipv4/netfilter.c @@ -3,15 +3,14 @@ #include #include #include -#include #include #include #include /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ -int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) +int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(*pskb); struct rtable *rt; struct flowi fl = {}; struct dst_entry *odst; @@ -30,14 +29,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (type == RTN_LOCAL) fl.nl_u.ip4_u.saddr = iph->saddr; fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; - fl.mark = skb->mark; + fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; + fl.mark = (*pskb)->mark; if (ip_route_output_key(&rt, &fl) != 0) return -1; /* Drop old route. */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; } else { /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. 
*/ @@ -45,8 +44,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) if (ip_route_output_key(&rt, &fl) != 0) return -1; - odst = skb->dst; - if (ip_route_input(skb, iph->daddr, iph->saddr, + odst = (*pskb)->dst; + if (ip_route_input(*pskb, iph->daddr, iph->saddr, RT_TOS(iph->tos), rt->u.dst.dev) != 0) { dst_release(&rt->u.dst); return -1; @@ -55,54 +54,70 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) dst_release(odst); } - if (skb->dst->error) + if ((*pskb)->dst->error) return -1; #ifdef CONFIG_XFRM - if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, &fl, AF_INET) == 0) - if (xfrm_lookup(&skb->dst, &fl, skb->sk, 0)) + if (!(IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) && + xfrm_decode_session(*pskb, &fl, AF_INET) == 0) + if (xfrm_lookup(&(*pskb)->dst, &fl, (*pskb)->sk, 0)) return -1; #endif /* Change in oif may mean change in hh_len. */ - hh_len = skb->dst->dev->hard_header_len; - if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } return 0; } EXPORT_SYMBOL(ip_route_me_harder); #ifdef CONFIG_XFRM -int ip_xfrm_me_harder(struct sk_buff *skb) +int ip_xfrm_me_harder(struct sk_buff **pskb) { struct flowi fl; unsigned int hh_len; struct dst_entry *dst; - if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) + if (IPCB(*pskb)->flags & IPSKB_XFRM_TRANSFORMED) return 0; - if (xfrm_decode_session(skb, &fl, AF_INET) < 0) + if (xfrm_decode_session(*pskb, &fl, AF_INET) < 0) return -1; - dst = skb->dst; + dst = (*pskb)->dst; if (dst->xfrm) dst = ((struct xfrm_dst *)dst)->route; dst_hold(dst); - if (xfrm_lookup(&dst, &fl, skb->sk, 0) < 0) + if (xfrm_lookup(&dst, &fl, (*pskb)->sk, 0) < 
0) return -1; - dst_release(skb->dst); - skb->dst = dst; + dst_release((*pskb)->dst); + (*pskb)->dst = dst; /* Change in oif may mean change in hh_len. */ - hh_len = skb->dst->dev->hard_header_len; - if (skb_headroom(skb) < hh_len && - pskb_expand_head(skb, hh_len - skb_headroom(skb), 0, GFP_ATOMIC)) - return -1; + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } return 0; } EXPORT_SYMBOL(ip_xfrm_me_harder); @@ -135,17 +150,17 @@ static void nf_ip_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip_reroute(struct sk_buff *skb, const struct nf_info *info) +static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info) { const struct ip_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP_LOCAL_OUT) { - const struct iphdr *iph = ip_hdr(skb); + const struct iphdr *iph = ip_hdr(*pskb); if (!(iph->tos == rt_info->tos && iph->daddr == rt_info->daddr && iph->saddr == rt_info->saddr)) - return ip_route_me_harder(skb, RTN_UNSPEC); + return ip_route_me_harder(pskb, RTN_UNSPEC); } return 0; } diff --git a/trunk/net/ipv4/netfilter/arp_tables.c b/trunk/net/ipv4/netfilter/arp_tables.c index 2909c92ecd99..29114a9ccd1d 100644 --- a/trunk/net/ipv4/netfilter/arp_tables.c +++ b/trunk/net/ipv4/netfilter/arp_tables.c @@ -197,7 +197,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff *skb, +static unsigned int arpt_error(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -215,7 +215,7 @@ static inline struct arpt_entry *get_entry(void *base, unsigned int offset) return (struct arpt_entry *)(base + offset); } -unsigned int arpt_do_table(struct sk_buff *skb, +unsigned int 
arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -231,9 +231,9 @@ unsigned int arpt_do_table(struct sk_buff *skb, struct xt_table_info *private; /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull(skb, (sizeof(struct arphdr) + - (2 * skb->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + + (2 * (*pskb)->dev->addr_len) + + (2 * sizeof(u32))))) return NF_DROP; indev = in ? in->name : nulldevname; @@ -245,14 +245,14 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - arp = arp_hdr(skb); + arp = arp_hdr(*pskb); do { - if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { + if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { struct arpt_entry_target *t; int hdr_len; hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * skb->dev->addr_len); + (2 * (*pskb)->dev->addr_len); ADD_COUNTER(e->counters, hdr_len, 1); t = arpt_get_target(e); @@ -290,14 +290,14 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ - verdict = t->u.kernel.target->target(skb, + verdict = t->u.kernel.target->target(pskb, in, out, hook, t->u.kernel.target, t->data); /* Target might have changed stuff. 
*/ - arp = arp_hdr(skb); + arp = arp_hdr(*pskb); if (verdict == ARPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/trunk/net/ipv4/netfilter/arpt_mangle.c b/trunk/net/ipv4/netfilter/arpt_mangle.c index 45fa4e20094a..c4bdab47597f 100644 --- a/trunk/net/ipv4/netfilter/arpt_mangle.c +++ b/trunk/net/ipv4/netfilter/arpt_mangle.c @@ -1,6 +1,5 @@ /* module that allows mangling of the arp payload */ #include -#include #include #include @@ -9,7 +8,7 @@ MODULE_AUTHOR("Bart De Schuymer "); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -19,38 +18,47 @@ target(struct sk_buff *skb, unsigned char *arpptr; int pln, hln; - if (skb_make_writable(skb, skb->len)) - return NF_DROP; + if (skb_shared(*pskb) || skb_cloned(*pskb)) { + struct sk_buff *nskb; - arp = arp_hdr(skb); - arpptr = skb_network_header(skb) + sizeof(*arp); + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return NF_DROP; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + + arp = arp_hdr(*pskb); + arpptr = skb_network_header(*pskb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(skb))) + (arpptr + hln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(skb))) + (arpptr + pln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > skb_tail_pointer(skb))) + (arpptr + 
hln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > skb_tail_pointer(skb))) + (arpptr + pln > skb_tail_pointer(*pskb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } diff --git a/trunk/net/ipv4/netfilter/arptable_filter.c b/trunk/net/ipv4/netfilter/arptable_filter.c index 302d3da5f696..75c023062533 100644 --- a/trunk/net/ipv4/netfilter/arptable_filter.c +++ b/trunk/net/ipv4/netfilter/arptable_filter.c @@ -56,12 +56,12 @@ static struct arpt_table packet_filter = { /* The work comes in here from netfilter.c */ static unsigned int arpt_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(skb, hook, in, out, &packet_filter); + return arpt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops arpt_ops[] = { diff --git a/trunk/net/ipv4/netfilter/ip_queue.c b/trunk/net/ipv4/netfilter/ip_queue.c index 10a2ce09fd8e..23cbfc7c80fd 100644 --- a/trunk/net/ipv4/netfilter/ip_queue.c +++ b/trunk/net/ipv4/netfilter/ip_queue.c @@ -335,7 +335,6 @@ static int ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; - int err; struct iphdr *user_iph = (struct iphdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -348,18 +347,25 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - err = pskb_expand_head(e->skb, 0, - diff - skb_tailroom(e->skb), - GFP_ATOMIC); - if (err) { - printk(KERN_WARNING "ip_queue: error " - "in mangle, dropping packet: %d\n", -err); - return err; + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { + printk(KERN_WARNING "ip_queue: OOM " + "in mangle, 
dropping packet\n"); + return -ENOMEM; } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/trunk/net/ipv4/netfilter/ip_tables.c b/trunk/net/ipv4/netfilter/ip_tables.c index 4b10b98640ac..6486894f450c 100644 --- a/trunk/net/ipv4/netfilter/ip_tables.c +++ b/trunk/net/ipv4/netfilter/ip_tables.c @@ -169,7 +169,7 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, +ipt_error(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -312,7 +312,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff *skb, +ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -331,8 +331,8 @@ ipt_do_table(struct sk_buff *skb, struct xt_table_info *private; /* Initialization */ - ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; + ip = ip_hdr(*pskb); + datalen = (*pskb)->len - ip->ihl * 4; indev = in ? in->name : nulldevname; outdev = out ? 
out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -359,7 +359,7 @@ ipt_do_table(struct sk_buff *skb, struct ipt_entry_target *t; if (IPT_MATCH_ITERATE(e, do_match, - skb, in, out, + *pskb, in, out, offset, &hotdrop) != 0) goto no_match; @@ -371,8 +371,8 @@ ipt_do_table(struct sk_buff *skb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + if (unlikely((*pskb)->nf_trace)) + trace_packet(*pskb, hook, in, out, table->name, private, e); #endif /* Standard target? */ @@ -410,7 +410,7 @@ ipt_do_table(struct sk_buff *skb, ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(skb, + verdict = t->u.kernel.target->target(pskb, in, out, hook, t->u.kernel.target, @@ -428,8 +428,8 @@ ipt_do_table(struct sk_buff *skb, = 0x57acc001; #endif /* Target might have changed stuff. 
*/ - ip = ip_hdr(skb); - datalen = skb->len - ip->ihl * 4; + ip = ip_hdr(*pskb); + datalen = (*pskb)->len - ip->ihl * 4; if (verdict == IPT_CONTINUE) e = (void *)e + e->next_offset; diff --git a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2f544dac72df..27f14e1ebd8b 100644 --- a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -289,7 +289,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -305,7 +305,7 @@ target(struct sk_buff *skb, * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); if (ct == NULL) { printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); /* FIXME: need to drop invalid ones, since replies @@ -316,7 +316,7 @@ target(struct sk_buff *skb, /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if (ip_hdr(skb)->protocol == IPPROTO_ICMP + if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP && (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) return XT_CONTINUE; @@ -325,7 +325,7 @@ target(struct sk_buff *skb, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ - hash = clusterip_hashfn(skb, cipinfo->config); + hash = clusterip_hashfn(*pskb, cipinfo->config); switch (ctinfo) { case IP_CT_NEW: @@ -355,7 +355,7 @@ target(struct sk_buff *skb, /* despite being received via linklayer multicast, this is * actually a unicast IP packet. 
TCP doesn't like PACKET_MULTICAST */ - skb->pkt_type = PACKET_HOST; + (*pskb)->pkt_type = PACKET_HOST; return XT_CONTINUE; } @@ -505,12 +505,12 @@ static void arp_print(struct arp_payload *payload) static unsigned int arp_mangle(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct arphdr *arp = arp_hdr(skb); + struct arphdr *arp = arp_hdr(*pskb); struct arp_payload *payload; struct clusterip_config *c; diff --git a/trunk/net/ipv4/netfilter/ipt_ECN.c b/trunk/net/ipv4/netfilter/ipt_ECN.c index add110060a22..f1253bd3837f 100644 --- a/trunk/net/ipv4/netfilter/ipt_ECN.c +++ b/trunk/net/ipv4/netfilter/ipt_ECN.c @@ -26,15 +26,15 @@ MODULE_DESCRIPTION("iptables ECN modification module"); /* set ECT codepoint from IP header. * return false if there was an error. */ static inline bool -set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) +set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = ip_hdr(*pskb); if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { __u8 oldtos; - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return false; - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); oldtos = iph->tos; iph->tos &= ~IPT_ECN_IP_MASK; iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); @@ -45,13 +45,14 @@ set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) /* Return false if there was an error. */ static inline bool -set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) +set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; __be16 oldval; /* Not enought header? 
*/ - tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(*pskb, ip_hdrlen(*pskb), + sizeof(_tcph), &_tcph); if (!tcph) return false; @@ -61,9 +62,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) tcph->cwr == einfo->proto.tcp.cwr)) return true; - if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) return false; - tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); + tcph = (void *)ip_hdr(*pskb) + ip_hdrlen(*pskb); oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) @@ -71,13 +72,13 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - nf_proto_csum_replace2(&tcph->check, skb, + nf_proto_csum_replace2(&tcph->check, *pskb, oldval, ((__be16 *)tcph)[6], 0); return true; } static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -87,12 +88,12 @@ target(struct sk_buff *skb, const struct ipt_ECN_info *einfo = targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) - if (!set_ect_ip(skb, einfo)) + if (!set_ect_ip(pskb, einfo)) return NF_DROP; if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && ip_hdr(skb)->protocol == IPPROTO_TCP) - if (!set_ect_tcp(skb, einfo)) + && ip_hdr(*pskb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(pskb, einfo)) return NF_DROP; return XT_CONTINUE; diff --git a/trunk/net/ipv4/netfilter/ipt_LOG.c b/trunk/net/ipv4/netfilter/ipt_LOG.c index 4b5e8216a4e7..127a5e89bf14 100644 --- a/trunk/net/ipv4/netfilter/ipt_LOG.c +++ b/trunk/net/ipv4/netfilter/ipt_LOG.c @@ -418,7 +418,7 @@ ipt_log_packet(unsigned int pf, } static unsigned int -ipt_log_target(struct sk_buff *skb, +ipt_log_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ 
-432,7 +432,7 @@ ipt_log_target(struct sk_buff *skb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, + ipt_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); return XT_CONTINUE; } diff --git a/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c b/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c index 44b516e7cb79..3e0b562b2db7 100644 --- a/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -52,7 +52,7 @@ masquerade_check(const char *tablename, } static unsigned int -masquerade_target(struct sk_buff *skb, +masquerade_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -69,7 +69,7 @@ masquerade_target(struct sk_buff *skb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); nat = nfct_nat(ct); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED @@ -82,7 +82,7 @@ masquerade_target(struct sk_buff *skb, return NF_ACCEPT; mr = targinfo; - rt = (struct rtable *)skb->dst; + rt = (struct rtable *)(*pskb)->dst; newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { printk("MASQUERADE: %s ate my IP address\n", out->name); diff --git a/trunk/net/ipv4/netfilter/ipt_NETMAP.c b/trunk/net/ipv4/netfilter/ipt_NETMAP.c index f8699291e33d..41a011d5a065 100644 --- a/trunk/net/ipv4/netfilter/ipt_NETMAP.c +++ b/trunk/net/ipv4/netfilter/ipt_NETMAP.c @@ -43,7 +43,7 @@ check(const char *tablename, } static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -59,14 +59,14 @@ target(struct sk_buff *skb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ 
mr->range[0].max_ip); if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) - new_ip = ip_hdr(skb)->daddr & ~netmask; + new_ip = ip_hdr(*pskb)->daddr & ~netmask; else - new_ip = ip_hdr(skb)->saddr & ~netmask; + new_ip = ip_hdr(*pskb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; newrange = ((struct nf_nat_range) diff --git a/trunk/net/ipv4/netfilter/ipt_REDIRECT.c b/trunk/net/ipv4/netfilter/ipt_REDIRECT.c index f7cf7d61a2d4..6ac7a2373316 100644 --- a/trunk/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/trunk/net/ipv4/netfilter/ipt_REDIRECT.c @@ -47,7 +47,7 @@ redirect_check(const char *tablename, } static unsigned int -redirect_target(struct sk_buff *skb, +redirect_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -63,7 +63,7 @@ redirect_target(struct sk_buff *skb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ @@ -76,7 +76,7 @@ redirect_target(struct sk_buff *skb, newdst = 0; rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); + indev = __in_dev_get_rcu((*pskb)->dev); if (indev && (ifa = indev->ifa_list)) newdst = ifa->ifa_local; rcu_read_unlock(); diff --git a/trunk/net/ipv4/netfilter/ipt_REJECT.c b/trunk/net/ipv4/netfilter/ipt_REJECT.c index dcf4d21d5116..cb038c8fbc9d 100644 --- a/trunk/net/ipv4/netfilter/ipt_REJECT.c +++ b/trunk/net/ipv4/netfilter/ipt_REJECT.c @@ -131,7 +131,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) ) addr_type = RTN_LOCAL; - if (ip_route_me_harder(nskb, addr_type)) + if (ip_route_me_harder(&nskb, addr_type)) goto free_nskb; nskb->ip_summed = CHECKSUM_NONE; @@ -162,7 +162,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); } -static unsigned int reject(struct sk_buff *skb, 
+static unsigned int reject(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -173,7 +173,7 @@ static unsigned int reject(struct sk_buff *skb, /* Our naive response construction doesn't deal with IP options, and probably shouldn't try. */ - if (ip_hdrlen(skb) != sizeof(struct iphdr)) + if (ip_hdrlen(*pskb) != sizeof(struct iphdr)) return NF_DROP; /* WARNING: This code causes reentry within iptables. @@ -181,28 +181,28 @@ static unsigned int reject(struct sk_buff *skb, must return an absolute verdict. --RR */ switch (reject->with) { case IPT_ICMP_NET_UNREACHABLE: - send_unreach(skb, ICMP_NET_UNREACH); + send_unreach(*pskb, ICMP_NET_UNREACH); break; case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(skb, ICMP_HOST_UNREACH); + send_unreach(*pskb, ICMP_HOST_UNREACH); break; case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(skb, ICMP_PROT_UNREACH); + send_unreach(*pskb, ICMP_PROT_UNREACH); break; case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(skb, ICMP_PORT_UNREACH); + send_unreach(*pskb, ICMP_PORT_UNREACH); break; case IPT_ICMP_NET_PROHIBITED: - send_unreach(skb, ICMP_NET_ANO); + send_unreach(*pskb, ICMP_NET_ANO); break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(skb, ICMP_HOST_ANO); + send_unreach(*pskb, ICMP_HOST_ANO); break; case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(skb, ICMP_PKT_FILTERED); + send_unreach(*pskb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, hooknum); + send_reset(*pskb, hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. 
*/ break; diff --git a/trunk/net/ipv4/netfilter/ipt_SAME.c b/trunk/net/ipv4/netfilter/ipt_SAME.c index 8988571436b8..97641f1a97f6 100644 --- a/trunk/net/ipv4/netfilter/ipt_SAME.c +++ b/trunk/net/ipv4/netfilter/ipt_SAME.c @@ -104,7 +104,7 @@ same_destroy(const struct xt_target *target, void *targinfo) } static unsigned int -same_target(struct sk_buff *skb, +same_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,7 +121,7 @@ same_target(struct sk_buff *skb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; diff --git a/trunk/net/ipv4/netfilter/ipt_TOS.c b/trunk/net/ipv4/netfilter/ipt_TOS.c index d4573baa7f27..25f5d0b39065 100644 --- a/trunk/net/ipv4/netfilter/ipt_TOS.c +++ b/trunk/net/ipv4/netfilter/ipt_TOS.c @@ -21,7 +21,7 @@ MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables TOS mangling module"); static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,13 +29,13 @@ target(struct sk_buff *skb, const void *targinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = ip_hdr(*pskb); if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { __u8 oldtos; - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); oldtos = iph->tos; iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; nf_csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); diff --git a/trunk/net/ipv4/netfilter/ipt_TTL.c b/trunk/net/ipv4/netfilter/ipt_TTL.c index c620a0527666..2b54e7b0cfe8 100644 --- a/trunk/net/ipv4/netfilter/ipt_TTL.c +++ b/trunk/net/ipv4/netfilter/ipt_TTL.c @@ -20,7 +20,7 @@ 
MODULE_DESCRIPTION("IP tables TTL modification module"); MODULE_LICENSE("GPL"); static unsigned int -ipt_ttl_target(struct sk_buff *skb, +ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -29,10 +29,10 @@ ipt_ttl_target(struct sk_buff *skb, const struct ipt_TTL_info *info = targinfo; int new_ttl; - if (!skb_make_writable(skb, skb->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); switch (info->mode) { case IPT_TTL_SET: diff --git a/trunk/net/ipv4/netfilter/ipt_ULOG.c b/trunk/net/ipv4/netfilter/ipt_ULOG.c index 212b830765a4..c636d6d63574 100644 --- a/trunk/net/ipv4/netfilter/ipt_ULOG.c +++ b/trunk/net/ipv4/netfilter/ipt_ULOG.c @@ -279,7 +279,7 @@ static void ipt_ulog_packet(unsigned int hooknum, spin_unlock_bh(&ulog_lock); } -static unsigned int ipt_ulog_target(struct sk_buff *skb, +static unsigned int ipt_ulog_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -288,7 +288,7 @@ static unsigned int ipt_ulog_target(struct sk_buff *skb, { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); + ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); return XT_CONTINUE; } diff --git a/trunk/net/ipv4/netfilter/iptable_filter.c b/trunk/net/ipv4/netfilter/iptable_filter.c index ba3262c60437..4f51c1d7d2d6 100644 --- a/trunk/net/ipv4/netfilter/iptable_filter.c +++ b/trunk/net/ipv4/netfilter/iptable_filter.c @@ -62,31 +62,31 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. 
*/ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_filter); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int ipt_local_out_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_filter: ignoring short SOCK_RAW " "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, &packet_filter); + return ipt_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ipt_ops[] = { diff --git a/trunk/net/ipv4/netfilter/iptable_mangle.c b/trunk/net/ipv4/netfilter/iptable_mangle.c index b4360a69d5ca..902446f7cbca 100644 --- a/trunk/net/ipv4/netfilter/iptable_mangle.c +++ b/trunk/net/ipv4/netfilter/iptable_mangle.c @@ -75,17 +75,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_route_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_mangler); + return ipt_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -97,8 +97,8 @@ ipt_local_hook(unsigned int hook, u_int32_t mark; /* root is playing with raw sockets. 
*/ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_mangle: ignoring short SOCK_RAW " "packet.\n"); @@ -106,22 +106,22 @@ ipt_local_hook(unsigned int hook, } /* Save things which could affect route */ - mark = skb->mark; - iph = ip_hdr(skb); + mark = (*pskb)->mark; + iph = ip_hdr(*pskb); saddr = iph->saddr; daddr = iph->daddr; tos = iph->tos; - ret = ipt_do_table(skb, hook, in, out, &packet_mangler); + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); if (iph->saddr != saddr || iph->daddr != daddr || - skb->mark != mark || + (*pskb)->mark != mark || iph->tos != tos) - if (ip_route_me_harder(skb, RTN_UNSPEC)) + if (ip_route_me_harder(pskb, RTN_UNSPEC)) ret = NF_DROP; } diff --git a/trunk/net/ipv4/netfilter/iptable_raw.c b/trunk/net/ipv4/netfilter/iptable_raw.c index 5de6e57ac55c..d6e503395684 100644 --- a/trunk/net/ipv4/netfilter/iptable_raw.c +++ b/trunk/net/ipv4/netfilter/iptable_raw.c @@ -47,30 +47,30 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ipt_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(skb, hook, in, out, &packet_raw); + return ipt_do_table(pskb, hook, in, out, &packet_raw); } static unsigned int ipt_local_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. 
*/ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { + if ((*pskb)->len < sizeof(struct iphdr) || + ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("iptable_raw: ignoring short SOCK_RAW" "packet.\n"); return NF_ACCEPT; } - return ipt_do_table(skb, hook, in, out, &packet_raw); + return ipt_do_table(pskb, hook, in, out, &packet_raw); } /* 'raw' is the very first table. */ diff --git a/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 831e9b29806d..2fcb9249a8da 100644 --- a/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/trunk/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -63,20 +63,19 @@ static int ipv4_print_conntrack(struct seq_file *s, } /* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static struct sk_buff * +nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) { - int err; - skb_orphan(skb); local_bh_disable(); - err = ip_defrag(skb, user); + skb = ip_defrag(skb, user); local_bh_enable(); - if (!err) + if (skb) ip_send_check(ip_hdr(skb)); - return err; + return skb; } static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, @@ -100,17 +99,17 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); + return nf_conntrack_confirm(pskb); } static unsigned int ipv4_conntrack_help(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -121,7 +120,7 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, struct nf_conntrack_helper 
*helper; /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) return NF_ACCEPT; @@ -132,55 +131,56 @@ static unsigned int ipv4_conntrack_help(unsigned int hooknum, helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; - return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + return helper->help(pskb, skb_network_offset(*pskb) + ip_hdrlen(*pskb), ct, ctinfo); } static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* Previously seen (loopback)? Ignore. Do this before fragment check. */ - if (skb->nfct) + if ((*pskb)->nfct) return NF_ACCEPT; /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_IP_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) + if (ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET)) { + *pskb = nf_ct_ipv4_gather_frags(*pskb, + hooknum == NF_IP_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT); + if (!*pskb) return NF_STOLEN; } return NF_ACCEPT; } static unsigned int ipv4_conntrack_in(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(PF_INET, hooknum, pskb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. 
*/ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) { + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(PF_INET, hooknum, pskb); } /* Connection tracking may drop packets, but never alters them, so diff --git a/trunk/net/ipv4/netfilter/nf_nat_amanda.c b/trunk/net/ipv4/netfilter/nf_nat_amanda.c index 35a5aa69cd92..bd93a1d71052 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_amanda.c +++ b/trunk/net/ipv4/netfilter/nf_nat_amanda.c @@ -24,7 +24,7 @@ MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_amanda"); -static unsigned int help(struct sk_buff *skb, +static unsigned int help(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -53,7 +53,7 @@ static unsigned int help(struct sk_buff *skb, return NF_DROP; sprintf(buffer, "%u", port); - ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo, + ret = nf_nat_mangle_udp_packet(pskb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/trunk/net/ipv4/netfilter/nf_nat_core.c b/trunk/net/ipv4/netfilter/nf_nat_core.c index 56e93f692e82..7221aa20e6ff 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_core.c +++ b/trunk/net/ipv4/netfilter/nf_nat_core.c @@ -349,7 +349,7 @@ EXPORT_SYMBOL(nf_nat_setup_info); /* Returns true if succeeded. 
*/ static int manip_pkt(u_int16_t proto, - struct sk_buff *skb, + struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *target, enum nf_nat_manip_type maniptype) @@ -357,19 +357,19 @@ manip_pkt(u_int16_t proto, struct iphdr *iph; struct nf_nat_protocol *p; - if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; - iph = (void *)skb->data + iphdroff; + iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ /* rcu_read_lock()ed by nf_hook_slow */ p = __nf_nat_proto_find(proto); - if (!p->manip_pkt(skb, iphdroff, target, maniptype)) + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) return 0; - iph = (void *)skb->data + iphdroff; + iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { nf_csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); @@ -385,7 +385,7 @@ manip_pkt(u_int16_t proto, unsigned int nf_nat_packet(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff *skb) + struct sk_buff **pskb) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -407,7 +407,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, /* We are aiming to look like inverse of other direction. 
*/ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype)) + if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) return NF_DROP; } return NF_ACCEPT; @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nf_nat_packet); int nf_nat_icmp_reply_translation(struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum, - struct sk_buff *skb) + struct sk_buff **pskb) { struct { struct icmphdr icmp; @@ -426,24 +426,24 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } *inside; struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple inner, target; - int hdrlen = ip_hdrlen(skb); + int hdrlen = ip_hdrlen(*pskb); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); - if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. */ - if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) return 0; /* Must be RELATED */ - NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED || - skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); + NF_CT_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || + (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); /* Redirects on non-null nats must be dropped, else they'll start talking to each other without our translation, and be @@ -458,15 +458,15 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, } pr_debug("icmp_reply_translation: translating error %p manip %u " - "dir %s\n", skb, manip, + "dir %s\n", *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? 
"ORIG" : "REPLY"); /* rcu_read_lock()ed by nf_hook_slow */ l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - if (!nf_ct_get_tuple(skb, - ip_hdrlen(skb) + sizeof(struct icmphdr), - (ip_hdrlen(skb) + + if (!nf_ct_get_tuple(*pskb, + ip_hdrlen(*pskb) + sizeof(struct icmphdr), + (ip_hdrlen(*pskb) + sizeof(struct icmphdr) + inside->ip.ihl * 4), (u_int16_t)AF_INET, inside->ip.protocol, @@ -478,19 +478,19 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, pass all hooks (locally-generated ICMP). Consider incoming packet: PREROUTING (DST manip), routing produces ICMP, goes through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, skb, - ip_hdrlen(skb) + sizeof(inside->icmp), + if (!manip_pkt(inside->ip.protocol, pskb, + ip_hdrlen(*pskb) + sizeof(inside->icmp), &ct->tuplehash[!dir].tuple, !manip)) return 0; - if (skb->ip_summed != CHECKSUM_PARTIAL) { + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)skb->data + ip_hdrlen(skb); + inside = (void *)(*pskb)->data + ip_hdrlen(*pskb); inside->icmp.checksum = 0; inside->icmp.checksum = - csum_fold(skb_checksum(skb, hdrlen, - skb->len - hdrlen, 0)); + csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, 0)); } /* Change outer to look the reply to an incoming packet @@ -506,7 +506,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, if (ct->status & statusbit) { nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, skb, 0, &target, manip)) + if (!manip_pkt(0, pskb, 0, &target, manip)) return 0; } diff --git a/trunk/net/ipv4/netfilter/nf_nat_ftp.c b/trunk/net/ipv4/netfilter/nf_nat_ftp.c index e1a16d3ea4cb..3663bd879c39 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_ftp.c +++ b/trunk/net/ipv4/netfilter/nf_nat_ftp.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip_nat_ftp"); /* FIXME: Time out? 
--RR */ static int -mangle_rfc959_packet(struct sk_buff *skb, +mangle_rfc959_packet(struct sk_buff **pskb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -43,13 +43,13 @@ mangle_rfc959_packet(struct sk_buff *skb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_eprt_packet(struct sk_buff *skb, +mangle_eprt_packet(struct sk_buff **pskb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -63,13 +63,13 @@ mangle_eprt_packet(struct sk_buff *skb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } /* |1|132.235.1.2|6275| */ static int -mangle_epsv_packet(struct sk_buff *skb, +mangle_epsv_packet(struct sk_buff **pskb, __be32 newip, u_int16_t port, unsigned int matchoff, @@ -83,11 +83,11 @@ mangle_epsv_packet(struct sk_buff *skb, pr_debug("calling nf_nat_mangle_tcp_packet\n"); - return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff, + return nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } -static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, +static int (*mangle[])(struct sk_buff **, __be32, u_int16_t, unsigned int, unsigned int, struct nf_conn *, enum ip_conntrack_info) = { @@ -99,7 +99,7 @@ static int (*mangle[])(struct sk_buff *, __be32, u_int16_t, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. 
*/ -static unsigned int nf_nat_ftp(struct sk_buff *skb, +static unsigned int nf_nat_ftp(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -132,7 +132,7 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, if (port == 0) return NF_DROP; - if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) { + if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/trunk/net/ipv4/netfilter/nf_nat_h323.c b/trunk/net/ipv4/netfilter/nf_nat_h323.c index a868c8c41328..c1b059a73708 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_h323.c +++ b/trunk/net/ipv4/netfilter/nf_nat_h323.c @@ -22,12 +22,12 @@ #include /****************************************************************************/ -static int set_addr(struct sk_buff *skb, +static int set_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, unsigned int addroff, __be32 ip, __be16 port) { enum ip_conntrack_info ctinfo; - struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn *ct = nf_ct_get(*pskb, &ctinfo); struct { __be32 ip; __be16 port; @@ -38,8 +38,8 @@ static int set_addr(struct sk_buff *skb, buf.port = port; addroff += dataoff; - if (ip_hdr(skb)->protocol == IPPROTO_TCP) { - if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (ip_hdr(*pskb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -49,13 +49,14 @@ static int set_addr(struct sk_buff *skb, } /* Relocate data pointer */ - th = skb_header_pointer(skb, ip_hdrlen(skb), + th = skb_header_pointer(*pskb, ip_hdrlen(*pskb), sizeof(_tcph), &_tcph); if (th == NULL) return -1; - *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; + *data = (*pskb)->data + ip_hdrlen(*pskb) + + th->doff * 4 + dataoff; } else { - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, 
addroff, sizeof(buf), (char *) &buf, sizeof(buf))) { if (net_ratelimit()) @@ -66,35 +67,36 @@ static int set_addr(struct sk_buff *skb, /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy * or pull everything in a linear buffer, so we can safely * use the skb pointers now */ - *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *data = ((*pskb)->data + ip_hdrlen(*pskb) + + sizeof(struct udphdr)); } return 0; } /****************************************************************************/ -static int set_h225_addr(struct sk_buff *skb, +static int set_h225_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(skb, data, dataoff, taddr->ipAddress.ip, + return set_addr(pskb, data, dataoff, taddr->ipAddress.ip, addr->ip, port); } /****************************************************************************/ -static int set_h245_addr(struct sk_buff *skb, +static int set_h245_addr(struct sk_buff **pskb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) { - return set_addr(skb, data, dataoff, + return set_addr(pskb, data, dataoff, taddr->unicastAddress.iPAddress.network, addr->ip, port); } /****************************************************************************/ -static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, +static int set_sig_addr(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -123,7 +125,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), info->sig_port[!dir]); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir]. 
tuple.dst.u3, info->sig_port[!dir]); @@ -135,7 +137,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, NIPQUAD(addr.ip), port, NIPQUAD(ct->tuplehash[!dir].tuple.src.u3.ip), info->sig_port[!dir]); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir]. tuple.src.u3, info->sig_port[!dir]); @@ -147,7 +149,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, +static int set_ras_addr(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -166,7 +168,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, NIPQUAD(addr.ip), ntohs(port), NIPQUAD(ct->tuplehash[!dir].tuple.dst.u3.ip), ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); - return set_h225_addr(skb, data, 0, &taddr[i], + return set_h225_addr(pskb, data, 0, &taddr[i], &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple. dst.u.udp.port); @@ -177,7 +179,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, +static int nat_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, @@ -242,7 +244,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(skb, data, dataoff, taddr, + if (set_h245_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? 
nated_port + 1 : nated_port)) == 0) { @@ -271,7 +273,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, +static int nat_t120(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr, __be16 port, @@ -299,7 +301,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h245_addr(skb, data, dataoff, taddr, + if (set_h245_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) < 0) { nf_ct_unexpect_related(exp); @@ -316,7 +318,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, +static int nat_h245(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -349,7 +351,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(skb, data, dataoff, taddr, + if (set_h225_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -404,7 +406,7 @@ static void ip_nat_q931_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, +static int nat_q931(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, __be16 port, struct nf_conntrack_expect *exp) @@ -437,7 +439,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (set_h225_addr(skb, data, 0, &taddr[idx], + if (set_h225_addr(pskb, data, 0, 
&taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { /* Save ports */ @@ -448,7 +450,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, if (idx > 0 && get_h225_addr(ct, *data, &taddr[0], &addr, &port) && (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(skb, data, 0, &taddr[0], + set_h225_addr(pskb, data, 0, &taddr[0], &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); } @@ -493,7 +495,7 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new, } /****************************************************************************/ -static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, +static int nat_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, @@ -523,7 +525,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(skb, data, dataoff, taddr, + if (!set_h225_addr(pskb, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons(nated_port)) == 0) { nf_ct_unexpect_related(exp); diff --git a/trunk/net/ipv4/netfilter/nf_nat_helper.c b/trunk/net/ipv4/netfilter/nf_nat_helper.c index 8718da00ef2a..93d8a0a8f035 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_helper.c +++ b/trunk/net/ipv4/netfilter/nf_nat_helper.c @@ -111,14 +111,22 @@ static void mangle_contents(struct sk_buff *skb, } /* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff *skb, unsigned int extra) +static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) { - if (skb->len + extra > 65535) + struct sk_buff *nskb; + + if ((*pskb)->len + extra > 65535) return 0; - if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC)) + nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); + if (!nskb) return 0; + /* Transfer socket to new skb. 
*/ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; return 1; } @@ -131,7 +139,7 @@ static int enlarge_skb(struct sk_buff *skb, unsigned int extra) * * */ int -nf_nat_mangle_tcp_packet(struct sk_buff *skb, +nf_nat_mangle_tcp_packet(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -139,37 +147,37 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = (struct rtable *)(*pskb)->dst; struct iphdr *iph; struct tcphdr *tcph; int oldlen, datalen; - if (!skb_make_writable(skb, skb->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(skb) && - !enlarge_skb(skb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(*pskb) && + !enlarge_skb(pskb, rep_len - match_len)) return 0; - SKB_LINEAR_ASSERT(skb); + SKB_LINEAR_ASSERT(*pskb); - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); tcph = (void *)iph + iph->ihl*4; - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + tcph->doff*4, + oldlen = (*pskb)->len - iph->ihl*4; + mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); - datalen = skb->len - iph->ihl*4; - if (skb->ip_summed != CHECKSUM_PARTIAL) { + datalen = (*pskb)->len - iph->ihl*4; + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - skb->dev->features & NETIF_F_V4_CSUM) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + - skb_network_offset(skb) + - iph->ihl * 4; - skb->csum_offset = offsetof(struct tcphdr, check); + (*pskb)->dev->features & NETIF_F_V4_CSUM) { + (*pskb)->ip_summed = CHECKSUM_PARTIAL; + (*pskb)->csum_start = skb_headroom(*pskb) + + skb_network_offset(*pskb) + + iph->ihl * 4; + (*pskb)->csum_offset = offsetof(struct tcphdr, check); tcph->check = 
~tcp_v4_check(datalen, iph->saddr, iph->daddr, 0); } else { @@ -180,7 +188,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, datalen, 0)); } } else - nf_proto_csum_replace2(&tcph->check, skb, + nf_proto_csum_replace2(&tcph->check, *pskb, htons(oldlen), htons(datalen), 1); if (rep_len != match_len) { @@ -189,7 +197,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, (int)rep_len - (int)match_len, ct, ctinfo); /* Tell TCP window tracking about seq change */ - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), + nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, CTINFO2DIR(ctinfo)); } return 1; @@ -207,7 +215,7 @@ EXPORT_SYMBOL(nf_nat_mangle_tcp_packet); * should be fairly easy to do. */ int -nf_nat_mangle_udp_packet(struct sk_buff *skb, +nf_nat_mangle_udp_packet(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int match_offset, @@ -215,48 +223,48 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, const char *rep_buffer, unsigned int rep_len) { - struct rtable *rt = (struct rtable *)skb->dst; + struct rtable *rt = (struct rtable *)(*pskb)->dst; struct iphdr *iph; struct udphdr *udph; int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ - iph = ip_hdr(skb); - if (skb->len < iph->ihl*4 + sizeof(*udph) + + iph = ip_hdr(*pskb); + if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + match_offset + match_len) return 0; - if (!skb_make_writable(skb, skb->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return 0; if (rep_len > match_len && - rep_len - match_len > skb_tailroom(skb) && - !enlarge_skb(skb, rep_len - match_len)) + rep_len - match_len > skb_tailroom(*pskb) && + !enlarge_skb(pskb, rep_len - match_len)) return 0; - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); udph = (void *)iph + iph->ihl*4; - oldlen = skb->len - iph->ihl*4; - mangle_contents(skb, iph->ihl*4 + sizeof(*udph), + oldlen = (*pskb)->len - iph->ihl*4; + mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* 
update the length of the UDP packet */ - datalen = skb->len - iph->ihl*4; + datalen = (*pskb)->len - iph->ihl*4; udph->len = htons(datalen); /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL) + if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) return 1; - if (skb->ip_summed != CHECKSUM_PARTIAL) { + if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { if (!(rt->rt_flags & RTCF_LOCAL) && - skb->dev->features & NETIF_F_V4_CSUM) { - skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum_start = skb_headroom(skb) + - skb_network_offset(skb) + - iph->ihl * 4; - skb->csum_offset = offsetof(struct udphdr, check); + (*pskb)->dev->features & NETIF_F_V4_CSUM) { + (*pskb)->ip_summed = CHECKSUM_PARTIAL; + (*pskb)->csum_start = skb_headroom(*pskb) + + skb_network_offset(*pskb) + + iph->ihl * 4; + (*pskb)->csum_offset = offsetof(struct udphdr, check); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, 0); @@ -270,7 +278,7 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, udph->check = CSUM_MANGLED_0; } } else - nf_proto_csum_replace2(&udph->check, skb, + nf_proto_csum_replace2(&udph->check, *pskb, htons(oldlen), htons(datalen), 1); return 1; @@ -322,7 +330,7 @@ sack_adjust(struct sk_buff *skb, /* TCP SACK sequence number adjustment */ static inline unsigned int -nf_nat_sack_adjust(struct sk_buff *skb, +nf_nat_sack_adjust(struct sk_buff **pskb, struct tcphdr *tcph, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -330,17 +338,17 @@ nf_nat_sack_adjust(struct sk_buff *skb, unsigned int dir, optoff, optend; struct nf_conn_nat *nat = nfct_nat(ct); - optoff = ip_hdrlen(skb) + sizeof(struct tcphdr); - optend = ip_hdrlen(skb) + tcph->doff * 4; + optoff = ip_hdrlen(*pskb) + sizeof(struct tcphdr); + optend = ip_hdrlen(*pskb) + tcph->doff * 4; - if (!skb_make_writable(skb, optend)) + if (!skb_make_writable(pskb, optend)) return 0; dir = CTINFO2DIR(ctinfo); while (optoff < optend) { /* Usually: 
option, length. */ - unsigned char *op = skb->data + optoff; + unsigned char *op = (*pskb)->data + optoff; switch (op[0]) { case TCPOPT_EOL: @@ -357,7 +365,7 @@ nf_nat_sack_adjust(struct sk_buff *skb, if (op[0] == TCPOPT_SACK && op[1] >= 2+TCPOLEN_SACK_PERBLOCK && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(skb, tcph, optoff+2, + sack_adjust(*pskb, tcph, optoff+2, optoff+op[1], &nat->seq[!dir]); optoff += op[1]; } @@ -367,7 +375,7 @@ nf_nat_sack_adjust(struct sk_buff *skb, /* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ int -nf_nat_seq_adjust(struct sk_buff *skb, +nf_nat_seq_adjust(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -382,10 +390,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, this_way = &nat->seq[dir]; other_way = &nat->seq[!dir]; - if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + if (!skb_make_writable(pskb, ip_hdrlen(*pskb) + sizeof(*tcph))) return 0; - tcph = (void *)skb->data + ip_hdrlen(skb); + tcph = (void *)(*pskb)->data + ip_hdrlen(*pskb); if (after(ntohl(tcph->seq), this_way->correction_pos)) newseq = htonl(ntohl(tcph->seq) + this_way->offset_after); else @@ -397,8 +405,8 @@ nf_nat_seq_adjust(struct sk_buff *skb, else newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before); - nf_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0); - nf_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->seq, newseq, 0); + nf_proto_csum_replace4(&tcph->check, *pskb, tcph->ack_seq, newack, 0); pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), @@ -407,10 +415,10 @@ nf_nat_seq_adjust(struct sk_buff *skb, tcph->seq = newseq; tcph->ack_seq = newack; - if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo)) + if (!nf_nat_sack_adjust(pskb, tcph, ct, ctinfo)) return 0; - nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir); + 
nf_conntrack_tcp_update(*pskb, ip_hdrlen(*pskb), ct, dir); return 1; } diff --git a/trunk/net/ipv4/netfilter/nf_nat_irc.c b/trunk/net/ipv4/netfilter/nf_nat_irc.c index 766e2c16c6b9..bcf274bba602 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_irc.c +++ b/trunk/net/ipv4/netfilter/nf_nat_irc.c @@ -27,7 +27,7 @@ MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_irc"); -static unsigned int help(struct sk_buff *skb, +static unsigned int help(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -58,7 +58,7 @@ static unsigned int help(struct sk_buff *skb, pr_debug("nf_nat_irc: inserting '%s' == %u.%u.%u.%u, port %u\n", buffer, NIPQUAD(ip), port); - ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, + ret = nf_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) diff --git a/trunk/net/ipv4/netfilter/nf_nat_pptp.c b/trunk/net/ipv4/netfilter/nf_nat_pptp.c index e1385a099079..984ec8308b2e 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_pptp.c +++ b/trunk/net/ipv4/netfilter/nf_nat_pptp.c @@ -110,7 +110,7 @@ static void pptp_nat_expected(struct nf_conn *ct, /* outbound packets == from PNS to PAC */ static int -pptp_outbound_pkt(struct sk_buff *skb, +pptp_outbound_pkt(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -175,7 +175,7 @@ pptp_outbound_pkt(struct sk_buff *skb, ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); /* mangle packet */ - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, cid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_callid), (char *)&new_callid, @@ -213,7 +213,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig, /* inbound packets == from PAC to PNS */ static int -pptp_inbound_pkt(struct sk_buff *skb, +pptp_inbound_pkt(struct sk_buff **pskb, struct nf_conn 
*ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, @@ -268,7 +268,7 @@ pptp_inbound_pkt(struct sk_buff *skb, pr_debug("altering peer call id from 0x%04x to 0x%04x\n", ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); - if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + if (nf_nat_mangle_tcp_packet(pskb, ct, ctinfo, pcid_off + sizeof(struct pptp_pkt_hdr) + sizeof(struct PptpControlHeader), sizeof(new_pcid), (char *)&new_pcid, diff --git a/trunk/net/ipv4/netfilter/nf_nat_proto_gre.c b/trunk/net/ipv4/netfilter/nf_nat_proto_gre.c index b820f9960356..d562290b1820 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/trunk/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -98,21 +98,21 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, /* manipulate a GRE packet according to maniptype */ static int -gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, +gre_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { struct gre_hdr *greh; struct gre_hdr_pptp *pgreh; - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); unsigned int hdroff = iphdroff + iph->ihl * 4; /* pgreh includes two optional 32bit fields which are not required * to be there. 
That's where the magic '8' comes from */ - if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) + if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh) - 8)) return 0; - greh = (void *)skb->data + hdroff; + greh = (void *)(*pskb)->data + hdroff; pgreh = (struct gre_hdr_pptp *)greh; /* we only have destination manip of a packet, since 'source key' diff --git a/trunk/net/ipv4/netfilter/nf_nat_proto_icmp.c b/trunk/net/ipv4/netfilter/nf_nat_proto_icmp.c index b9fc724388fc..898d73771155 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/trunk/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -52,20 +52,20 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -icmp_manip_pkt(struct sk_buff *skb, +icmp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct icmphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; - hdr = (struct icmphdr *)(skb->data + hdroff); - nf_proto_csum_replace2(&hdr->checksum, skb, + hdr = (struct icmphdr *)((*pskb)->data + hdroff); + nf_proto_csum_replace2(&hdr->checksum, *pskb, hdr->un.echo.id, tuple->src.u.icmp.id, 0); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; diff --git a/trunk/net/ipv4/netfilter/nf_nat_proto_tcp.c b/trunk/net/ipv4/netfilter/nf_nat_proto_tcp.c index 6bab2e184455..5bbbb2acdc70 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/trunk/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -88,12 +88,12 @@ tcp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -tcp_manip_pkt(struct sk_buff *skb, +tcp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr 
*)(skb->data + iphdroff); + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; __be32 oldip, newip; @@ -103,14 +103,14 @@ tcp_manip_pkt(struct sk_buff *skb, /* this could be a inner header returned in icmp packet; in such cases we cannot update the checksum field since it is outside of the 8 bytes of transport layer headers we are guaranteed */ - if (skb->len >= hdroff + sizeof(struct tcphdr)) + if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) hdrsize = sizeof(struct tcphdr); - if (!skb_make_writable(skb, hdroff + hdrsize)) + if (!skb_make_writable(pskb, hdroff + hdrsize)) return 0; - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct tcphdr *)(skb->data + hdroff); + iph = (struct iphdr *)((*pskb)->data + iphdroff); + hdr = (struct tcphdr *)((*pskb)->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -132,8 +132,8 @@ tcp_manip_pkt(struct sk_buff *skb, if (hdrsize < sizeof(*hdr)) return 1; - nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0); + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, oldport, newport, 0); return 1; } diff --git a/trunk/net/ipv4/netfilter/nf_nat_proto_udp.c b/trunk/net/ipv4/netfilter/nf_nat_proto_udp.c index cbf1a61e2908..a0af4fd95584 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/trunk/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -86,22 +86,22 @@ udp_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -udp_manip_pkt(struct sk_buff *skb, +udp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; 
__be32 oldip, newip; __be16 *portptr, newport; - if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; - iph = (struct iphdr *)(skb->data + iphdroff); - hdr = (struct udphdr *)(skb->data + hdroff); + iph = (struct iphdr *)((*pskb)->data + iphdroff); + hdr = (struct udphdr *)((*pskb)->data + hdroff); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ @@ -116,9 +116,9 @@ udp_manip_pkt(struct sk_buff *skb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) { - nf_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1); - nf_proto_csum_replace2(&hdr->check, skb, *portptr, newport, + if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { + nf_proto_csum_replace4(&hdr->check, *pskb, oldip, newip, 1); + nf_proto_csum_replace2(&hdr->check, *pskb, *portptr, newport, 0); if (!hdr->check) hdr->check = CSUM_MANGLED_0; diff --git a/trunk/net/ipv4/netfilter/nf_nat_proto_unknown.c b/trunk/net/ipv4/netfilter/nf_nat_proto_unknown.c index cfd2742e9706..f50d0203f9c0 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/trunk/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -37,7 +37,7 @@ static int unknown_unique_tuple(struct nf_conntrack_tuple *tuple, } static int -unknown_manip_pkt(struct sk_buff *skb, +unknown_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) diff --git a/trunk/net/ipv4/netfilter/nf_nat_rule.c b/trunk/net/ipv4/netfilter/nf_nat_rule.c index 46b25ab5f78b..76ec59ae524d 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_rule.c +++ b/trunk/net/ipv4/netfilter/nf_nat_rule.c @@ -65,7 +65,7 @@ static struct xt_table nat_table = { }; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff *skb, +static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, 
@@ -78,7 +78,7 @@ static unsigned int ipt_snat_target(struct sk_buff *skb, NF_CT_ASSERT(hooknum == NF_IP_POST_ROUTING); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || @@ -107,7 +107,7 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff *skb, +static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -121,14 +121,14 @@ static unsigned int ipt_dnat_target(struct sk_buff *skb, NF_CT_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); if (hooknum == NF_IP_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(skb)->daddr, + warn_if_extra_mangle(ip_hdr(*pskb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], hooknum); @@ -204,7 +204,7 @@ alloc_null_binding_confirmed(struct nf_conn *ct, unsigned int hooknum) return nf_nat_setup_info(ct, &range, hooknum); } -int nf_nat_rule_find(struct sk_buff *skb, +int nf_nat_rule_find(struct sk_buff **pskb, unsigned int hooknum, const struct net_device *in, const struct net_device *out, @@ -212,7 +212,7 @@ int nf_nat_rule_find(struct sk_buff *skb, { int ret; - ret = ipt_do_table(skb, hooknum, in, out, &nat_table); + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/trunk/net/ipv4/netfilter/nf_nat_sip.c b/trunk/net/ipv4/netfilter/nf_nat_sip.c index ce9edbcc01e3..e14d41976c27 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_sip.c +++ b/trunk/net/ipv4/netfilter/nf_nat_sip.c @@ -60,7 +60,7 @@ static void 
addr_map_init(struct nf_conn *ct, struct addr_map *map) } } -static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, +static int map_sip_addr(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, enum sip_header_pos pos, struct addr_map *map) { @@ -84,15 +84,15 @@ static int map_sip_addr(struct sk_buff *skb, enum ip_conntrack_info ctinfo, } else return 1; - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, matchoff, matchlen, addr, addrlen)) return 0; - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); return 1; } -static unsigned int ip_nat_sip(struct sk_buff *skb, +static unsigned int ip_nat_sip(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) @@ -101,8 +101,8 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, struct addr_map map; int dataoff, datalen; - dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); - datalen = skb->len - dataoff; + dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); + datalen = (*pskb)->len - dataoff; if (datalen < sizeof("SIP/2.0") - 1) return NF_ACCEPT; @@ -121,19 +121,19 @@ static unsigned int ip_nat_sip(struct sk_buff *skb, else pos = POS_REQ_URI; - if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, pos, &map)) + if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, pos, &map)) return NF_DROP; } - if (!map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || - !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_TO, &map) || - !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || - !map_sip_addr(skb, ctinfo, ct, dptr, datalen, POS_CONTACT, &map)) + if (!map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_FROM, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_TO, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, datalen, POS_VIA, &map) || + !map_sip_addr(pskb, ctinfo, ct, dptr, 
datalen, POS_CONTACT, &map)) return NF_DROP; return NF_ACCEPT; } -static unsigned int mangle_sip_packet(struct sk_buff *skb, +static unsigned int mangle_sip_packet(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr, size_t dlen, @@ -145,16 +145,16 @@ static unsigned int mangle_sip_packet(struct sk_buff *skb, if (ct_sip_get_info(ct, *dptr, dlen, &matchoff, &matchlen, pos) <= 0) return 0; - if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + if (!nf_nat_mangle_udp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, bufflen)) return 0; /* We need to reload this. Thanks Patrick. */ - *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + *dptr = (*pskb)->data + ip_hdrlen(*pskb) + sizeof(struct udphdr); return 1; } -static int mangle_content_len(struct sk_buff *skb, +static int mangle_content_len(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char *dptr) @@ -163,22 +163,22 @@ static int mangle_content_len(struct sk_buff *skb, char buffer[sizeof("65536")]; int bufflen; - dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); /* Get actual SDP lenght */ - if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, &matchlen, POS_SDP_HEADER) > 0) { /* since ct_sip_get_info() give us a pointer passing 'v=' we need to add 2 bytes in this count. 
*/ - int c_len = skb->len - dataoff - matchoff + 2; + int c_len = (*pskb)->len - dataoff - matchoff + 2; /* Now, update SDP length */ - if (ct_sip_get_info(ct, dptr, skb->len - dataoff, &matchoff, + if (ct_sip_get_info(ct, dptr, (*pskb)->len - dataoff, &matchoff, &matchlen, POS_CONTENT) > 0) { bufflen = sprintf(buffer, "%u", c_len); - return nf_nat_mangle_udp_packet(skb, ct, ctinfo, + return nf_nat_mangle_udp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, bufflen); } @@ -186,7 +186,7 @@ static int mangle_content_len(struct sk_buff *skb, return 0; } -static unsigned int mangle_sdp(struct sk_buff *skb, +static unsigned int mangle_sdp(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, __be32 newip, u_int16_t port, @@ -195,25 +195,25 @@ static unsigned int mangle_sdp(struct sk_buff *skb, char buffer[sizeof("nnn.nnn.nnn.nnn")]; unsigned int dataoff, bufflen; - dataoff = ip_hdrlen(skb) + sizeof(struct udphdr); + dataoff = ip_hdrlen(*pskb) + sizeof(struct udphdr); /* Mangle owner and contact info. */ bufflen = sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(newip)); - if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, buffer, bufflen, POS_OWNER_IP4)) return 0; - if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, buffer, bufflen, POS_CONNECTION_IP4)) return 0; /* Mangle media port. 
*/ bufflen = sprintf(buffer, "%u", port); - if (!mangle_sip_packet(skb, ctinfo, ct, &dptr, skb->len - dataoff, + if (!mangle_sip_packet(pskb, ctinfo, ct, &dptr, (*pskb)->len - dataoff, buffer, bufflen, POS_MEDIA)) return 0; - return mangle_content_len(skb, ctinfo, ct, dptr); + return mangle_content_len(pskb, ctinfo, ct, dptr); } static void ip_nat_sdp_expect(struct nf_conn *ct, @@ -241,7 +241,7 @@ static void ip_nat_sdp_expect(struct nf_conn *ct, /* So, this packet has hit the connection tracking matching code. Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_sdp(struct sk_buff *skb, +static unsigned int ip_nat_sdp(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) @@ -277,7 +277,7 @@ static unsigned int ip_nat_sdp(struct sk_buff *skb, if (port == 0) return NF_DROP; - if (!mangle_sdp(skb, ctinfo, ct, newip, port, dptr)) { + if (!mangle_sdp(pskb, ctinfo, ct, newip, port, dptr)) { nf_ct_unexpect_related(exp); return NF_DROP; } diff --git a/trunk/net/ipv4/netfilter/nf_nat_snmp_basic.c b/trunk/net/ipv4/netfilter/nf_nat_snmp_basic.c index 03709d6b4b06..6bfcd3a90f08 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/trunk/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1188,9 +1188,9 @@ static int snmp_parse_mangle(unsigned char *msg, */ static int snmp_translate(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - struct sk_buff *skb) + struct sk_buff **pskb) { - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = ip_hdr(*pskb); struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); @@ -1225,13 +1225,13 @@ static int snmp_translate(struct nf_conn *ct, /* We don't actually set up expectations, just adjust internal IP * addresses if this is being NATted */ -static int help(struct sk_buff *skb, unsigned int protoff, +static int help(struct sk_buff **pskb, unsigned 
int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { int dir = CTINFO2DIR(ctinfo); unsigned int ret; - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = ip_hdr(*pskb); struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); /* SNMP replies and originating SNMP traps get mangled */ @@ -1250,7 +1250,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, * enough room for a UDP header. Just verify the UDP length field so we * can mess around with the payload. */ - if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { + if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { if (net_ratelimit()) printk(KERN_WARNING "SNMP: dropping malformed packet " "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", @@ -1258,11 +1258,11 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_DROP; } - if (!skb_make_writable(skb, skb->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, skb); + ret = snmp_translate(ct, ctinfo, pskb); spin_unlock_bh(&snmp_lock); return ret; } diff --git a/trunk/net/ipv4/netfilter/nf_nat_standalone.c b/trunk/net/ipv4/netfilter/nf_nat_standalone.c index 7db76ea9af91..46cc99def165 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_standalone.c +++ b/trunk/net/ipv4/netfilter/nf_nat_standalone.c @@ -67,7 +67,7 @@ static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) static unsigned int nf_nat_fn(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -80,9 +80,9 @@ nf_nat_fn(unsigned int hooknum, /* We never see fragments: conntrack defrags on pre-routing and local-out, and nf_nat_out protects post-routing. 
*/ - NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + NF_CT_ASSERT(!(ip_hdr(*pskb)->frag_off & htons(IP_MF | IP_OFFSET))); - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. Hence it's the user's responsibilty to packet filter it out, or implement conntrack/NAT for that @@ -91,10 +91,10 @@ nf_nat_fn(unsigned int hooknum, /* Exception: ICMP redirect to new connection (not in hash table yet). We must not let this through, in case we're doing NAT to the same network. */ - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { struct icmphdr _hdr, *hp; - hp = skb_header_pointer(skb, ip_hdrlen(skb), + hp = skb_header_pointer(*pskb, ip_hdrlen(*pskb), sizeof(_hdr), &_hdr); if (hp != NULL && hp->type == ICMP_REDIRECT) @@ -119,9 +119,9 @@ nf_nat_fn(unsigned int hooknum, switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: - if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (ip_hdr(*pskb)->protocol == IPPROTO_ICMP) { if (!nf_nat_icmp_reply_translation(ct, ctinfo, - hooknum, skb)) + hooknum, pskb)) return NF_DROP; else return NF_ACCEPT; @@ -141,7 +141,7 @@ nf_nat_fn(unsigned int hooknum, /* LOCAL_IN hook doesn't have a chain! 
*/ ret = alloc_null_binding(ct, hooknum); else - ret = nf_nat_rule_find(skb, hooknum, in, out, + ret = nf_nat_rule_find(pskb, hooknum, in, out, ct); if (ret != NF_ACCEPT) { @@ -159,31 +159,31 @@ nf_nat_fn(unsigned int hooknum, ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); } - return nf_nat_packet(ct, ctinfo, hooknum, skb); + return nf_nat_packet(ct, ctinfo, hooknum, pskb); } static unsigned int nf_nat_in(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { unsigned int ret; - __be32 daddr = ip_hdr(skb)->daddr; + __be32 daddr = ip_hdr(*pskb)->daddr; - ret = nf_nat_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - daddr != ip_hdr(skb)->daddr) { - dst_release(skb->dst); - skb->dst = NULL; + daddr != ip_hdr(*pskb)->daddr) { + dst_release((*pskb)->dst); + (*pskb)->dst = NULL; } return ret; } static unsigned int nf_nat_out(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -195,14 +195,14 @@ nf_nat_out(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) + if ((*pskb)->len < sizeof(struct iphdr) || + ip_hdrlen(*pskb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.src.u3.ip != @@ -210,7 +210,7 @@ nf_nat_out(unsigned int hooknum, || ct->tuplehash[dir].tuple.src.u.all != ct->tuplehash[!dir].tuple.dst.u.all ) - return ip_xfrm_me_harder(skb) == 0 ? 
ret : NF_DROP; + return ip_xfrm_me_harder(pskb) == 0 ? ret : NF_DROP; } #endif return ret; @@ -218,7 +218,7 @@ nf_nat_out(unsigned int hooknum, static unsigned int nf_nat_local_fn(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -228,24 +228,24 @@ nf_nat_local_fn(unsigned int hooknum, unsigned int ret; /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) || - ip_hdrlen(skb) < sizeof(struct iphdr)) + if ((*pskb)->len < sizeof(struct iphdr) || + ip_hdrlen(*pskb) < sizeof(struct iphdr)) return NF_ACCEPT; - ret = nf_nat_fn(hooknum, skb, in, out, okfn); + ret = nf_nat_fn(hooknum, pskb, in, out, okfn); if (ret != NF_DROP && ret != NF_STOLEN && - (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + (ct = nf_ct_get(*pskb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); if (ct->tuplehash[dir].tuple.dst.u3.ip != ct->tuplehash[!dir].tuple.src.u3.ip) { - if (ip_route_me_harder(skb, RTN_UNSPEC)) + if (ip_route_me_harder(pskb, RTN_UNSPEC)) ret = NF_DROP; } #ifdef CONFIG_XFRM else if (ct->tuplehash[dir].tuple.dst.u.all != ct->tuplehash[!dir].tuple.src.u.all) - if (ip_xfrm_me_harder(skb)) + if (ip_xfrm_me_harder(pskb)) ret = NF_DROP; #endif } @@ -254,7 +254,7 @@ nf_nat_local_fn(unsigned int hooknum, static unsigned int nf_nat_adjust(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -262,10 +262,10 @@ nf_nat_adjust(unsigned int hooknum, struct nf_conn *ct; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { pr_debug("nf_nat_standalone: adjusting sequence number\n"); - if (!nf_nat_seq_adjust(skb, ct, ctinfo)) + if (!nf_nat_seq_adjust(pskb, ct, ctinfo)) return NF_DROP; } return NF_ACCEPT; diff --git 
a/trunk/net/ipv4/netfilter/nf_nat_tftp.c b/trunk/net/ipv4/netfilter/nf_nat_tftp.c index 0ecec701cb44..04dfeaefec02 100644 --- a/trunk/net/ipv4/netfilter/nf_nat_tftp.c +++ b/trunk/net/ipv4/netfilter/nf_nat_tftp.c @@ -20,7 +20,7 @@ MODULE_DESCRIPTION("TFTP NAT helper"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ip_nat_tftp"); -static unsigned int help(struct sk_buff *skb, +static unsigned int help(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) { diff --git a/trunk/net/ipv4/proc.c b/trunk/net/ipv4/proc.c index fd16cb8f8abe..e5b05b039101 100644 --- a/trunk/net/ipv4/proc.c +++ b/trunk/net/ipv4/proc.c @@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); - seq_printf(seq, "FRAG: inuse %d memory %d\n", - ip_frag_nqueues(), ip_frag_mem()); + seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues, + atomic_read(&ip_frag_mem)); return 0; } diff --git a/trunk/net/ipv4/sysctl_net_ipv4.c b/trunk/net/ipv4/sysctl_net_ipv4.c index c98ef16effd2..eb286abcf5dc 100644 --- a/trunk/net/ipv4/sysctl_net_ipv4.c +++ b/trunk/net/ipv4/sysctl_net_ipv4.c @@ -19,7 +19,6 @@ #include #include #include -#include /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -358,7 +357,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_HIGH_THRESH, .procname = "ipfrag_high_thresh", - .data = &ip4_frags_ctl.high_thresh, + .data = &sysctl_ipfrag_high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -366,7 +365,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_LOW_THRESH, .procname = "ipfrag_low_thresh", - .data = &ip4_frags_ctl.low_thresh, + .data = &sysctl_ipfrag_low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -382,7 +381,7 @@ ctl_table ipv4_table[] = { { 
.ctl_name = NET_IPV4_IPFRAG_TIME, .procname = "ipfrag_time", - .data = &ip4_frags_ctl.timeout, + .data = &sysctl_ipfrag_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -733,7 +732,7 @@ ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_IPFRAG_SECRET_INTERVAL, .procname = "ipfrag_secret_interval", - .data = &ip4_frags_ctl.secret_interval, + .data = &sysctl_ipfrag_secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, diff --git a/trunk/net/ipv4/tcp_input.c b/trunk/net/ipv4/tcp_input.c index 0f00966b1784..0a42e9340346 100644 --- a/trunk/net/ipv4/tcp_input.c +++ b/trunk/net/ipv4/tcp_input.c @@ -1995,7 +1995,8 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, } /* Mark head of queue up as lost. */ -static void tcp_mark_head_lost(struct sock *sk, int packets) +static void tcp_mark_head_lost(struct sock *sk, + int packets, u32 high_seq) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; @@ -2018,7 +2019,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) tp->lost_skb_hint = skb; tp->lost_cnt_hint = cnt; cnt += tcp_skb_pcount(skb); - if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq)) + if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -2039,9 +2040,9 @@ static void tcp_update_scoreboard(struct sock *sk) int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; - tcp_mark_head_lost(sk, lost); + tcp_mark_head_lost(sk, lost, tp->high_seq); } else { - tcp_mark_head_lost(sk, 1); + tcp_mark_head_lost(sk, 1, tp->high_seq); } /* New heuristics: it is possible only after we switched @@ -2380,7 +2381,7 @@ tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag) before(tp->snd_una, tp->high_seq) && icsk->icsk_ca_state != TCP_CA_Open && tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp->fackets_out 
- tp->reordering); + tcp_mark_head_lost(sk, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } diff --git a/trunk/net/ipv4/xfrm4_output.c b/trunk/net/ipv4/xfrm4_output.c index a4edd666318b..434ef302ba83 100644 --- a/trunk/net/ipv4/xfrm4_output.c +++ b/trunk/net/ipv4/xfrm4_output.c @@ -78,7 +78,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, + err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -86,7 +86,7 @@ static int xfrm4_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET, NF_IP_POST_ROUTING, skb, NULL, + err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, skb->dst->dev, xfrm4_output_finish2); if (unlikely(err != 1)) break; diff --git a/trunk/net/ipv6/exthdrs.c b/trunk/net/ipv6/exthdrs.c index 1e89efd38a0c..c82d4d49f71f 100644 --- a/trunk/net/ipv6/exthdrs.c +++ b/trunk/net/ipv6/exthdrs.c @@ -102,7 +102,7 @@ EXPORT_SYMBOL_GPL(ipv6_find_tlv); struct tlvtype_proc { int type; - int (*func)(struct sk_buff *skb, int offset); + int (*func)(struct sk_buff **skbp, int offset); }; /********************* @@ -111,8 +111,10 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -137,8 +139,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct 
tlvtype_proc *curr; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); @@ -169,13 +172,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skb, off) == 0) + if (curr->func(skbp, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skb, off) == 0) + if (ip6_tlvopt_unknown(skbp, off) == 0) return 0; } break; @@ -195,8 +198,9 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) *****************************/ #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -static int ipv6_dest_hao(struct sk_buff *skb, int optoff) +static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); @@ -230,13 +234,22 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff) goto discard; if (skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); + struct inet6_skb_parm *opt2; + + if (skb2 == NULL) goto discard; + opt2 = IP6CB(skb2); + memcpy(opt2, opt, sizeof(*opt2)); + + kfree_skb(skb); + /* update all variable using below by copied skbuff */ - hao = (struct ipv6_destopt_hao *)(skb_network_header(skb) + + *skbp = skb = skb2; + hao = (struct ipv6_destopt_hao *)(skb_network_header(skb2) + optoff); - ipv6h = ipv6_hdr(skb); + ipv6h = ipv6_hdr(skb2); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -267,8 +280,9 @@ static struct tlvtype_proc tlvprocdestopt_lst[] = { {-1, NULL} }; -static int ipv6_destopt_rcv(struct sk_buff *skb) +static int ipv6_destopt_rcv(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dstbuf; @@ -290,8 +304,9 @@ static int 
ipv6_destopt_rcv(struct sk_buff *skb) #endif dst = dst_clone(skb->dst); - if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { dst_release(dst); + skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) @@ -322,8 +337,10 @@ void __init ipv6_destopt_init(void) NONE header. No data in packet. ********************************/ -static int ipv6_nodata_rcv(struct sk_buff *skb) +static int ipv6_nodata_rcv(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; + kfree_skb(skb); return 0; } @@ -343,8 +360,9 @@ void __init ipv6_nodata_init(void) Routing header. ********************************/ -static int ipv6_rthdr_rcv(struct sk_buff *skb) +static int ipv6_rthdr_rcv(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); struct in6_addr *addr = NULL; struct in6_addr daddr; @@ -446,14 +464,18 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) Do not damage packets queued somewhere. 
*/ if (skb_cloned(skb)) { + struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); /* the copy is a forwarded packet */ - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { + if (skb2 == NULL) { IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; } - hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb); + kfree_skb(skb); + *skbp = skb = skb2; + opt = IP6CB(skb2); + hdr = (struct ipv6_rt_hdr *)skb_transport_header(skb2); } if (skb->ip_summed == CHECKSUM_COMPLETE) @@ -556,8 +578,9 @@ static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb) /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff *skb, int optoff) +static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); if (nh[optoff + 1] == 2) { @@ -572,8 +595,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) +static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; const unsigned char *nh = skb_network_header(skb); u32 pkt_len; @@ -624,8 +648,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff *skb) +int ipv6_parse_hopopts(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -642,7 +667,8 @@ int ipv6_parse_hopopts(struct sk_buff *skb) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { + skb = *skbp; skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); diff --git a/trunk/net/ipv6/icmp.c b/trunk/net/ipv6/icmp.c index 9bb031fa1c2f..47b8ce232e84 100644 --- a/trunk/net/ipv6/icmp.c +++ b/trunk/net/ipv6/icmp.c @@ -82,7 +82,7 @@ EXPORT_SYMBOL(icmpv6msg_statistics); static DEFINE_PER_CPU(struct socket *, 
__icmpv6_socket) = NULL; #define icmpv6_socket __get_cpu_var(__icmpv6_socket) -static int icmpv6_rcv(struct sk_buff *skb); +static int icmpv6_rcv(struct sk_buff **pskb); static struct inet6_protocol icmpv6_protocol = { .handler = icmpv6_rcv, @@ -614,8 +614,9 @@ static void icmpv6_notify(struct sk_buff *skb, int type, int code, __be32 info) * Handle icmp messages */ -static int icmpv6_rcv(struct sk_buff *skb) +static int icmpv6_rcv(struct sk_buff **pskb) { + struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct in6_addr *saddr, *daddr; diff --git a/trunk/net/ipv6/inet6_connection_sock.c b/trunk/net/ipv6/inet6_connection_sock.c index 78de42ada844..25b931709749 100644 --- a/trunk/net/ipv6/inet6_connection_sock.c +++ b/trunk/net/ipv6/inet6_connection_sock.c @@ -146,7 +146,7 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, __ip6_dst_store(sk, dst, daddr, saddr); #ifdef CONFIG_XFRM - { + if (dst) { struct rt6_info *rt = (struct rt6_info *)dst; rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); } diff --git a/trunk/net/ipv6/ip6_input.c b/trunk/net/ipv6/ip6_input.c index fac6f7f9dd73..9149fc239759 100644 --- a/trunk/net/ipv6/ip6_input.c +++ b/trunk/net/ipv6/ip6_input.c @@ -125,7 +125,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(skb) < 0) { + if (ipv6_parse_hopopts(&skb) < 0) { IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; @@ -149,7 +149,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt */ -static int ip6_input_finish(struct sk_buff *skb) +static inline int ip6_input_finish(struct sk_buff *skb) { struct inet6_protocol *ipprot; struct sock *raw_sk; @@ -199,7 +199,7 @@ static int ip6_input_finish(struct sk_buff *skb) !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; - ret = ipprot->handler(skb); + ret = 
ipprot->handler(&skb); if (ret > 0) goto resubmit; else if (ret == 0) diff --git a/trunk/net/ipv6/ip6_output.c b/trunk/net/ipv6/ip6_output.c index 13565dfb1b45..011082ed921a 100644 --- a/trunk/net/ipv6/ip6_output.c +++ b/trunk/net/ipv6/ip6_output.c @@ -70,7 +70,7 @@ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *f spin_unlock_bh(&ip6_id_lock); } -static int ip6_output_finish(struct sk_buff *skb) +static inline int ip6_output_finish(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; diff --git a/trunk/net/ipv6/netfilter.c b/trunk/net/ipv6/netfilter.c index b1326c2bf8aa..38b149613915 100644 --- a/trunk/net/ipv6/netfilter.c +++ b/trunk/net/ipv6/netfilter.c @@ -68,15 +68,15 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, struct nf_info *info) } } -static int nf_ip6_reroute(struct sk_buff *skb, const struct nf_info *info) +static int nf_ip6_reroute(struct sk_buff **pskb, const struct nf_info *info) { struct ip6_rt_info *rt_info = nf_info_reroute(info); if (info->hook == NF_IP6_LOCAL_OUT) { - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = ipv6_hdr(*pskb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr)) - return ip6_route_me_harder(skb); + return ip6_route_me_harder(*pskb); } return 0; } diff --git a/trunk/net/ipv6/netfilter/ip6_queue.c b/trunk/net/ipv6/netfilter/ip6_queue.c index 6413a30d9f68..0473145ac534 100644 --- a/trunk/net/ipv6/netfilter/ip6_queue.c +++ b/trunk/net/ipv6/netfilter/ip6_queue.c @@ -332,7 +332,6 @@ static int ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) { int diff; - int err; struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; if (v->data_len < sizeof(*user_iph)) @@ -345,18 +344,25 @@ ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (v->data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - err = pskb_expand_head(e->skb, 0, - diff - skb_tailroom(e->skb), - GFP_ATOMIC); - if 
(err) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { printk(KERN_WARNING "ip6_queue: OOM " "in mangle, dropping packet\n"); - return err; + return -ENOMEM; } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(e->skb, v->data_len)) + if (!skb_make_writable(&e->skb, v->data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, v->payload, v->data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/trunk/net/ipv6/netfilter/ip6_tables.c b/trunk/net/ipv6/netfilter/ip6_tables.c index acaba1537931..cd9df02bb85c 100644 --- a/trunk/net/ipv6/netfilter/ip6_tables.c +++ b/trunk/net/ipv6/netfilter/ip6_tables.c @@ -205,7 +205,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff *skb, +ip6t_error(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -350,7 +350,7 @@ static void trace_packet(struct sk_buff *skb, /* Returns one of the generic firewall policies, like NF_ACCEPT. 
*/ unsigned int -ip6t_do_table(struct sk_buff *skb, +ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, @@ -389,17 +389,17 @@ ip6t_do_table(struct sk_buff *skb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip6_packet_match(skb, indev, outdev, &e->ipv6, + if (ip6_packet_match(*pskb, indev, outdev, &e->ipv6, &protoff, &offset, &hotdrop)) { struct ip6t_entry_target *t; if (IP6T_MATCH_ITERATE(e, do_match, - skb, in, out, + *pskb, in, out, offset, protoff, &hotdrop) != 0) goto no_match; ADD_COUNTER(e->counters, - ntohs(ipv6_hdr(skb)->payload_len) + ntohs(ipv6_hdr(*pskb)->payload_len) + IPV6_HDR_LEN, 1); @@ -409,8 +409,8 @@ ip6t_do_table(struct sk_buff *skb, #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) /* The packet is traced: log it */ - if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + if (unlikely((*pskb)->nf_trace)) + trace_packet(*pskb, hook, in, out, table->name, private, e); #endif /* Standard target? 
*/ @@ -448,7 +448,7 @@ ip6t_do_table(struct sk_buff *skb, ((struct ip6t_entry *)table_base)->comefrom = 0xeeeeeeec; #endif - verdict = t->u.kernel.target->target(skb, + verdict = t->u.kernel.target->target(pskb, in, out, hook, t->u.kernel.target, diff --git a/trunk/net/ipv6/netfilter/ip6t_HL.c b/trunk/net/ipv6/netfilter/ip6t_HL.c index 9afc836fd454..ad4d94310b87 100644 --- a/trunk/net/ipv6/netfilter/ip6t_HL.c +++ b/trunk/net/ipv6/netfilter/ip6t_HL.c @@ -18,7 +18,7 @@ MODULE_AUTHOR("Maciej Soltysiak "); MODULE_DESCRIPTION("IP6 tables Hop Limit modification module"); MODULE_LICENSE("GPL"); -static unsigned int ip6t_hl_target(struct sk_buff *skb, +static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -29,10 +29,10 @@ static unsigned int ip6t_hl_target(struct sk_buff *skb, const struct ip6t_HL_info *info = targinfo; int new_hl; - if (!skb_make_writable(skb, skb->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; - ip6h = ipv6_hdr(skb); + ip6h = ipv6_hdr(*pskb); switch (info->mode) { case IP6T_HL_SET: diff --git a/trunk/net/ipv6/netfilter/ip6t_LOG.c b/trunk/net/ipv6/netfilter/ip6t_LOG.c index 7a48c342df46..6ab99001dccc 100644 --- a/trunk/net/ipv6/netfilter/ip6t_LOG.c +++ b/trunk/net/ipv6/netfilter/ip6t_LOG.c @@ -431,7 +431,7 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -ip6t_log_target(struct sk_buff *skb, +ip6t_log_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -445,7 +445,8 @@ ip6t_log_target(struct sk_buff *skb, li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix); + ip6t_log_packet(PF_INET6, hooknum, *pskb, in, out, &li, + loginfo->prefix); return XT_CONTINUE; } diff --git a/trunk/net/ipv6/netfilter/ip6t_REJECT.c b/trunk/net/ipv6/netfilter/ip6t_REJECT.c index 1a7d2917545d..3fd08d5567a6 100644 
--- a/trunk/net/ipv6/netfilter/ip6t_REJECT.c +++ b/trunk/net/ipv6/netfilter/ip6t_REJECT.c @@ -172,7 +172,7 @@ send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } -static unsigned int reject6_target(struct sk_buff *skb, +static unsigned int reject6_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -187,25 +187,25 @@ static unsigned int reject6_target(struct sk_buff *skb, must return an absolute verdict. --RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(skb, ICMPV6_NOROUTE, hooknum); + send_unreach(*pskb, ICMPV6_NOROUTE, hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum); + send_unreach(*pskb, ICMPV6_ADM_PROHIBITED, hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum); + send_unreach(*pskb, ICMPV6_NOT_NEIGHBOUR, hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum); + send_unreach(*pskb, ICMPV6_ADDR_UNREACH, hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum); + send_unreach(*pskb, ICMPV6_PORT_UNREACH, hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(skb); + send_reset(*pskb); break; default: if (net_ratelimit()) diff --git a/trunk/net/ipv6/netfilter/ip6table_filter.c b/trunk/net/ipv6/netfilter/ip6table_filter.c index 1d26b202bf30..7e32e2aaf7f7 100644 --- a/trunk/net/ipv6/netfilter/ip6table_filter.c +++ b/trunk/net/ipv6/netfilter/ip6table_filter.c @@ -60,32 +60,32 @@ static struct xt_table packet_filter = { /* The work comes in here from netfilter.c. 
*/ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, &packet_filter); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static unsigned int ip6t_local_out_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { #if 0 /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; } #endif - return ip6t_do_table(skb, hook, in, out, &packet_filter); + return ip6t_do_table(pskb, hook, in, out, &packet_filter); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/trunk/net/ipv6/netfilter/ip6table_mangle.c b/trunk/net/ipv6/netfilter/ip6table_mangle.c index a0b6381f1e8c..f0a9efa67fb5 100644 --- a/trunk/net/ipv6/netfilter/ip6table_mangle.c +++ b/trunk/net/ipv6/netfilter/ip6table_mangle.c @@ -68,17 +68,17 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int ip6t_route_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, &packet_mangler); + return ip6t_do_table(pskb, hook, in, out, &packet_mangler); } static unsigned int ip6t_local_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -91,8 +91,8 @@ ip6t_local_hook(unsigned int hook, #if 0 /* root is playing with raw sockets. 
*/ - if (skb->len < sizeof(struct iphdr) - || ip_hdrlen(skb) < sizeof(struct iphdr)) { + if ((*pskb)->len < sizeof(struct iphdr) + || ip_hdrlen(*pskb) < sizeof(struct iphdr)) { if (net_ratelimit()) printk("ip6t_hook: happy cracking.\n"); return NF_ACCEPT; @@ -100,22 +100,22 @@ ip6t_local_hook(unsigned int hook, #endif /* save source/dest address, mark, hoplimit, flowlabel, priority, */ - memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); - memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); - mark = skb->mark; - hop_limit = ipv6_hdr(skb)->hop_limit; + memcpy(&saddr, &ipv6_hdr(*pskb)->saddr, sizeof(saddr)); + memcpy(&daddr, &ipv6_hdr(*pskb)->daddr, sizeof(daddr)); + mark = (*pskb)->mark; + hop_limit = ipv6_hdr(*pskb)->hop_limit; /* flowlabel and prio (includes version, which shouldn't change either */ - flowlabel = *((u_int32_t *)ipv6_hdr(skb)); + flowlabel = *((u_int32_t *)ipv6_hdr(*pskb)); - ret = ip6t_do_table(skb, hook, in, out, &packet_mangler); + ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); if (ret != NF_DROP && ret != NF_STOLEN - && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) - || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) - || skb->mark != mark - || ipv6_hdr(skb)->hop_limit != hop_limit)) - return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + && (memcmp(&ipv6_hdr(*pskb)->saddr, &saddr, sizeof(saddr)) + || memcmp(&ipv6_hdr(*pskb)->daddr, &daddr, sizeof(daddr)) + || (*pskb)->mark != mark + || ipv6_hdr(*pskb)->hop_limit != hop_limit)) + return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; return ret; } diff --git a/trunk/net/ipv6/netfilter/ip6table_raw.c b/trunk/net/ipv6/netfilter/ip6table_raw.c index 8f7109f991e6..ec290e4ebdd8 100644 --- a/trunk/net/ipv6/netfilter/ip6table_raw.c +++ b/trunk/net/ipv6/netfilter/ip6table_raw.c @@ -46,12 +46,12 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. 
*/ static unsigned int ip6t_hook(unsigned int hook, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, &packet_raw); + return ip6t_do_table(pskb, hook, in, out, &packet_raw); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 0e40948f4fc6..37a3db926953 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -146,7 +145,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } static unsigned int ipv6_confirm(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -156,12 +155,12 @@ static unsigned int ipv6_confirm(unsigned int hooknum, struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; - unsigned char pnum = ipv6_hdr(skb)->nexthdr; + unsigned int extoff = (u8 *)(ipv6_hdr(*pskb) + 1) - (*pskb)->data; + unsigned char pnum = ipv6_hdr(*pskb)->nexthdr; /* This is where we call the helper: as the packet goes out. 
*/ - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY) goto out; @@ -173,23 +172,23 @@ static unsigned int ipv6_confirm(unsigned int hooknum, if (!helper) goto out; - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, - skb->len - extoff); - if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { + protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, + (*pskb)->len - extoff); + if (protoff > (*pskb)->len || pnum == NEXTHDR_FRAGMENT) { pr_debug("proto header not found\n"); return NF_ACCEPT; } - ret = helper->help(skb, protoff, ct, ctinfo); + ret = helper->help(pskb, protoff, ct, ctinfo); if (ret != NF_ACCEPT) return ret; out: /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(skb); + return nf_conntrack_confirm(pskb); } static unsigned int ipv6_defrag(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) @@ -197,17 +196,17 @@ static unsigned int ipv6_defrag(unsigned int hooknum, struct sk_buff *reasm; /* Previously seen (loopback)? */ - if (skb->nfct) + if ((*pskb)->nfct) return NF_ACCEPT; - reasm = nf_ct_frag6_gather(skb); + reasm = nf_ct_frag6_gather(*pskb); /* queued */ if (reasm == NULL) return NF_STOLEN; /* error occured or not fragmented */ - if (reasm == skb) + if (reasm == *pskb) return NF_ACCEPT; nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, @@ -217,12 +216,12 @@ static unsigned int ipv6_defrag(unsigned int hooknum, } static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct sk_buff *reasm = skb->nfct_reasm; + struct sk_buff *reasm = (*pskb)->nfct_reasm; /* This packet is fragmented and has reassembled packet. 
*/ if (reasm) { @@ -230,32 +229,32 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, reasm); + ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); if (ret != NF_ACCEPT) return ret; } nf_conntrack_get(reasm->nfct); - skb->nfct = reasm->nfct; - skb->nfctinfo = reasm->nfctinfo; + (*pskb)->nfct = reasm->nfct; + (*pskb)->nfctinfo = reasm->nfctinfo; return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, skb); + return nf_conntrack_in(PF_INET6, hooknum, pskb); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if (skb->len < sizeof(struct ipv6hdr)) { + if ((*pskb)->len < sizeof(struct ipv6hdr)) { if (net_ratelimit()) printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, skb, in, out, okfn); + return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] = { @@ -308,7 +307,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, .procname = "nf_conntrack_frag6_timeout", - .data = &nf_frags_ctl.timeout, + .data = &nf_ct_frag6_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -316,7 +315,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_frags_ctl.low_thresh, + .data = &nf_ct_frag6_low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -324,7 +323,7 @@ static ctl_table nf_ct_ipv6_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_frags_ctl.high_thresh, + .data = 
&nf_ct_frag6_high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c b/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c index 726fafd41961..25442a8c1ba8 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -31,7 +31,6 @@ #include #include -#include #include #include @@ -49,6 +48,10 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT +unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; +unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; +unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; + struct nf_ct_frag6_skb_cb { struct inet6_skb_parm h; @@ -60,24 +63,51 @@ struct nf_ct_frag6_skb_cb struct nf_ct_frag6_queue { - struct inet_frag_queue q; + struct hlist_node list; + struct list_head lru_list; /* lru list member */ __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* expire timer */ + struct sk_buff *fragments; + int len; + int meat; + ktime_t stamp; unsigned int csum; + __u8 last_in; /* has first/last segment arrived? */ +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 __u16 nhoffset; }; -struct inet_frags_ctl nf_frags_ctl __read_mostly = { - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, - .timeout = IPV6_FRAG_TIMEOUT, - .secret_interval = 10 * 60 * HZ, -}; +/* Hash table. 
*/ + +#define FRAG6Q_HASHSZ 64 + +static struct hlist_head nf_ct_frag6_hash[FRAG6Q_HASHSZ]; +static DEFINE_RWLOCK(nf_ct_frag6_lock); +static u32 nf_ct_frag6_hash_rnd; +static LIST_HEAD(nf_ct_frag6_lru_list); +int nf_ct_frag6_nqueues = 0; + +static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) +{ + hlist_del(&fq->list); + list_del(&fq->lru_list); + nf_ct_frag6_nqueues--; +} -static struct inet_frags nf_frags; +static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) +{ + write_lock(&nf_ct_frag6_lock); + __fq_unlink(fq); + write_unlock(&nf_ct_frag6_lock); +} static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, struct in6_addr *daddr) @@ -90,7 +120,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += nf_frags.rnd; + c += nf_ct_frag6_hash_rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -103,54 +133,100 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (INETFRAGS_HASHSZ - 1); + return c & (FRAG6Q_HASHSZ - 1); } -static unsigned int nf_hashfn(struct inet_frag_queue *q) +static struct timer_list nf_ct_frag6_secret_timer; +int nf_ct_frag6_secret_interval = 10 * 60 * HZ; + +static void nf_ct_frag6_secret_rebuild(unsigned long dummy) { - struct nf_ct_frag6_queue *nq; + unsigned long now = jiffies; + int i; + + write_lock(&nf_ct_frag6_lock); + get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); + for (i = 0; i < FRAG6Q_HASHSZ; i++) { + struct nf_ct_frag6_queue *q; + struct hlist_node *p, *n; + + hlist_for_each_entry_safe(q, p, n, &nf_ct_frag6_hash[i], list) { + unsigned int hval = ip6qhashfn(q->id, + &q->saddr, + &q->daddr); + if (hval != i) { + hlist_del(&q->list); + /* Relink to new hash chain. 
*/ + hlist_add_head(&q->list, + &nf_ct_frag6_hash[hval]); + } + } + } + write_unlock(&nf_ct_frag6_lock); - nq = container_of(q, struct nf_ct_frag6_queue, q); - return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); + mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); } -static void nf_skb_free(struct sk_buff *skb) -{ - if (NFCT_FRAG6_CB(skb)->orig) - kfree_skb(NFCT_FRAG6_CB(skb)->orig); -} +atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); /* Memory Tracking Functions. */ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &nf_frags.mem); - nf_skb_free(skb); + atomic_sub(skb->truesize, &nf_ct_frag6_mem); + if (NFCT_FRAG6_CB(skb)->orig) + kfree_skb(NFCT_FRAG6_CB(skb)->orig); + kfree_skb(skb); } -static void nf_frag_free(struct inet_frag_queue *q) +static inline void frag_free_queue(struct nf_ct_frag6_queue *fq, + unsigned int *work) { - kfree(container_of(q, struct nf_ct_frag6_queue, q)); + if (work) + *work -= sizeof(struct nf_ct_frag6_queue); + atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); + kfree(fq); } static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) { - struct nf_ct_frag6_queue *fq; + struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - fq = kzalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); - if (fq == NULL) + if (!fq) return NULL; - atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_frags.mem); + atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); return fq; } /* Destruction primitives. */ -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) +/* Complete destruction of fq. */ +static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, + unsigned int *work) { - inet_frag_put(&fq->q, &nf_frags); + struct sk_buff *fp; + + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); + + /* Release all fragment data. 
*/ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(fp, work); + fp = xp; + } + + frag_free_queue(fq, work); +} + +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) +{ + if (atomic_dec_and_test(&fq->refcnt)) + nf_ct_frag6_destroy(fq, work); } /* Kill fq entry. It is not destroyed immediately, @@ -158,28 +234,62 @@ static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) */ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) { - inet_frag_kill(&fq->q, &nf_frags); + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } } static void nf_ct_frag6_evictor(void) { - inet_frag_evictor(&nf_frags); + struct nf_ct_frag6_queue *fq; + struct list_head *tmp; + unsigned int work; + + work = atomic_read(&nf_ct_frag6_mem); + if (work <= nf_ct_frag6_low_thresh) + return; + + work -= nf_ct_frag6_low_thresh; + while (work > 0) { + read_lock(&nf_ct_frag6_lock); + if (list_empty(&nf_ct_frag6_lru_list)) { + read_unlock(&nf_ct_frag6_lock); + return; + } + tmp = nf_ct_frag6_lru_list.next; + BUG_ON(tmp == NULL); + fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); + atomic_inc(&fq->refcnt); + read_unlock(&nf_ct_frag6_lock); + + spin_lock(&fq->lock); + if (!(fq->last_in&COMPLETE)) + fq_kill(fq); + spin_unlock(&fq->lock); + + fq_put(fq, &work); + } } static void nf_ct_frag6_expire(unsigned long data) { struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; - spin_lock(&fq->q.lock); + spin_lock(&fq->lock); - if (fq->q.last_in & COMPLETE) + if (fq->last_in & COMPLETE) goto out; fq_kill(fq); out: - spin_unlock(&fq->q.lock); - fq_put(fq); + spin_unlock(&fq->lock); + fq_put(fq, NULL); } /* Creation primitives. 
*/ @@ -192,31 +302,31 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, struct hlist_node *n; #endif - write_lock(&nf_frags.lock); + write_lock(&nf_ct_frag6_lock); #ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { + hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { if (fq->id == fq_in->id && ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - write_unlock(&nf_frags.lock); - fq_in->q.last_in |= COMPLETE; - fq_put(fq_in); + atomic_inc(&fq->refcnt); + write_unlock(&nf_ct_frag6_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in, NULL); return fq; } } #endif fq = fq_in; - if (!mod_timer(&fq->q.timer, jiffies + nf_frags_ctl.timeout)) - atomic_inc(&fq->q.refcnt); + if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) + atomic_inc(&fq->refcnt); - atomic_inc(&fq->q.refcnt); - hlist_add_head(&fq->q.list, &nf_frags.hash[hash]); - INIT_LIST_HEAD(&fq->q.lru_list); - list_add_tail(&fq->q.lru_list, &nf_frags.lru_list); - nf_frags.nqueues++; - write_unlock(&nf_frags.lock); + atomic_inc(&fq->refcnt); + hlist_add_head(&fq->list, &nf_ct_frag6_hash[hash]); + INIT_LIST_HEAD(&fq->lru_list); + list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); + nf_ct_frag6_nqueues++; + write_unlock(&nf_ct_frag6_lock); return fq; } @@ -231,13 +341,15 @@ nf_ct_frag6_create(unsigned int hash, __be32 id, struct in6_addr *src, str goto oom; } + memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); + fq->id = id; ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - setup_timer(&fq->q.timer, nf_ct_frag6_expire, (unsigned long)fq); - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); + setup_timer(&fq->timer, nf_ct_frag6_expire, (unsigned long)fq); + spin_lock_init(&fq->lock); + atomic_set(&fq->refcnt, 1); return nf_ct_frag6_intern(hash, fq); @@ -252,17 +364,17 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) struct hlist_node *n; 
unsigned int hash = ip6qhashfn(id, src, dst); - read_lock(&nf_frags.lock); - hlist_for_each_entry(fq, n, &nf_frags.hash[hash], q.list) { + read_lock(&nf_ct_frag6_lock); + hlist_for_each_entry(fq, n, &nf_ct_frag6_hash[hash], list) { if (fq->id == id && ipv6_addr_equal(src, &fq->saddr) && ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&nf_frags.lock); + atomic_inc(&fq->refcnt); + read_unlock(&nf_ct_frag6_lock); return fq; } } - read_unlock(&nf_frags.lock); + read_unlock(&nf_ct_frag6_lock); return nf_ct_frag6_create(hash, id, src, dst); } @@ -274,7 +386,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; int offset, end; - if (fq->q.last_in & COMPLETE) { + if (fq->last_in & COMPLETE) { pr_debug("Allready completed\n"); goto err; } @@ -300,13 +412,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->q.len || - ((fq->q.last_in & LAST_IN) && end != fq->q.len)) { + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) { pr_debug("already received last fragment\n"); goto err; } - fq->q.last_in |= LAST_IN; - fq->q.len = end; + fq->last_in |= LAST_IN; + fq->len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -318,13 +430,13 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, pr_debug("end of fragment not rounded to 8 bytes.\n"); return -1; } - if (end > fq->q.len) { + if (end > fq->len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & LAST_IN) { + if (fq->last_in & LAST_IN) { pr_debug("last packet already reached.\n"); goto err; } - fq->q.len = end; + fq->len = end; } } @@ -346,7 +458,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, * this fragment, right? 
*/ prev = NULL; - for (next = fq->q.fragments; next != NULL; next = next->next) { + for (next = fq->fragments; next != NULL; next = next->next) { if (NFCT_FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; @@ -391,7 +503,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, /* next fragment */ NFCT_FRAG6_CB(next)->offset += i; - fq->q.meat -= i; + fq->meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -406,9 +518,9 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->q.fragments = next; + fq->fragments = next; - fq->q.meat -= free_it->len; + fq->meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -420,23 +532,23 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->q.fragments = skb; + fq->fragments = skb; skb->dev = NULL; - fq->q.stamp = skb->tstamp; - fq->q.meat += skb->len; - atomic_add(skb->truesize, &nf_frags.mem); + fq->stamp = skb->tstamp; + fq->meat += skb->len; + atomic_add(skb->truesize, &nf_ct_frag6_mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. 
*/ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= FIRST_IN; + fq->last_in |= FIRST_IN; } - write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_frags.lru_list); - write_unlock(&nf_frags.lock); + write_lock(&nf_ct_frag6_lock); + list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); + write_unlock(&nf_ct_frag6_lock); return 0; err: @@ -455,7 +567,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, static struct sk_buff * nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) { - struct sk_buff *fp, *op, *head = fq->q.fragments; + struct sk_buff *fp, *op, *head = fq->fragments; int payload_len; fq_kill(fq); @@ -465,7 +577,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->q.len - + sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) { pr_debug("payload len is too large.\n"); @@ -502,7 +614,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_frags.mem); + atomic_add(clone->truesize, &nf_ct_frag6_mem); } /* We have to remove fragment header from datagram and to relocate @@ -516,7 +628,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &nf_frags.mem); + atomic_sub(head->truesize, &nf_ct_frag6_mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -526,12 +638,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; 
- atomic_sub(fp->truesize, &nf_frags.mem); + atomic_sub(fp->truesize, &nf_ct_frag6_mem); } head->next = NULL; head->dev = dev; - head->tstamp = fq->q.stamp; + head->tstamp = fq->stamp; ipv6_hdr(head)->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 8) */ @@ -540,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) skb_network_header_len(head), head->csum); - fq->q.fragments = NULL; + fq->fragments = NULL; /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ fp = skb_shinfo(head)->frag_list; @@ -676,7 +788,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - if (atomic_read(&nf_frags.mem) > nf_frags_ctl.high_thresh) + if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) nf_ct_frag6_evictor(); fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); @@ -685,23 +797,23 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->q.lock); + spin_lock(&fq->lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->q.lock); + spin_unlock(&fq->lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq); + fq_put(fq, NULL); goto ret_orig; } - if (fq->q.last_in == (FIRST_IN|LAST_IN) && fq->q.meat == fq->q.len) { + if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { ret_skb = nf_ct_frag6_reasm(fq, dev); if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->q.lock); + spin_unlock(&fq->lock); - fq_put(fq); + fq_put(fq, NULL); return ret_skb; ret_orig: @@ -747,20 +859,20 @@ int nf_ct_frag6_kfree_frags(struct sk_buff *skb) int nf_ct_frag6_init(void) { - nf_frags.ctl = &nf_frags_ctl; - nf_frags.hashfn = nf_hashfn; - nf_frags.destructor = nf_frag_free; - nf_frags.skb_free = nf_skb_free; - nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); - inet_frags_init(&nf_frags); + nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 
6))); + + setup_timer(&nf_ct_frag6_secret_timer, nf_ct_frag6_secret_rebuild, 0); + nf_ct_frag6_secret_timer.expires = jiffies + + nf_ct_frag6_secret_interval; + add_timer(&nf_ct_frag6_secret_timer); return 0; } void nf_ct_frag6_cleanup(void) { - inet_frags_fini(&nf_frags); - - nf_frags_ctl.low_thresh = 0; + del_timer(&nf_ct_frag6_secret_timer); + nf_ct_frag6_low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/trunk/net/ipv6/proc.c b/trunk/net/ipv6/proc.c index be526ad92543..db945018579e 100644 --- a/trunk/net/ipv6/proc.c +++ b/trunk/net/ipv6/proc.c @@ -54,7 +54,7 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", fold_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", - ip6_frag_nqueues(), ip6_frag_mem()); + ip6_frag_nqueues, atomic_read(&ip6_frag_mem)); return 0; } diff --git a/trunk/net/ipv6/reassembly.c b/trunk/net/ipv6/reassembly.c index 6ad19cfc2025..31601c993541 100644 --- a/trunk/net/ipv6/reassembly.c +++ b/trunk/net/ipv6/reassembly.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include @@ -54,7 +53,11 @@ #include #include #include -#include + +int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; +int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; + +int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; struct ip6frag_skb_cb { @@ -71,39 +74,53 @@ struct ip6frag_skb_cb struct frag_queue { - struct inet_frag_queue q; + struct hlist_node list; + struct list_head lru_list; /* lru list member */ __be32 id; /* fragment id */ struct in6_addr saddr; struct in6_addr daddr; + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* expire timer */ + struct sk_buff *fragments; + int len; + int meat; int iif; + ktime_t stamp; unsigned int csum; + __u8 last_in; /* has first/last segment arrived? 
*/ +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 __u16 nhoffset; }; -struct inet_frags_ctl ip6_frags_ctl __read_mostly = { - .high_thresh = 256 * 1024, - .low_thresh = 192 * 1024, - .timeout = IPV6_FRAG_TIMEOUT, - .secret_interval = 10 * 60 * HZ, -}; +/* Hash table. */ -static struct inet_frags ip6_frags; +#define IP6Q_HASHSZ 64 -int ip6_frag_nqueues(void) +static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ]; +static DEFINE_RWLOCK(ip6_frag_lock); +static u32 ip6_frag_hash_rnd; +static LIST_HEAD(ip6_frag_lru_list); +int ip6_frag_nqueues = 0; + +static __inline__ void __fq_unlink(struct frag_queue *fq) { - return ip6_frags.nqueues; + hlist_del(&fq->list); + list_del(&fq->lru_list); + ip6_frag_nqueues--; } -int ip6_frag_mem(void) +static __inline__ void fq_unlink(struct frag_queue *fq) { - return atomic_read(&ip6_frags.mem); + write_lock(&ip6_frag_lock); + __fq_unlink(fq); + write_unlock(&ip6_frag_lock); } -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, - struct net_device *dev); - /* * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. 
@@ -119,7 +136,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += ip6_frag_hash_rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -132,29 +149,60 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, c += (__force u32)id; __jhash_mix(a, b, c); - return c & (INETFRAGS_HASHSZ - 1); + return c & (IP6Q_HASHSZ - 1); } -static unsigned int ip6_hashfn(struct inet_frag_queue *q) +static struct timer_list ip6_frag_secret_timer; +int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; + +static void ip6_frag_secret_rebuild(unsigned long dummy) { - struct frag_queue *fq; + unsigned long now = jiffies; + int i; + + write_lock(&ip6_frag_lock); + get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32)); + for (i = 0; i < IP6Q_HASHSZ; i++) { + struct frag_queue *q; + struct hlist_node *p, *n; + + hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) { + unsigned int hval = ip6qhashfn(q->id, + &q->saddr, + &q->daddr); + + if (hval != i) { + hlist_del(&q->list); + + /* Relink to new hash chain. */ + hlist_add_head(&q->list, + &ip6_frag_hash[hval]); + + } + } + } + write_unlock(&ip6_frag_lock); - fq = container_of(q, struct frag_queue, q); - return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); + mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval); } +atomic_t ip6_frag_mem = ATOMIC_INIT(0); + /* Memory Tracking Functions. 
*/ static inline void frag_kfree_skb(struct sk_buff *skb, int *work) { if (work) *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frags.mem); + atomic_sub(skb->truesize, &ip6_frag_mem); kfree_skb(skb); } -static void ip6_frag_free(struct inet_frag_queue *fq) +static inline void frag_free_queue(struct frag_queue *fq, int *work) { - kfree(container_of(fq, struct frag_queue, q)); + if (work) + *work -= sizeof(struct frag_queue); + atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); + kfree(fq); } static inline struct frag_queue *frag_alloc_queue(void) @@ -163,15 +211,36 @@ static inline struct frag_queue *frag_alloc_queue(void) if(!fq) return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frags.mem); + atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); return fq; } /* Destruction primitives. */ -static __inline__ void fq_put(struct frag_queue *fq) +/* Complete destruction of fq. */ +static void ip6_frag_destroy(struct frag_queue *fq, int *work) +{ + struct sk_buff *fp; + + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); + + /* Release all fragment data. */ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(fp, work); + fp = xp; + } + + frag_free_queue(fq, work); +} + +static __inline__ void fq_put(struct frag_queue *fq, int *work) { - inet_frag_put(&fq->q, &ip6_frags); + if (atomic_dec_and_test(&fq->refcnt)) + ip6_frag_destroy(fq, work); } /* Kill fq entry. 
It is not destroyed immediately, @@ -179,16 +248,45 @@ static __inline__ void fq_put(struct frag_queue *fq) */ static __inline__ void fq_kill(struct frag_queue *fq) { - inet_frag_kill(&fq->q, &ip6_frags); + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } } static void ip6_evictor(struct inet6_dev *idev) { - int evicted; - - evicted = inet_frag_evictor(&ip6_frags); - if (evicted) - IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); + struct frag_queue *fq; + struct list_head *tmp; + int work; + + work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh; + if (work <= 0) + return; + + while(work > 0) { + read_lock(&ip6_frag_lock); + if (list_empty(&ip6_frag_lru_list)) { + read_unlock(&ip6_frag_lock); + return; + } + tmp = ip6_frag_lru_list.next; + fq = list_entry(tmp, struct frag_queue, lru_list); + atomic_inc(&fq->refcnt); + read_unlock(&ip6_frag_lock); + + spin_lock(&fq->lock); + if (!(fq->last_in&COMPLETE)) + fq_kill(fq); + spin_unlock(&fq->lock); + + fq_put(fq, &work); + IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); + } } static void ip6_frag_expire(unsigned long data) @@ -196,9 +294,9 @@ static void ip6_frag_expire(unsigned long data) struct frag_queue *fq = (struct frag_queue *) data; struct net_device *dev = NULL; - spin_lock(&fq->q.lock); + spin_lock(&fq->lock); - if (fq->q.last_in & COMPLETE) + if (fq->last_in & COMPLETE) goto out; fq_kill(fq); @@ -213,7 +311,7 @@ static void ip6_frag_expire(unsigned long data) rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ - if (!(fq->q.last_in&FIRST_IN) || !fq->q.fragments) + if (!(fq->last_in&FIRST_IN) || !fq->fragments) goto out; /* @@ -221,13 +319,13 @@ static void ip6_frag_expire(unsigned long data) segment was received. And do not use fq->dev pointer directly, device might already disappeared. 
*/ - fq->q.fragments->dev = dev; - icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + fq->fragments->dev = dev; + icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); out: if (dev) dev_put(dev); - spin_unlock(&fq->q.lock); - fq_put(fq); + spin_unlock(&fq->lock); + fq_put(fq, NULL); } /* Creation primitives. */ @@ -241,32 +339,32 @@ static struct frag_queue *ip6_frag_intern(struct frag_queue *fq_in) struct hlist_node *n; #endif - write_lock(&ip6_frags.lock); + write_lock(&ip6_frag_lock); hash = ip6qhashfn(fq_in->id, &fq_in->saddr, &fq_in->daddr); #ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { + hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { if (fq->id == fq_in->id && ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - write_unlock(&ip6_frags.lock); - fq_in->q.last_in |= COMPLETE; - fq_put(fq_in); + atomic_inc(&fq->refcnt); + write_unlock(&ip6_frag_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in, NULL); return fq; } } #endif fq = fq_in; - if (!mod_timer(&fq->q.timer, jiffies + ip6_frags_ctl.timeout)) - atomic_inc(&fq->q.refcnt); + if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) + atomic_inc(&fq->refcnt); - atomic_inc(&fq->q.refcnt); - hlist_add_head(&fq->q.list, &ip6_frags.hash[hash]); - INIT_LIST_HEAD(&fq->q.lru_list); - list_add_tail(&fq->q.lru_list, &ip6_frags.lru_list); - ip6_frags.nqueues++; - write_unlock(&ip6_frags.lock); + atomic_inc(&fq->refcnt); + hlist_add_head(&fq->list, &ip6_frag_hash[hash]); + INIT_LIST_HEAD(&fq->lru_list); + list_add_tail(&fq->lru_list, &ip6_frag_lru_list); + ip6_frag_nqueues++; + write_unlock(&ip6_frag_lock); return fq; } @@ -284,11 +382,11 @@ ip6_frag_create(__be32 id, struct in6_addr *src, struct in6_addr *dst, ipv6_addr_copy(&fq->saddr, src); ipv6_addr_copy(&fq->daddr, dst); - init_timer(&fq->q.timer); - fq->q.timer.function = ip6_frag_expire; - 
fq->q.timer.data = (long) fq; - spin_lock_init(&fq->q.lock); - atomic_set(&fq->q.refcnt, 1); + init_timer(&fq->timer); + fq->timer.function = ip6_frag_expire; + fq->timer.data = (long) fq; + spin_lock_init(&fq->lock); + atomic_set(&fq->refcnt, 1); return ip6_frag_intern(fq); @@ -305,31 +403,30 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst, struct hlist_node *n; unsigned int hash; - read_lock(&ip6_frags.lock); + read_lock(&ip6_frag_lock); hash = ip6qhashfn(id, src, dst); - hlist_for_each_entry(fq, n, &ip6_frags.hash[hash], q.list) { + hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { if (fq->id == id && ipv6_addr_equal(src, &fq->saddr) && ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->q.refcnt); - read_unlock(&ip6_frags.lock); + atomic_inc(&fq->refcnt); + read_unlock(&ip6_frag_lock); return fq; } } - read_unlock(&ip6_frags.lock); + read_unlock(&ip6_frag_lock); return ip6_frag_create(id, src, dst, idev); } -static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; - struct net_device *dev; int offset, end; - if (fq->q.last_in & COMPLETE) + if (fq->last_in & COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; @@ -342,7 +439,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - skb_network_header(skb))); - return -1; + return; } if (skb->ip_summed == CHECKSUM_COMPLETE) { @@ -357,11 +454,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* If we already have some bits beyond end * or have different end, the segment is corrupted. 
*/ - if (end < fq->q.len || - ((fq->q.last_in & LAST_IN) && end != fq->q.len)) + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) goto err; - fq->q.last_in |= LAST_IN; - fq->q.len = end; + fq->last_in |= LAST_IN; + fq->len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -374,13 +471,13 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); - return -1; + return; } - if (end > fq->q.len) { + if (end > fq->len) { /* Some bits beyond end -> corruption. */ - if (fq->q.last_in & LAST_IN) + if (fq->last_in & LAST_IN) goto err; - fq->q.len = end; + fq->len = end; } } @@ -399,7 +496,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * this fragment, right? */ prev = NULL; - for(next = fq->q.fragments; next != NULL; next = next->next) { + for(next = fq->fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! 
*/ prev = next; @@ -436,7 +533,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (!pskb_pull(next, i)) goto err; FRAG6_CB(next)->offset += i; /* next fragment */ - fq->q.meat -= i; + fq->meat -= i; if (next->ip_summed != CHECKSUM_UNNECESSARY) next->ip_summed = CHECKSUM_NONE; break; @@ -451,9 +548,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = next; else - fq->q.fragments = next; + fq->fragments = next; - fq->q.meat -= free_it->len; + fq->meat -= free_it->len; frag_kfree_skb(free_it, NULL); } } @@ -465,37 +562,30 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, if (prev) prev->next = skb; else - fq->q.fragments = skb; + fq->fragments = skb; - dev = skb->dev; - if (dev) { - fq->iif = dev->ifindex; - skb->dev = NULL; - } - fq->q.stamp = skb->tstamp; - fq->q.meat += skb->len; - atomic_add(skb->truesize, &ip6_frags.mem); + if (skb->dev) + fq->iif = skb->dev->ifindex; + skb->dev = NULL; + fq->stamp = skb->tstamp; + fq->meat += skb->len; + atomic_add(skb->truesize, &ip6_frag_mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->q.last_in |= FIRST_IN; + fq->last_in |= FIRST_IN; } - - if (fq->q.last_in == (FIRST_IN | LAST_IN) && fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); - - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &ip6_frags.lru_list); - write_unlock(&ip6_frags.lock); - return -1; + write_lock(&ip6_frag_lock); + list_move_tail(&fq->lru_list, &ip6_frag_lru_list); + write_unlock(&ip6_frag_lock); + return; err: IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); - return -1; } /* @@ -507,39 +597,21 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. 
*/ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, struct net_device *dev) { - struct sk_buff *fp, *head = fq->q.fragments; + struct sk_buff *fp, *head = fq->fragments; int payload_len; unsigned int nhoff; fq_kill(fq); - /* Make the one we just received the head. */ - if (prev) { - head = prev->next; - fp = skb_clone(head, GFP_ATOMIC); - - if (!fp) - goto out_oom; - - fp->next = head->next; - prev->next = fp; - - skb_morph(head, fq->q.fragments); - head->next = fq->q.fragments->next; - - kfree_skb(fq->q.fragments); - fq->q.fragments = head; - } - BUG_TRAP(head != NULL); BUG_TRAP(FRAG6_CB(head)->offset == 0); /* Unfragmented part is taken from the first segment. */ payload_len = ((head->data - skb_network_header(head)) - - sizeof(struct ipv6hdr) + fq->q.len - + sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; @@ -568,7 +640,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frags.mem); + atomic_add(clone->truesize, &ip6_frag_mem); } /* We have to remove fragment header from datagram and to relocate @@ -583,7 +655,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &ip6_frags.mem); + atomic_sub(head->truesize, &ip6_frag_mem); for (fp=head->next; fp; fp = fp->next) { head->data_len += fp->len; @@ -593,15 +665,17 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frags.mem); + atomic_sub(fp->truesize, &ip6_frag_mem); } 
head->next = NULL; head->dev = dev; - head->tstamp = fq->q.stamp; + head->tstamp = fq->stamp; ipv6_hdr(head)->payload_len = htons(payload_len); IP6CB(head)->nhoff = nhoff; + *skb_in = head; + /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_partial(skb_network_header(head), @@ -611,7 +685,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, rcu_read_lock(); IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); - fq->q.fragments = NULL; + fq->fragments = NULL; return 1; out_oversize: @@ -628,8 +702,10 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, return -1; } -static int ipv6_frag_rcv(struct sk_buff *skb) +static int ipv6_frag_rcv(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; + struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -663,19 +739,23 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (atomic_read(&ip6_frags.mem) > ip6_frags_ctl.high_thresh) + if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) ip6_evictor(ip6_dst_idev(skb->dst)); if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_dst_idev(skb->dst))) != NULL) { - int ret; + int ret = -1; - spin_lock(&fq->q.lock); + spin_lock(&fq->lock); - ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); - spin_unlock(&fq->q.lock); - fq_put(fq); + if (fq->last_in == (FIRST_IN|LAST_IN) && + fq->meat == fq->len) + ret = ip6_frag_reasm(fq, skbp, dev); + + spin_unlock(&fq->lock); + fq_put(fq, NULL); return ret; } @@ -695,10 +775,11 @@ void __init ipv6_frag_init(void) if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); - ip6_frags.ctl = &ip6_frags_ctl; - ip6_frags.hashfn = ip6_hashfn; - ip6_frags.destructor = ip6_frag_free; - 
ip6_frags.skb_free = NULL; - ip6_frags.qsize = sizeof(struct frag_queue); - inet_frags_init(&ip6_frags); + ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + init_timer(&ip6_frag_secret_timer); + ip6_frag_secret_timer.function = ip6_frag_secret_rebuild; + ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval; + add_timer(&ip6_frag_secret_timer); } diff --git a/trunk/net/ipv6/route.c b/trunk/net/ipv6/route.c index cce9941c11c6..6ff19f9eb9ee 100644 --- a/trunk/net/ipv6/route.c +++ b/trunk/net/ipv6/route.c @@ -663,7 +663,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl, int flags) { struct fib6_node *fn; @@ -682,7 +682,7 @@ static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(fn, oif, strict | reachable); + rt = rt6_select(fn, fl->iif, strict | reachable); BACKTRACK(&fl->fl6_src); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) @@ -735,12 +735,6 @@ static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif, return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, - struct flowi *fl, int flags) -{ - return ip6_pol_route(table, fl->iif, fl, flags); -} - void ip6_route_input(struct sk_buff *skb) { struct ipv6hdr *iph = ipv6_hdr(skb); @@ -767,7 +761,72 @@ void ip6_route_input(struct sk_buff *skb) static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, struct flowi *fl, int flags) { - return ip6_pol_route(table, fl->oif, fl, flags); + struct fib6_node *fn; + struct rt6_info *rt, *nrt; + int strict = 0; + int attempts = 3; + int err; + int reachable = ipv6_devconf.forwarding ? 
0 : RT6_LOOKUP_F_REACHABLE; + + strict |= flags & RT6_LOOKUP_F_IFACE; + +relookup: + read_lock_bh(&table->tb6_lock); + +restart_2: + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + +restart: + rt = rt6_select(fn, fl->oif, strict | reachable); + BACKTRACK(&fl->fl6_src); + if (rt == &ip6_null_entry || + rt->rt6i_flags & RTF_CACHE) + goto out; + + dst_hold(&rt->u.dst); + read_unlock_bh(&table->tb6_lock); + + if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) + nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); + else { +#if CLONE_OFFLINK_ROUTE + nrt = rt6_alloc_clone(rt, &fl->fl6_dst); +#else + goto out2; +#endif + } + + dst_release(&rt->u.dst); + rt = nrt ? : &ip6_null_entry; + + dst_hold(&rt->u.dst); + if (nrt) { + err = ip6_ins_rt(nrt); + if (!err) + goto out2; + } + + if (--attempts <= 0) + goto out2; + + /* + * Race condition! In the gap, when table->tb6_lock was + * released someone could insert this route. Relookup. + */ + dst_release(&rt->u.dst); + goto relookup; + +out: + if (reachable) { + reachable = 0; + goto restart_2; + } + dst_hold(&rt->u.dst); + read_unlock_bh(&table->tb6_lock); +out2: + rt->u.dst.lastuse = jiffies; + rt->u.dst.__use++; + return rt; } struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) diff --git a/trunk/net/ipv6/sysctl_net_ipv6.c b/trunk/net/ipv6/sysctl_net_ipv6.c index 68bb2548e469..3fb44277207b 100644 --- a/trunk/net/ipv6/sysctl_net_ipv6.c +++ b/trunk/net/ipv6/sysctl_net_ipv6.c @@ -12,7 +12,6 @@ #include #include #include -#include #ifdef CONFIG_SYSCTL @@ -42,7 +41,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_HIGH_THRESH, .procname = "ip6frag_high_thresh", - .data = &ip6_frags_ctl.high_thresh, + .data = &sysctl_ip6frag_high_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -50,7 +49,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_LOW_THRESH, .procname = "ip6frag_low_thresh", - .data = &ip6_frags_ctl.low_thresh, 
+ .data = &sysctl_ip6frag_low_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -58,7 +57,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_TIME, .procname = "ip6frag_time", - .data = &ip6_frags_ctl.timeout, + .data = &sysctl_ip6frag_time, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, @@ -67,7 +66,7 @@ static ctl_table ipv6_table[] = { { .ctl_name = NET_IPV6_IP6FRAG_SECRET_INTERVAL, .procname = "ip6frag_secret_interval", - .data = &ip6_frags_ctl.secret_interval, + .data = &sysctl_ip6frag_secret_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec_jiffies, diff --git a/trunk/net/ipv6/tcp_ipv6.c b/trunk/net/ipv6/tcp_ipv6.c index 737b755342bd..a07b59c528f3 100644 --- a/trunk/net/ipv6/tcp_ipv6.c +++ b/trunk/net/ipv6/tcp_ipv6.c @@ -1668,8 +1668,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } -static int tcp_v6_rcv(struct sk_buff *skb) +static int tcp_v6_rcv(struct sk_buff **pskb) { + struct sk_buff *skb = *pskb; struct tcphdr *th; struct sock *sk; int ret; diff --git a/trunk/net/ipv6/tunnel6.c b/trunk/net/ipv6/tunnel6.c index 6323921b40be..23e2809878ae 100644 --- a/trunk/net/ipv6/tunnel6.c +++ b/trunk/net/ipv6/tunnel6.c @@ -87,8 +87,9 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family) EXPORT_SYMBOL(xfrm6_tunnel_deregister); -static int tunnel6_rcv(struct sk_buff *skb) +static int tunnel6_rcv(struct sk_buff **pskb) { + struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) @@ -105,8 +106,9 @@ static int tunnel6_rcv(struct sk_buff *skb) return 0; } -static int tunnel46_rcv(struct sk_buff *skb) +static int tunnel46_rcv(struct sk_buff **pskb) { + struct sk_buff *skb = *pskb; struct xfrm6_tunnel *handler; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) diff --git a/trunk/net/ipv6/udp.c b/trunk/net/ipv6/udp.c index caebad6ee510..82ff26dd4470 100644 --- 
a/trunk/net/ipv6/udp.c +++ b/trunk/net/ipv6/udp.c @@ -405,9 +405,10 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], +int __udp6_lib_rcv(struct sk_buff **pskb, struct hlist_head udptable[], int proto) { + struct sk_buff *skb = *pskb; struct sock *sk; struct udphdr *uh; struct net_device *dev = skb->dev; @@ -493,9 +494,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return 0; } -static __inline__ int udpv6_rcv(struct sk_buff *skb) +static __inline__ int udpv6_rcv(struct sk_buff **pskb) { - return __udp6_lib_rcv(skb, udp_hash, IPPROTO_UDP); + return __udp6_lib_rcv(pskb, udp_hash, IPPROTO_UDP); } /* diff --git a/trunk/net/ipv6/udp_impl.h b/trunk/net/ipv6/udp_impl.h index 2d3fda601232..6e252f318f7c 100644 --- a/trunk/net/ipv6/udp_impl.h +++ b/trunk/net/ipv6/udp_impl.h @@ -6,7 +6,7 @@ #include #include -extern int __udp6_lib_rcv(struct sk_buff *, struct hlist_head [], int ); +extern int __udp6_lib_rcv(struct sk_buff **, struct hlist_head [], int ); extern void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, int , int , int , __be32 , struct hlist_head []); diff --git a/trunk/net/ipv6/udplite.c b/trunk/net/ipv6/udplite.c index 766566f7de47..f54016a55004 100644 --- a/trunk/net/ipv6/udplite.c +++ b/trunk/net/ipv6/udplite.c @@ -17,9 +17,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; -static int udplitev6_rcv(struct sk_buff *skb) +static int udplitev6_rcv(struct sk_buff **pskb) { - return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); + return __udp6_lib_rcv(pskb, udplite_hash, IPPROTO_UDPLITE); } static void udplitev6_err(struct sk_buff *skb, diff --git a/trunk/net/ipv6/xfrm6_input.c b/trunk/net/ipv6/xfrm6_input.c index 02f69e544f6f..c858537cec4b 100644 --- a/trunk/net/ipv6/xfrm6_input.c +++ b/trunk/net/ipv6/xfrm6_input.c @@ -133,9 +133,9 @@ int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) 
EXPORT_SYMBOL(xfrm6_rcv_spi); -int xfrm6_rcv(struct sk_buff *skb) +int xfrm6_rcv(struct sk_buff **pskb) { - return xfrm6_rcv_spi(skb, 0); + return xfrm6_rcv_spi(*pskb, 0); } EXPORT_SYMBOL(xfrm6_rcv); diff --git a/trunk/net/ipv6/xfrm6_output.c b/trunk/net/ipv6/xfrm6_output.c index a5a32c17249d..4618c18e611d 100644 --- a/trunk/net/ipv6/xfrm6_output.c +++ b/trunk/net/ipv6/xfrm6_output.c @@ -80,7 +80,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) while (likely((err = xfrm6_output_one(skb)) == 0)) { nf_reset(skb); - err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_LOCAL_OUT, &skb, NULL, skb->dst->dev, dst_output); if (unlikely(err != 1)) break; @@ -88,7 +88,7 @@ static int xfrm6_output_finish2(struct sk_buff *skb) if (!skb->dst->xfrm) return dst_output(skb); - err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, + err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, skb->dst->dev, xfrm6_output_finish2); if (unlikely(err != 1)) break; diff --git a/trunk/net/netfilter/core.c b/trunk/net/netfilter/core.c index bed9ba01e8ec..a523fa4136ed 100644 --- a/trunk/net/netfilter/core.c +++ b/trunk/net/netfilter/core.c @@ -117,7 +117,7 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, + struct sk_buff **skb, int hook, const struct net_device *indev, const struct net_device *outdev, @@ -160,7 +160,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. 
*/ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -175,17 +175,17 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, elem = &nf_hooks[pf][hook]; next_hook: - verdict = nf_iterate(&nf_hooks[pf][hook], skb, hook, indev, + verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev, outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; goto unlock; } else if (verdict == NF_DROP) { - kfree_skb(skb); + kfree_skb(*pskb); ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } @@ -196,24 +196,34 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, EXPORT_SYMBOL(nf_hook_slow); -int skb_make_writable(struct sk_buff *skb, unsigned int writable_len) +int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) { - if (writable_len > skb->len) + struct sk_buff *nskb; + + if (writable_len > (*pskb)->len) return 0; /* Not exclusive use of packet? Must copy. 
*/ - if (!skb_cloned(skb)) { - if (writable_len <= skb_headlen(skb)) - return 1; - } else if (skb_clone_writable(skb, writable_len)) - return 1; - - if (writable_len <= skb_headlen(skb)) - writable_len = 0; - else - writable_len -= skb_headlen(skb); - - return !!__pskb_pull_tail(skb, writable_len); + if (skb_cloned(*pskb) && !skb_clone_writable(*pskb, writable_len)) + goto copy_skb; + if (skb_shared(*pskb)) + goto copy_skb; + + return pskb_may_pull(*pskb, writable_len); + +copy_skb: + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return 0; + BUG_ON(skb_is_nonlinear(nskb)); + + /* Rest of kernel will get very unhappy if we pass it a + suddenly-orphaned skbuff */ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + return 1; } EXPORT_SYMBOL(skb_make_writable); diff --git a/trunk/net/netfilter/nf_conntrack_amanda.c b/trunk/net/netfilter/nf_conntrack_amanda.c index 7b8239c0cd5e..e42ab230ad88 100644 --- a/trunk/net/netfilter/nf_conntrack_amanda.c +++ b/trunk/net/netfilter/nf_conntrack_amanda.c @@ -36,7 +36,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (*nf_nat_amanda_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_amanda_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -79,7 +79,7 @@ static struct { }, }; -static int amanda_help(struct sk_buff *skb, +static int amanda_help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -101,25 +101,25 @@ static int amanda_help(struct sk_buff *skb, /* increase the UDP timeout of the master connection as replies from * Amanda clients to the server can be quite delayed */ - nf_ct_refresh(ct, skb, master_timeout * HZ); + nf_ct_refresh(ct, *pskb, master_timeout * HZ); /* No data? 
*/ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= skb->len) { + if (dataoff >= (*pskb)->len) { if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", skb->len); + printk("amanda_help: skblen = %u\n", (*pskb)->len); return NF_ACCEPT; } memset(&ts, 0, sizeof(ts)); - start = skb_find_text(skb, dataoff, skb->len, + start = skb_find_text(*pskb, dataoff, (*pskb)->len, search[SEARCH_CONNECT].ts, &ts); if (start == UINT_MAX) goto out; start += dataoff + search[SEARCH_CONNECT].len; memset(&ts, 0, sizeof(ts)); - stop = skb_find_text(skb, start, skb->len, + stop = skb_find_text(*pskb, start, (*pskb)->len, search[SEARCH_NEWLINE].ts, &ts); if (stop == UINT_MAX) goto out; @@ -127,13 +127,13 @@ static int amanda_help(struct sk_buff *skb, for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) { memset(&ts, 0, sizeof(ts)); - off = skb_find_text(skb, start, stop, search[i].ts, &ts); + off = skb_find_text(*pskb, start, stop, search[i].ts, &ts); if (off == UINT_MAX) continue; off += start + search[i].len; len = min_t(unsigned int, sizeof(pbuf) - 1, stop - off); - if (skb_copy_bits(skb, off, pbuf, len)) + if (skb_copy_bits(*pskb, off, pbuf, len)) break; pbuf[len] = '\0'; @@ -153,7 +153,7 @@ static int amanda_help(struct sk_buff *skb, nf_nat_amanda = rcu_dereference(nf_nat_amanda_hook); if (nf_nat_amanda && ct->status & IPS_NAT_MASK) - ret = nf_nat_amanda(skb, ctinfo, off - dataoff, + ret = nf_nat_amanda(pskb, ctinfo, off - dataoff, len, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; diff --git a/trunk/net/netfilter/nf_conntrack_core.c b/trunk/net/netfilter/nf_conntrack_core.c index 4d6171bc0829..83c30b45d170 100644 --- a/trunk/net/netfilter/nf_conntrack_core.c +++ b/trunk/net/netfilter/nf_conntrack_core.c @@ -307,7 +307,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); /* Confirm a connection given skb; places it in hash table */ int -__nf_conntrack_confirm(struct sk_buff *skb) +__nf_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; struct 
nf_conntrack_tuple_hash *h; @@ -316,7 +316,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_node *n; enum ip_conntrack_info ctinfo; - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -367,14 +367,14 @@ __nf_conntrack_confirm(struct sk_buff *skb) write_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, skb); + nf_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, skb); + nf_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, skb); + IPCT_RELATED : IPCT_NEW, *pskb); return NF_ACCEPT; out: @@ -632,7 +632,7 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) +nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -644,14 +644,14 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) int ret; /* Previously seen (loopback or untracked)? Ignore. */ - if (skb->nfct) { + if ((*pskb)->nfct) { NF_CT_STAT_INC_ATOMIC(ignore); return NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ l3proto = __nf_ct_l3proto_find((u_int16_t)pf); - ret = l3proto->get_l4proto(skb, skb_network_offset(skb), + ret = l3proto->get_l4proto(*pskb, skb_network_offset(*pskb), &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); @@ -666,13 +666,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) * inverse of the return code tells to the netfilter * core what to do with the packet. 
*/ if (l4proto->error != NULL && - (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + (ret = l4proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { NF_CT_STAT_INC_ATOMIC(error); NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } - ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, + ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ @@ -686,21 +686,21 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) return NF_DROP; } - NF_CT_ASSERT(skb->nfct); + NF_CT_ASSERT((*pskb)->nfct); - ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); + ret = l4proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); if (ret < 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; NF_CT_STAT_INC_ATOMIC(invalid); return -ret; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, *pskb); return ret; } diff --git a/trunk/net/netfilter/nf_conntrack_ftp.c b/trunk/net/netfilter/nf_conntrack_ftp.c index 6df259067f7e..c763ee74ea02 100644 --- a/trunk/net/netfilter/nf_conntrack_ftp.c +++ b/trunk/net/netfilter/nf_conntrack_ftp.c @@ -43,7 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400); static int loose; module_param(loose, bool, 0600); -unsigned int (*nf_nat_ftp_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, enum nf_ct_ftp_type type, unsigned int matchoff, @@ -344,7 +344,7 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, } } -static int help(struct sk_buff *skb, +static int help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn 
*ct, enum ip_conntrack_info ctinfo) @@ -371,21 +371,21 @@ static int help(struct sk_buff *skb, return NF_ACCEPT; } - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; dataoff = protoff + th->doff * 4; /* No data? */ - if (dataoff >= skb->len) { + if (dataoff >= (*pskb)->len) { pr_debug("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, - skb->len); + (*pskb)->len); return NF_ACCEPT; } - datalen = skb->len - dataoff; + datalen = (*pskb)->len - dataoff; spin_lock_bh(&nf_ftp_lock); - fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer); + fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); BUG_ON(fb_ptr == NULL); ends_in_nl = (fb_ptr[datalen - 1] == '\n'); @@ -491,7 +491,7 @@ static int help(struct sk_buff *skb, * (possibly changed) expectation itself. */ nf_nat_ftp = rcu_dereference(nf_nat_ftp_hook); if (nf_nat_ftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_ftp(skb, ctinfo, search[dir][i].ftptype, + ret = nf_nat_ftp(pskb, ctinfo, search[dir][i].ftptype, matchoff, matchlen, exp); else { /* Can't expect this? Best to drop packet now. */ @@ -508,7 +508,7 @@ static int help(struct sk_buff *skb, /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. 
*/ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, skb); + update_nl_seq(seq, ct_ftp_info, dir, *pskb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/trunk/net/netfilter/nf_conntrack_h323_main.c b/trunk/net/netfilter/nf_conntrack_h323_main.c index f23fd9598e19..a8a9dfbe7a67 100644 --- a/trunk/net/netfilter/nf_conntrack_h323_main.c +++ b/trunk/net/netfilter/nf_conntrack_h323_main.c @@ -47,27 +47,27 @@ MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations " "(determined by routing information)"); /* Hooks for NAT */ -int (*set_h245_addr_hook) (struct sk_buff *skb, +int (*set_h245_addr_hook) (struct sk_buff **pskb, unsigned char **data, int dataoff, H245_TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_h225_addr_hook) (struct sk_buff *skb, +int (*set_h225_addr_hook) (struct sk_buff **pskb, unsigned char **data, int dataoff, TransportAddress *taddr, union nf_conntrack_address *addr, __be16 port) __read_mostly; -int (*set_sig_addr_hook) (struct sk_buff *skb, +int (*set_sig_addr_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*set_ras_addr_hook) (struct sk_buff *skb, +int (*set_ras_addr_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) __read_mostly; -int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, +int (*nat_rtp_rtcp_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -75,25 +75,25 @@ int (*nat_rtp_rtcp_hook) (struct sk_buff *skb, __be16 port, __be16 rtp_port, struct nf_conntrack_expect *rtp_exp, struct nf_conntrack_expect *rtcp_exp) __read_mostly; -int (*nat_t120_hook) (struct sk_buff *skb, +int (*nat_t120_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int 
dataoff, H245_TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_h245_hook) (struct sk_buff *skb, +int (*nat_h245_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_callforwarding_hook) (struct sk_buff *skb, +int (*nat_callforwarding_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr, __be16 port, struct nf_conntrack_expect *exp) __read_mostly; -int (*nat_q931_hook) (struct sk_buff *skb, +int (*nat_q931_hook) (struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int idx, @@ -108,7 +108,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[]; static struct nf_conntrack_helper nf_conntrack_helper_ras[]; /****************************************************************************/ -static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, +static int get_tpkt_data(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int *datalen, int *dataoff) { @@ -122,7 +122,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, int tpktoff; /* Get TCP header */ - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return 0; @@ -130,13 +130,13 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff, tcpdataoff = protoff + th->doff * 4; /* Get TCP data length */ - tcpdatalen = skb->len - tcpdataoff; + tcpdatalen = (*pskb)->len - tcpdataoff; if (tcpdatalen <= 0) /* No TCP data */ goto clear_out; if (*data == NULL) { /* first TPKT */ /* Get first TPKT pointer */ - tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen, + 
tpkt = skb_header_pointer(*pskb, tcpdataoff, tcpdatalen, h323_buffer); BUG_ON(tpkt == NULL); @@ -248,7 +248,7 @@ static int get_h245_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, +static int expect_rtp_rtcp(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, H245_TransportAddress *taddr) @@ -297,7 +297,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, (nat_rtp_rtcp = rcu_dereference(nat_rtp_rtcp_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = nat_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, taddr, port, rtp_port, rtp_exp, rtcp_exp); } else { /* Conntrack only */ if (nf_ct_expect_related(rtp_exp) == 0) { @@ -321,7 +321,7 @@ static int expect_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int expect_t120(struct sk_buff *skb, +static int expect_t120(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -355,7 +355,7 @@ static int expect_t120(struct sk_buff *skb, (nat_t120 = rcu_dereference(nat_t120_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_t120(skb, ct, ctinfo, data, dataoff, taddr, + ret = nat_t120(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -371,7 +371,7 @@ static int expect_t120(struct sk_buff *skb, } /****************************************************************************/ -static int process_h245_channel(struct sk_buff *skb, +static int process_h245_channel(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -381,7 +381,7 @@ static int 
process_h245_channel(struct sk_buff *skb, if (channel->options & eH2250LogicalChannelParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, &channel->mediaChannel); if (ret < 0) return -1; @@ -390,7 +390,7 @@ static int process_h245_channel(struct sk_buff *skb, if (channel-> options & eH2250LogicalChannelParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, &channel->mediaControlChannel); if (ret < 0) return -1; @@ -400,7 +400,7 @@ static int process_h245_channel(struct sk_buff *skb, } /****************************************************************************/ -static int process_olc(struct sk_buff *skb, struct nf_conn *ct, +static int process_olc(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannel *olc) @@ -412,7 +412,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, if (olc->forwardLogicalChannelParameters.multiplexParameters.choice == eOpenLogicalChannel_forwardLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters) { - ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, &olc-> forwardLogicalChannelParameters. multiplexParameters. @@ -430,7 +430,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, eOpenLogicalChannel_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { ret = - process_h245_channel(skb, ct, ctinfo, data, dataoff, + process_h245_channel(pskb, ct, ctinfo, data, dataoff, &olc-> reverseLogicalChannelParameters. multiplexParameters. 
@@ -448,7 +448,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, t120.choice == eDataProtocolCapability_separateLANStack && olc->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(skb, ct, ctinfo, data, dataoff, + ret = expect_t120(pskb, ct, ctinfo, data, dataoff, &olc->separateStack.networkAddress. localAreaAddress); if (ret < 0) @@ -459,7 +459,7 @@ static int process_olc(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_olca(struct sk_buff *skb, struct nf_conn *ct, +static int process_olca(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, OpenLogicalChannelAck *olca) @@ -477,7 +477,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, choice == eOpenLogicalChannelAck_reverseLogicalChannelParameters_multiplexParameters_h2250LogicalChannelParameters)) { - ret = process_h245_channel(skb, ct, ctinfo, data, dataoff, + ret = process_h245_channel(pskb, ct, ctinfo, data, dataoff, &olca-> reverseLogicalChannelParameters. multiplexParameters. 
@@ -496,7 +496,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaChannel) { /* RTP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, &ack->mediaChannel); if (ret < 0) return -1; @@ -505,7 +505,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if (ack->options & eH2250LogicalChannelAckParameters_mediaControlChannel) { /* RTCP */ - ret = expect_rtp_rtcp(skb, ct, ctinfo, data, dataoff, + ret = expect_rtp_rtcp(pskb, ct, ctinfo, data, dataoff, &ack->mediaControlChannel); if (ret < 0) return -1; @@ -515,7 +515,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, if ((olca->options & eOpenLogicalChannelAck_separateStack) && olca->separateStack.networkAddress.choice == eNetworkAccessParameters_networkAddress_localAreaAddress) { - ret = expect_t120(skb, ct, ctinfo, data, dataoff, + ret = expect_t120(pskb, ct, ctinfo, data, dataoff, &olca->separateStack.networkAddress. 
localAreaAddress); if (ret < 0) @@ -526,7 +526,7 @@ static int process_olca(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_h245(struct sk_buff *skb, struct nf_conn *ct, +static int process_h245(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, MultimediaSystemControlMessage *mscm) @@ -535,7 +535,7 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, case eMultimediaSystemControlMessage_request: if (mscm->request.choice == eRequestMessage_openLogicalChannel) { - return process_olc(skb, ct, ctinfo, data, dataoff, + return process_olc(pskb, ct, ctinfo, data, dataoff, &mscm->request.openLogicalChannel); } pr_debug("nf_ct_h323: H.245 Request %d\n", @@ -544,7 +544,7 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, case eMultimediaSystemControlMessage_response: if (mscm->response.choice == eResponseMessage_openLogicalChannelAck) { - return process_olca(skb, ct, ctinfo, data, dataoff, + return process_olca(pskb, ct, ctinfo, data, dataoff, &mscm->response. 
openLogicalChannelAck); } @@ -560,7 +560,7 @@ static int process_h245(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int h245_help(struct sk_buff *skb, unsigned int protoff, +static int h245_help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static MultimediaSystemControlMessage mscm; @@ -574,12 +574,12 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_h245: skblen = %u\n", skb->len); + pr_debug("nf_ct_h245: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(skb, protoff, ct, ctinfo, + while (get_tpkt_data(pskb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { pr_debug("nf_ct_h245: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -596,7 +596,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff, } /* Process H.245 signal */ - if (process_h245(skb, ct, ctinfo, &data, dataoff, &mscm) < 0) + if (process_h245(pskb, ct, ctinfo, &data, dataoff, &mscm) < 0) goto drop; } @@ -654,7 +654,7 @@ int get_h225_addr(struct nf_conn *ct, unsigned char *data, } /****************************************************************************/ -static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, +static int expect_h245(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, TransportAddress *taddr) @@ -687,7 +687,7 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, (nat_h245 = rcu_dereference(nat_h245_hook)) && ct->status & IPS_NAT_MASK) { /* NAT needed */ - ret = nat_h245(skb, ct, ctinfo, data, dataoff, taddr, + ret = nat_h245(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -758,7 +758,7 @@ 
static int callforward_do_filter(union nf_conntrack_address *src, } /****************************************************************************/ -static int expect_callforwarding(struct sk_buff *skb, +static int expect_callforwarding(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -798,7 +798,7 @@ static int expect_callforwarding(struct sk_buff *skb, (nat_callforwarding = rcu_dereference(nat_callforwarding_hook)) && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_callforwarding(skb, ct, ctinfo, data, dataoff, + ret = nat_callforwarding(pskb, ct, ctinfo, data, dataoff, taddr, port, exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { @@ -814,7 +814,7 @@ static int expect_callforwarding(struct sk_buff *skb, } /****************************************************************************/ -static int process_setup(struct sk_buff *skb, struct nf_conn *ct, +static int process_setup(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Setup_UUIE *setup) @@ -829,7 +829,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Setup\n"); if (setup->options & eSetup_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &setup->h245Address); if (ret < 0) return -1; @@ -846,7 +846,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.src.u3), ntohs(ct->tuplehash[!dir].tuple.src.u.tcp.port)); - ret = set_h225_addr(skb, data, dataoff, + ret = set_h225_addr(pskb, data, dataoff, &setup->destCallSignalAddress, &ct->tuplehash[!dir].tuple.src.u3, ct->tuplehash[!dir].tuple.src.u.tcp.port); @@ -864,7 +864,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, NIP6(*(struct in6_addr *)&addr), ntohs(port), 
NIP6(*(struct in6_addr *)&ct->tuplehash[!dir].tuple.dst.u3), ntohs(ct->tuplehash[!dir].tuple.dst.u.tcp.port)); - ret = set_h225_addr(skb, data, dataoff, + ret = set_h225_addr(pskb, data, dataoff, &setup->sourceCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, ct->tuplehash[!dir].tuple.dst.u.tcp.port); @@ -874,7 +874,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, if (setup->options & eSetup_UUIE_fastStart) { for (i = 0; i < setup->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(pskb, ct, ctinfo, data, dataoff, &setup->fastStart.item[i]); if (ret < 0) return -1; @@ -885,7 +885,7 @@ static int process_setup(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_callproceeding(struct sk_buff *skb, +static int process_callproceeding(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, @@ -897,7 +897,7 @@ static int process_callproceeding(struct sk_buff *skb, pr_debug("nf_ct_q931: CallProceeding\n"); if (callproc->options & eCallProceeding_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &callproc->h245Address); if (ret < 0) return -1; @@ -905,7 +905,7 @@ static int process_callproceeding(struct sk_buff *skb, if (callproc->options & eCallProceeding_UUIE_fastStart) { for (i = 0; i < callproc->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(pskb, ct, ctinfo, data, dataoff, &callproc->fastStart.item[i]); if (ret < 0) return -1; @@ -916,7 +916,7 @@ static int process_callproceeding(struct sk_buff *skb, } /****************************************************************************/ -static int process_connect(struct sk_buff *skb, struct nf_conn *ct, +static int process_connect(struct sk_buff **pskb, struct nf_conn *ct, enum 
ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Connect_UUIE *connect) @@ -927,7 +927,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Connect\n"); if (connect->options & eConnect_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &connect->h245Address); if (ret < 0) return -1; @@ -935,7 +935,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, if (connect->options & eConnect_UUIE_fastStart) { for (i = 0; i < connect->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(pskb, ct, ctinfo, data, dataoff, &connect->fastStart.item[i]); if (ret < 0) return -1; @@ -946,7 +946,7 @@ static int process_connect(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, +static int process_alerting(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Alerting_UUIE *alert) @@ -957,7 +957,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Alerting\n"); if (alert->options & eAlerting_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &alert->h245Address); if (ret < 0) return -1; @@ -965,7 +965,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, if (alert->options & eAlerting_UUIE_fastStart) { for (i = 0; i < alert->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(pskb, ct, ctinfo, data, dataoff, &alert->fastStart.item[i]); if (ret < 0) return -1; @@ -976,7 +976,7 @@ static int process_alerting(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ 
-static int process_facility(struct sk_buff *skb, struct nf_conn *ct, +static int process_facility(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Facility_UUIE *facility) @@ -988,7 +988,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, if (facility->reason.choice == eFacilityReason_callForwarded) { if (facility->options & eFacility_UUIE_alternativeAddress) - return expect_callforwarding(skb, ct, ctinfo, data, + return expect_callforwarding(pskb, ct, ctinfo, data, dataoff, &facility-> alternativeAddress); @@ -996,7 +996,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, } if (facility->options & eFacility_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &facility->h245Address); if (ret < 0) return -1; @@ -1004,7 +1004,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, if (facility->options & eFacility_UUIE_fastStart) { for (i = 0; i < facility->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(pskb, ct, ctinfo, data, dataoff, &facility->fastStart.item[i]); if (ret < 0) return -1; @@ -1015,7 +1015,7 @@ static int process_facility(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_progress(struct sk_buff *skb, struct nf_conn *ct, +static int process_progress(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Progress_UUIE *progress) @@ -1026,7 +1026,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_q931: Progress\n"); if (progress->options & eProgress_UUIE_h245Address) { - ret = expect_h245(skb, ct, ctinfo, data, dataoff, + ret = expect_h245(pskb, ct, ctinfo, data, dataoff, &progress->h245Address); if (ret < 0) return -1; @@ -1034,7 
+1034,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, if (progress->options & eProgress_UUIE_fastStart) { for (i = 0; i < progress->fastStart.count; i++) { - ret = process_olc(skb, ct, ctinfo, data, dataoff, + ret = process_olc(pskb, ct, ctinfo, data, dataoff, &progress->fastStart.item[i]); if (ret < 0) return -1; @@ -1045,7 +1045,7 @@ static int process_progress(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_q931(struct sk_buff *skb, struct nf_conn *ct, +static int process_q931(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, int dataoff, Q931 *q931) { @@ -1055,28 +1055,28 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, switch (pdu->h323_message_body.choice) { case eH323_UU_PDU_h323_message_body_setup: - ret = process_setup(skb, ct, ctinfo, data, dataoff, + ret = process_setup(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.setup); break; case eH323_UU_PDU_h323_message_body_callProceeding: - ret = process_callproceeding(skb, ct, ctinfo, data, dataoff, + ret = process_callproceeding(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body. 
callProceeding); break; case eH323_UU_PDU_h323_message_body_connect: - ret = process_connect(skb, ct, ctinfo, data, dataoff, + ret = process_connect(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.connect); break; case eH323_UU_PDU_h323_message_body_alerting: - ret = process_alerting(skb, ct, ctinfo, data, dataoff, + ret = process_alerting(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.alerting); break; case eH323_UU_PDU_h323_message_body_facility: - ret = process_facility(skb, ct, ctinfo, data, dataoff, + ret = process_facility(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.facility); break; case eH323_UU_PDU_h323_message_body_progress: - ret = process_progress(skb, ct, ctinfo, data, dataoff, + ret = process_progress(pskb, ct, ctinfo, data, dataoff, &pdu->h323_message_body.progress); break; default: @@ -1090,7 +1090,7 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, if (pdu->options & eH323_UU_PDU_h245Control) { for (i = 0; i < pdu->h245Control.count; i++) { - ret = process_h245(skb, ct, ctinfo, data, dataoff, + ret = process_h245(pskb, ct, ctinfo, data, dataoff, &pdu->h245Control.item[i]); if (ret < 0) return -1; @@ -1101,7 +1101,7 @@ static int process_q931(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int q931_help(struct sk_buff *skb, unsigned int protoff, +static int q931_help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static Q931 q931; @@ -1115,12 +1115,12 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { return NF_ACCEPT; } - pr_debug("nf_ct_q931: skblen = %u\n", skb->len); + pr_debug("nf_ct_q931: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Process each TPKT */ - while (get_tpkt_data(skb, protoff, ct, ctinfo, + while (get_tpkt_data(pskb, protoff, ct, ctinfo, &data, &datalen, &dataoff)) { 
pr_debug("nf_ct_q931: TPKT len=%d ", datalen); NF_CT_DUMP_TUPLE(&ct->tuplehash[CTINFO2DIR(ctinfo)].tuple); @@ -1136,7 +1136,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff, } /* Process Q.931 signal */ - if (process_q931(skb, ct, ctinfo, &data, dataoff, &q931) < 0) + if (process_q931(pskb, ct, ctinfo, &data, dataoff, &q931) < 0) goto drop; } @@ -1177,20 +1177,20 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { }; /****************************************************************************/ -static unsigned char *get_udp_data(struct sk_buff *skb, unsigned int protoff, +static unsigned char *get_udp_data(struct sk_buff **pskb, unsigned int protoff, int *datalen) { struct udphdr _uh, *uh; int dataoff; - uh = skb_header_pointer(skb, protoff, sizeof(_uh), &_uh); + uh = skb_header_pointer(*pskb, protoff, sizeof(_uh), &_uh); if (uh == NULL) return NULL; dataoff = protoff + sizeof(_uh); - if (dataoff >= skb->len) + if (dataoff >= (*pskb)->len) return NULL; - *datalen = skb->len - dataoff; - return skb_header_pointer(skb, dataoff, *datalen, h323_buffer); + *datalen = (*pskb)->len - dataoff; + return skb_header_pointer(*pskb, dataoff, *datalen, h323_buffer); } /****************************************************************************/ @@ -1227,7 +1227,7 @@ static int set_expect_timeout(struct nf_conntrack_expect *exp, } /****************************************************************************/ -static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, +static int expect_q931(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, TransportAddress *taddr, int count) @@ -1265,7 +1265,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, nat_q931 = rcu_dereference(nat_q931_hook); if (nat_q931 && ct->status & IPS_NAT_MASK) { /* Need NAT */ - ret = nat_q931(skb, ct, ctinfo, data, taddr, i, port, exp); + ret = nat_q931(pskb, ct, ctinfo, data, taddr, i, port, 
exp); } else { /* Conntrack only */ if (nf_ct_expect_related(exp) == 0) { pr_debug("nf_ct_ras: expect Q.931 "); @@ -1283,7 +1283,7 @@ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_grq(struct sk_buff *skb, struct nf_conn *ct, +static int process_grq(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperRequest *grq) { @@ -1293,13 +1293,13 @@ static int process_grq(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) /* NATed */ - return set_ras_addr(skb, ct, ctinfo, data, + return set_ras_addr(pskb, ct, ctinfo, data, &grq->rasAddress, 1); return 0; } /****************************************************************************/ -static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, +static int process_gcf(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, GatekeeperConfirm *gcf) { @@ -1343,7 +1343,7 @@ static int process_gcf(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, +static int process_rrq(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationRequest *rrq) { @@ -1353,7 +1353,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, pr_debug("nf_ct_ras: RRQ\n"); - ret = expect_q931(skb, ct, ctinfo, data, + ret = expect_q931(pskb, ct, ctinfo, data, rrq->callSignalAddress.item, rrq->callSignalAddress.count); if (ret < 0) @@ -1361,7 +1361,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, data, + ret = 
set_ras_addr(pskb, ct, ctinfo, data, rrq->rasAddress.item, rrq->rasAddress.count); if (ret < 0) @@ -1378,7 +1378,7 @@ static int process_rrq(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, +static int process_rcf(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RegistrationConfirm *rcf) { @@ -1392,7 +1392,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + ret = set_sig_addr(pskb, ct, ctinfo, data, rcf->callSignalAddress.item, rcf->callSignalAddress.count); if (ret < 0) @@ -1407,7 +1407,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, if (info->timeout > 0) { pr_debug("nf_ct_ras: set RAS connection timeout to " "%u seconds\n", info->timeout); - nf_ct_refresh(ct, skb, info->timeout * HZ); + nf_ct_refresh(ct, *pskb, info->timeout * HZ); /* Set expect timeout */ read_lock_bh(&nf_conntrack_lock); @@ -1427,7 +1427,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_urq(struct sk_buff *skb, struct nf_conn *ct, +static int process_urq(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, UnregistrationRequest *urq) { @@ -1440,7 +1440,7 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + ret = set_sig_addr(pskb, ct, ctinfo, data, urq->callSignalAddress.item, urq->callSignalAddress.count); if (ret < 0) @@ -1453,13 +1453,13 @@ static int process_urq(struct sk_buff *skb, struct nf_conn *ct, 
info->sig_port[!dir] = 0; /* Give it 30 seconds for UCF or URJ */ - nf_ct_refresh(ct, skb, 30 * HZ); + nf_ct_refresh(ct, *pskb, 30 * HZ); return 0; } /****************************************************************************/ -static int process_arq(struct sk_buff *skb, struct nf_conn *ct, +static int process_arq(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionRequest *arq) { @@ -1479,7 +1479,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, port == info->sig_port[dir] && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Answering ARQ */ - return set_h225_addr(skb, data, 0, + return set_h225_addr(pskb, data, 0, &arq->destCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, info->sig_port[!dir]); @@ -1491,7 +1491,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, !memcmp(&addr, &ct->tuplehash[dir].tuple.src.u3, sizeof(addr)) && set_h225_addr && ct->status & IPS_NAT_MASK) { /* Calling ARQ */ - return set_h225_addr(skb, data, 0, + return set_h225_addr(pskb, data, 0, &arq->srcCallSignalAddress, &ct->tuplehash[!dir].tuple.dst.u3, port); @@ -1501,7 +1501,7 @@ static int process_arq(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_acf(struct sk_buff *skb, struct nf_conn *ct, +static int process_acf(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, AdmissionConfirm *acf) { @@ -1522,7 +1522,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, /* Answering ACF */ set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) - return set_sig_addr(skb, ct, ctinfo, data, + return set_sig_addr(pskb, ct, ctinfo, data, &acf->destCallSignalAddress, 1); return 0; } @@ -1548,7 +1548,7 @@ static int process_acf(struct sk_buff *skb, struct nf_conn *ct, } 
/****************************************************************************/ -static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, +static int process_lrq(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationRequest *lrq) { @@ -1558,13 +1558,13 @@ static int process_lrq(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) - return set_ras_addr(skb, ct, ctinfo, data, + return set_ras_addr(pskb, ct, ctinfo, data, &lrq->replyAddress, 1); return 0; } /****************************************************************************/ -static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, +static int process_lcf(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, LocationConfirm *lcf) { @@ -1603,7 +1603,7 @@ static int process_lcf(struct sk_buff *skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_irr(struct sk_buff *skb, struct nf_conn *ct, +static int process_irr(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, InfoRequestResponse *irr) { @@ -1615,7 +1615,7 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, set_ras_addr = rcu_dereference(set_ras_addr_hook); if (set_ras_addr && ct->status & IPS_NAT_MASK) { - ret = set_ras_addr(skb, ct, ctinfo, data, + ret = set_ras_addr(pskb, ct, ctinfo, data, &irr->rasAddress, 1); if (ret < 0) return -1; @@ -1623,7 +1623,7 @@ static int process_irr(struct sk_buff *skb, struct nf_conn *ct, set_sig_addr = rcu_dereference(set_sig_addr_hook); if (set_sig_addr && ct->status & IPS_NAT_MASK) { - ret = set_sig_addr(skb, ct, ctinfo, data, + ret = set_sig_addr(pskb, ct, ctinfo, data, irr->callSignalAddress.item, irr->callSignalAddress.count); if (ret < 0) @@ -1634,40 +1634,40 @@ static int process_irr(struct sk_buff 
*skb, struct nf_conn *ct, } /****************************************************************************/ -static int process_ras(struct sk_buff *skb, struct nf_conn *ct, +static int process_ras(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned char **data, RasMessage *ras) { switch (ras->choice) { case eRasMessage_gatekeeperRequest: - return process_grq(skb, ct, ctinfo, data, + return process_grq(pskb, ct, ctinfo, data, &ras->gatekeeperRequest); case eRasMessage_gatekeeperConfirm: - return process_gcf(skb, ct, ctinfo, data, + return process_gcf(pskb, ct, ctinfo, data, &ras->gatekeeperConfirm); case eRasMessage_registrationRequest: - return process_rrq(skb, ct, ctinfo, data, + return process_rrq(pskb, ct, ctinfo, data, &ras->registrationRequest); case eRasMessage_registrationConfirm: - return process_rcf(skb, ct, ctinfo, data, + return process_rcf(pskb, ct, ctinfo, data, &ras->registrationConfirm); case eRasMessage_unregistrationRequest: - return process_urq(skb, ct, ctinfo, data, + return process_urq(pskb, ct, ctinfo, data, &ras->unregistrationRequest); case eRasMessage_admissionRequest: - return process_arq(skb, ct, ctinfo, data, + return process_arq(pskb, ct, ctinfo, data, &ras->admissionRequest); case eRasMessage_admissionConfirm: - return process_acf(skb, ct, ctinfo, data, + return process_acf(pskb, ct, ctinfo, data, &ras->admissionConfirm); case eRasMessage_locationRequest: - return process_lrq(skb, ct, ctinfo, data, + return process_lrq(pskb, ct, ctinfo, data, &ras->locationRequest); case eRasMessage_locationConfirm: - return process_lcf(skb, ct, ctinfo, data, + return process_lcf(pskb, ct, ctinfo, data, &ras->locationConfirm); case eRasMessage_infoRequestResponse: - return process_irr(skb, ct, ctinfo, data, + return process_irr(pskb, ct, ctinfo, data, &ras->infoRequestResponse); default: pr_debug("nf_ct_ras: RAS message %d\n", ras->choice); @@ -1678,7 +1678,7 @@ static int process_ras(struct sk_buff *skb, struct nf_conn 
*ct, } /****************************************************************************/ -static int ras_help(struct sk_buff *skb, unsigned int protoff, +static int ras_help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { static RasMessage ras; @@ -1686,12 +1686,12 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, int datalen = 0; int ret; - pr_debug("nf_ct_ras: skblen = %u\n", skb->len); + pr_debug("nf_ct_ras: skblen = %u\n", (*pskb)->len); spin_lock_bh(&nf_h323_lock); /* Get UDP data */ - data = get_udp_data(skb, protoff, &datalen); + data = get_udp_data(pskb, protoff, &datalen); if (data == NULL) goto accept; pr_debug("nf_ct_ras: RAS message len=%d ", datalen); @@ -1707,7 +1707,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff, } /* Process RAS message */ - if (process_ras(skb, ct, ctinfo, &data, &ras) < 0) + if (process_ras(pskb, ct, ctinfo, &data, &ras) < 0) goto drop; accept: diff --git a/trunk/net/netfilter/nf_conntrack_irc.c b/trunk/net/netfilter/nf_conntrack_irc.c index dfaed4ba83cd..1562ca97a349 100644 --- a/trunk/net/netfilter/nf_conntrack_irc.c +++ b/trunk/net/netfilter/nf_conntrack_irc.c @@ -30,7 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (*nf_nat_irc_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_irc_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, unsigned int matchoff, unsigned int matchlen, @@ -89,7 +89,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip, return 0; } -static int help(struct sk_buff *skb, unsigned int protoff, +static int help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; @@ -116,22 +116,22 @@ static int help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; /* Not a full tcp header? 
*/ - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ dataoff = protoff + th->doff*4; - if (dataoff >= skb->len) + if (dataoff >= (*pskb)->len) return NF_ACCEPT; spin_lock_bh(&irc_buffer_lock); - ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff, + ib_ptr = skb_header_pointer(*pskb, dataoff, (*pskb)->len - dataoff, irc_buffer); BUG_ON(ib_ptr == NULL); data = ib_ptr; - data_limit = ib_ptr + skb->len - dataoff; + data_limit = ib_ptr + (*pskb)->len - dataoff; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -143,7 +143,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, data += 5; /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); pr_debug("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u\n", NIPQUAD(iph->saddr), ntohs(th->source), NIPQUAD(iph->daddr), ntohs(th->dest)); @@ -193,7 +193,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, nf_nat_irc = rcu_dereference(nf_nat_irc_hook); if (nf_nat_irc && ct->status & IPS_NAT_MASK) - ret = nf_nat_irc(skb, ctinfo, + ret = nf_nat_irc(pskb, ctinfo, addr_beg_p - ib_ptr, addr_end_p - addr_beg_p, exp); diff --git a/trunk/net/netfilter/nf_conntrack_netbios_ns.c b/trunk/net/netfilter/nf_conntrack_netbios_ns.c index 9810d81e2a06..1d59fabeb5f7 100644 --- a/trunk/net/netfilter/nf_conntrack_netbios_ns.c +++ b/trunk/net/netfilter/nf_conntrack_netbios_ns.c @@ -42,17 +42,17 @@ static unsigned int timeout __read_mostly = 3; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -static int help(struct sk_buff *skb, unsigned int protoff, +static int help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { struct nf_conntrack_expect *exp; - struct iphdr *iph = ip_hdr(skb); - 
struct rtable *rt = (struct rtable *)skb->dst; + struct iphdr *iph = ip_hdr(*pskb); + struct rtable *rt = (struct rtable *)(*pskb)->dst; struct in_device *in_dev; __be32 mask = 0; /* we're only interested in locally generated packets */ - if (skb->sk == NULL) + if ((*pskb)->sk == NULL) goto out; if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) goto out; @@ -91,7 +91,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, nf_ct_expect_related(exp); nf_ct_expect_put(exp); - nf_ct_refresh(ct, skb, timeout * HZ); + nf_ct_refresh(ct, *pskb, timeout * HZ); out: return NF_ACCEPT; } diff --git a/trunk/net/netfilter/nf_conntrack_pptp.c b/trunk/net/netfilter/nf_conntrack_pptp.c index 099b6df3e2b5..b0804199ab59 100644 --- a/trunk/net/netfilter/nf_conntrack_pptp.c +++ b/trunk/net/netfilter/nf_conntrack_pptp.c @@ -41,14 +41,14 @@ MODULE_ALIAS("ip_conntrack_pptp"); static DEFINE_SPINLOCK(nf_pptp_lock); int -(*nf_nat_pptp_hook_outbound)(struct sk_buff *skb, +(*nf_nat_pptp_hook_outbound)(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_outbound); int -(*nf_nat_pptp_hook_inbound)(struct sk_buff *skb, +(*nf_nat_pptp_hook_inbound)(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq) __read_mostly; @@ -254,7 +254,7 @@ static int exp_gre(struct nf_conn *ct, __be16 callid, __be16 peer_callid) } static inline int -pptp_inbound_pkt(struct sk_buff *skb, +pptp_inbound_pkt(struct sk_buff **pskb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -367,7 +367,7 @@ pptp_inbound_pkt(struct sk_buff *skb, nf_nat_pptp_inbound = rcu_dereference(nf_nat_pptp_hook_inbound); if (nf_nat_pptp_inbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_inbound(skb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_inbound(pskb, ct, ctinfo, 
ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -380,7 +380,7 @@ pptp_inbound_pkt(struct sk_buff *skb, } static inline int -pptp_outbound_pkt(struct sk_buff *skb, +pptp_outbound_pkt(struct sk_buff **pskb, struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq, unsigned int reqlen, @@ -462,7 +462,7 @@ pptp_outbound_pkt(struct sk_buff *skb, nf_nat_pptp_outbound = rcu_dereference(nf_nat_pptp_hook_outbound); if (nf_nat_pptp_outbound && ct->status & IPS_NAT_MASK) - return nf_nat_pptp_outbound(skb, ct, ctinfo, ctlh, pptpReq); + return nf_nat_pptp_outbound(pskb, ct, ctinfo, ctlh, pptpReq); return NF_ACCEPT; invalid: @@ -492,7 +492,7 @@ static const unsigned int pptp_msg_size[] = { /* track caller id inside control connection, call expect_related */ static int -conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, +conntrack_pptp_help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { @@ -502,7 +502,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, struct pptp_pkt_hdr _pptph, *pptph; struct PptpControlHeader _ctlh, *ctlh; union pptp_ctrl_union _pptpReq, *pptpReq; - unsigned int tcplen = skb->len - protoff; + unsigned int tcplen = (*pskb)->len - protoff; unsigned int datalen, reqlen, nexthdr_off; int oldsstate, oldcstate; int ret; @@ -514,12 +514,12 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; nexthdr_off = protoff; - tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph); + tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; - pptph = skb_header_pointer(skb, nexthdr_off, sizeof(_pptph), &_pptph); + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { pr_debug("no full PPTP header, can't track\n"); return NF_ACCEPT; @@ -534,7 +534,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, return NF_ACCEPT; 
} - ctlh = skb_header_pointer(skb, nexthdr_off, sizeof(_ctlh), &_ctlh); + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); if (!ctlh) return NF_ACCEPT; nexthdr_off += sizeof(_ctlh); @@ -547,7 +547,7 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, if (reqlen > sizeof(*pptpReq)) reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(skb, nexthdr_off, reqlen, &_pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); if (!pptpReq) return NF_ACCEPT; @@ -560,11 +560,11 @@ conntrack_pptp_help(struct sk_buff *skb, unsigned int protoff, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(skb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(skb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, ctinfo); pr_debug("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); diff --git a/trunk/net/netfilter/nf_conntrack_sane.c b/trunk/net/netfilter/nf_conntrack_sane.c index b5a16c6e21c2..355d371bac93 100644 --- a/trunk/net/netfilter/nf_conntrack_sane.c +++ b/trunk/net/netfilter/nf_conntrack_sane.c @@ -56,7 +56,7 @@ struct sane_reply_net_start { /* other fields aren't interesting for conntrack */ }; -static int help(struct sk_buff *skb, +static int help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -80,19 +80,19 @@ static int help(struct sk_buff *skb, return NF_ACCEPT; /* Not a full tcp header? */ - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? 
*/ dataoff = protoff + th->doff * 4; - if (dataoff >= skb->len) + if (dataoff >= (*pskb)->len) return NF_ACCEPT; - datalen = skb->len - dataoff; + datalen = (*pskb)->len - dataoff; spin_lock_bh(&nf_sane_lock); - sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer); + sb_ptr = skb_header_pointer(*pskb, dataoff, datalen, sane_buffer); BUG_ON(sb_ptr == NULL); if (dir == IP_CT_DIR_ORIGINAL) { diff --git a/trunk/net/netfilter/nf_conntrack_sip.c b/trunk/net/netfilter/nf_conntrack_sip.c index 8f8b5a48df38..d449fa47491c 100644 --- a/trunk/net/netfilter/nf_conntrack_sip.c +++ b/trunk/net/netfilter/nf_conntrack_sip.c @@ -36,13 +36,13 @@ static unsigned int sip_timeout __read_mostly = SIP_TIMEOUT; module_param(sip_timeout, uint, 0600); MODULE_PARM_DESC(sip_timeout, "timeout for the master SIP session"); -unsigned int (*nf_nat_sip_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sip_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conn *ct, const char **dptr) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_sip_hook); -unsigned int (*nf_nat_sdp_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_sdp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp, const char *dptr) __read_mostly; @@ -363,7 +363,7 @@ int ct_sip_get_info(struct nf_conn *ct, } EXPORT_SYMBOL_GPL(ct_sip_get_info); -static int set_expected_rtp(struct sk_buff *skb, +static int set_expected_rtp(struct sk_buff **pskb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, union nf_conntrack_address *addr, @@ -385,7 +385,7 @@ static int set_expected_rtp(struct sk_buff *skb, nf_nat_sdp = rcu_dereference(nf_nat_sdp_hook); if (nf_nat_sdp && ct->status & IPS_NAT_MASK) - ret = nf_nat_sdp(skb, ctinfo, exp, dptr); + ret = nf_nat_sdp(pskb, ctinfo, exp, dptr); else { if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; @@ -397,7 +397,7 @@ static int set_expected_rtp(struct sk_buff *skb, return ret; } -static int sip_help(struct sk_buff *skb, +static int sip_help(struct 
sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -414,13 +414,13 @@ static int sip_help(struct sk_buff *skb, /* No Data ? */ dataoff = protoff + sizeof(struct udphdr); - if (dataoff >= skb->len) + if (dataoff >= (*pskb)->len) return NF_ACCEPT; - nf_ct_refresh(ct, skb, sip_timeout * HZ); + nf_ct_refresh(ct, *pskb, sip_timeout * HZ); - if (!skb_is_nonlinear(skb)) - dptr = skb->data + dataoff; + if (!skb_is_nonlinear(*pskb)) + dptr = (*pskb)->data + dataoff; else { pr_debug("Copy of skbuff not supported yet.\n"); goto out; @@ -428,13 +428,13 @@ static int sip_help(struct sk_buff *skb, nf_nat_sip = rcu_dereference(nf_nat_sip_hook); if (nf_nat_sip && ct->status & IPS_NAT_MASK) { - if (!nf_nat_sip(skb, ctinfo, ct, &dptr)) { + if (!nf_nat_sip(pskb, ctinfo, ct, &dptr)) { ret = NF_DROP; goto out; } } - datalen = skb->len - dataoff; + datalen = (*pskb)->len - dataoff; if (datalen < sizeof("SIP/2.0 200") - 1) goto out; @@ -464,7 +464,7 @@ static int sip_help(struct sk_buff *skb, ret = NF_DROP; goto out; } - ret = set_expected_rtp(skb, ct, ctinfo, &addr, + ret = set_expected_rtp(pskb, ct, ctinfo, &addr, htons(port), dptr); } } diff --git a/trunk/net/netfilter/nf_conntrack_tftp.c b/trunk/net/netfilter/nf_conntrack_tftp.c index e894aa1ff3ad..cc19506cf2f8 100644 --- a/trunk/net/netfilter/nf_conntrack_tftp.c +++ b/trunk/net/netfilter/nf_conntrack_tftp.c @@ -29,12 +29,12 @@ static int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (*nf_nat_tftp_hook)(struct sk_buff *skb, +unsigned int (*nf_nat_tftp_hook)(struct sk_buff **pskb, enum ip_conntrack_info ctinfo, struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); -static int tftp_help(struct sk_buff *skb, +static int tftp_help(struct sk_buff **pskb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) @@ -46,7 +46,7 @@ static int tftp_help(struct sk_buff *skb, 
int family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num; typeof(nf_nat_tftp_hook) nf_nat_tftp; - tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr), + tfh = skb_header_pointer(*pskb, protoff + sizeof(struct udphdr), sizeof(_tftph), &_tftph); if (tfh == NULL) return NF_ACCEPT; @@ -70,7 +70,7 @@ static int tftp_help(struct sk_buff *skb, nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook); if (nf_nat_tftp && ct->status & IPS_NAT_MASK) - ret = nf_nat_tftp(skb, ctinfo, exp); + ret = nf_nat_tftp(pskb, ctinfo, exp); else if (nf_ct_expect_related(exp) != 0) ret = NF_DROP; nf_ct_expect_put(exp); diff --git a/trunk/net/netfilter/nf_internals.h b/trunk/net/netfilter/nf_internals.h index 196269c1e586..0df7fff196a7 100644 --- a/trunk/net/netfilter/nf_internals.h +++ b/trunk/net/netfilter/nf_internals.h @@ -14,7 +14,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, + struct sk_buff **skb, int hook, const struct net_device *indev, const struct net_device *outdev, diff --git a/trunk/net/netfilter/nf_queue.c b/trunk/net/netfilter/nf_queue.c index 0cef1433d660..a481a349f7bf 100644 --- a/trunk/net/netfilter/nf_queue.c +++ b/trunk/net/netfilter/nf_queue.c @@ -256,14 +256,14 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(info->pf); - if (!afinfo || afinfo->reroute(skb, info) < 0) + if (!afinfo || afinfo->reroute(&skb, info) < 0) verdict = NF_DROP; } if (verdict == NF_ACCEPT) { next_hook: verdict = nf_iterate(&nf_hooks[info->pf][info->hook], - skb, info->hook, + &skb, info->hook, info->indev, info->outdev, &elem, info->okfn, INT_MIN); } diff --git a/trunk/net/netfilter/nfnetlink_queue.c b/trunk/net/netfilter/nfnetlink_queue.c index 3ceeffcf6f9d..49f0480afe09 100644 --- a/trunk/net/netfilter/nfnetlink_queue.c +++ b/trunk/net/netfilter/nfnetlink_queue.c @@ -617,7 +617,6 @@ static int nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) { int diff; - 
int err; diff = data_len - e->skb->len; if (diff < 0) { @@ -627,18 +626,25 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) if (data_len > 0xFFFF) return -EINVAL; if (diff > skb_tailroom(e->skb)) { - err = pskb_expand_head(e->skb, 0, - diff - skb_tailroom(e->skb), - GFP_ATOMIC); - if (err) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(e->skb, + skb_headroom(e->skb), + diff, + GFP_ATOMIC); + if (newskb == NULL) { printk(KERN_WARNING "nf_queue: OOM " "in mangle, dropping packet\n"); - return err; + return -ENOMEM; } + if (e->skb->sk) + skb_set_owner_w(newskb, e->skb->sk); + kfree_skb(e->skb); + e->skb = newskb; } skb_put(e->skb, diff); } - if (!skb_make_writable(e->skb, data_len)) + if (!skb_make_writable(&e->skb, data_len)) return -ENOMEM; skb_copy_to_linear_data(e->skb, data, data_len); e->skb->ip_summed = CHECKSUM_NONE; diff --git a/trunk/net/netfilter/xt_CLASSIFY.c b/trunk/net/netfilter/xt_CLASSIFY.c index 77eeae658d42..07a1b9665005 100644 --- a/trunk/net/netfilter/xt_CLASSIFY.c +++ b/trunk/net/netfilter/xt_CLASSIFY.c @@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -36,7 +36,7 @@ target(struct sk_buff *skb, { const struct xt_classify_target_info *clinfo = targinfo; - skb->priority = clinfo->priority; + (*pskb)->priority = clinfo->priority; return XT_CONTINUE; } diff --git a/trunk/net/netfilter/xt_CONNMARK.c b/trunk/net/netfilter/xt_CONNMARK.c index 8cc324b159e9..7043c2757e09 100644 --- a/trunk/net/netfilter/xt_CONNMARK.c +++ b/trunk/net/netfilter/xt_CONNMARK.c @@ -34,7 +34,7 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -48,28 +48,28 @@ target(struct sk_buff *skb, u_int32_t mark; 
u_int32_t newmark; - ct = nf_ct_get(skb, &ctinfo); + ct = nf_ct_get(*pskb, &ctinfo); if (ct) { switch(markinfo->mode) { case XT_CONNMARK_SET: newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, *pskb); } break; case XT_CONNMARK_SAVE: newmark = (ct->mark & ~markinfo->mask) | - (skb->mark & markinfo->mask); + ((*pskb)->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, *pskb); } break; case XT_CONNMARK_RESTORE: - mark = skb->mark; + mark = (*pskb)->mark; diff = (ct->mark ^ mark) & markinfo->mask; - skb->mark = mark ^ diff; + (*pskb)->mark = mark ^ diff; break; } } diff --git a/trunk/net/netfilter/xt_CONNSECMARK.c b/trunk/net/netfilter/xt_CONNSECMARK.c index 021b5c8d20e2..63d73138c1b9 100644 --- a/trunk/net/netfilter/xt_CONNSECMARK.c +++ b/trunk/net/netfilter/xt_CONNSECMARK.c @@ -61,11 +61,12 @@ static void secmark_restore(struct sk_buff *skb) } } -static unsigned int target(struct sk_buff *skb, const struct net_device *in, +static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { + struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; switch (info->mode) { diff --git a/trunk/net/netfilter/xt_DSCP.c b/trunk/net/netfilter/xt_DSCP.c index 6322a933ab71..798ab731009d 100644 --- a/trunk/net/netfilter/xt_DSCP.c +++ b/trunk/net/netfilter/xt_DSCP.c @@ -25,7 +25,7 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_DSCP"); MODULE_ALIAS("ip6t_DSCP"); -static unsigned int target(struct sk_buff *skb, +static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -33,20 +33,20 @@ static unsigned int target(struct sk_buff *skb, 
const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv4_get_dsfield(ip_hdr(*pskb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(skb, sizeof(struct iphdr))) + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; - ipv4_change_dsfield(ip_hdr(skb), (__u8)(~XT_DSCP_MASK), + ipv4_change_dsfield(ip_hdr(*pskb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; } -static unsigned int target6(struct sk_buff *skb, +static unsigned int target6(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -54,13 +54,13 @@ static unsigned int target6(struct sk_buff *skb, const void *targinfo) { const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; + u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(*pskb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { - if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) return NF_DROP; - ipv6_change_dsfield(ipv6_hdr(skb), (__u8)(~XT_DSCP_MASK), + ipv6_change_dsfield(ipv6_hdr(*pskb), (__u8)(~XT_DSCP_MASK), dinfo->dscp << XT_DSCP_SHIFT); } return XT_CONTINUE; diff --git a/trunk/net/netfilter/xt_MARK.c b/trunk/net/netfilter/xt_MARK.c index bc6503d77d75..f30fe0baf7de 100644 --- a/trunk/net/netfilter/xt_MARK.c +++ b/trunk/net/netfilter/xt_MARK.c @@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -target_v0(struct sk_buff *skb, +target_v0(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -31,12 +31,12 @@ target_v0(struct sk_buff *skb, { const struct xt_mark_target_info *markinfo = targinfo; - skb->mark = markinfo->mark; + (*pskb)->mark = markinfo->mark; return XT_CONTINUE; } static unsigned int -target_v1(struct sk_buff *skb, 
+target_v1(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -52,15 +52,15 @@ target_v1(struct sk_buff *skb, break; case XT_MARK_AND: - mark = skb->mark & markinfo->mark; + mark = (*pskb)->mark & markinfo->mark; break; case XT_MARK_OR: - mark = skb->mark | markinfo->mark; + mark = (*pskb)->mark | markinfo->mark; break; } - skb->mark = mark; + (*pskb)->mark = mark; return XT_CONTINUE; } diff --git a/trunk/net/netfilter/xt_NFLOG.c b/trunk/net/netfilter/xt_NFLOG.c index 9fb449ffbf8b..d3594c7ccb26 100644 --- a/trunk/net/netfilter/xt_NFLOG.c +++ b/trunk/net/netfilter/xt_NFLOG.c @@ -20,7 +20,7 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_target(struct sk_buff *skb, +nflog_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -33,7 +33,7 @@ nflog_target(struct sk_buff *skb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(target->family, hooknum, skb, in, out, &li, + nf_log_packet(target->family, hooknum, *pskb, in, out, &li, "%s", info->prefix); return XT_CONTINUE; } diff --git a/trunk/net/netfilter/xt_NFQUEUE.c b/trunk/net/netfilter/xt_NFQUEUE.c index c3984e9f766a..13f59f3e8c38 100644 --- a/trunk/net/netfilter/xt_NFQUEUE.c +++ b/trunk/net/netfilter/xt_NFQUEUE.c @@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, diff --git a/trunk/net/netfilter/xt_NOTRACK.c b/trunk/net/netfilter/xt_NOTRACK.c index 4976ce186615..fec1aefb1c32 100644 --- a/trunk/net/netfilter/xt_NOTRACK.c +++ b/trunk/net/netfilter/xt_NOTRACK.c @@ -12,7 +12,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -target(struct sk_buff *skb, 
+target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, @@ -20,16 +20,16 @@ target(struct sk_buff *skb, const void *targinfo) { /* Previously seen (loopback)? Ignore. */ - if (skb->nfct != NULL) + if ((*pskb)->nfct != NULL) return XT_CONTINUE; /* Attach fake conntrack entry. If there is a real ct entry correspondig to this packet, it'll hang aroun till timing out. We don't deal with it for performance reasons. JK */ - skb->nfct = &nf_conntrack_untracked.ct_general; - skb->nfctinfo = IP_CT_NEW; - nf_conntrack_get(skb->nfct); + (*pskb)->nfct = &nf_conntrack_untracked.ct_general; + (*pskb)->nfctinfo = IP_CT_NEW; + nf_conntrack_get((*pskb)->nfct); return XT_CONTINUE; } diff --git a/trunk/net/netfilter/xt_SECMARK.c b/trunk/net/netfilter/xt_SECMARK.c index 235806eb6ecd..c83779a941a1 100644 --- a/trunk/net/netfilter/xt_SECMARK.c +++ b/trunk/net/netfilter/xt_SECMARK.c @@ -28,7 +28,7 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; -static unsigned int target(struct sk_buff *skb, const struct net_device *in, +static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) @@ -47,7 +47,7 @@ static unsigned int target(struct sk_buff *skb, const struct net_device *in, BUG(); } - skb->secmark = secmark; + (*pskb)->secmark = secmark; return XT_CONTINUE; } diff --git a/trunk/net/netfilter/xt_TCPMSS.c b/trunk/net/netfilter/xt_TCPMSS.c index 07435a602b11..d40f7e4b1289 100644 --- a/trunk/net/netfilter/xt_TCPMSS.c +++ b/trunk/net/netfilter/xt_TCPMSS.c @@ -39,7 +39,7 @@ optlen(const u_int8_t *opt, unsigned int offset) } static int -tcpmss_mangle_packet(struct sk_buff *skb, +tcpmss_mangle_packet(struct sk_buff **pskb, const struct xt_tcpmss_info *info, unsigned int tcphoff, unsigned int minlen) @@ -50,11 +50,11 @@ tcpmss_mangle_packet(struct sk_buff *skb, u16 newmss; u8 *opt; - if (!skb_make_writable(skb, 
skb->len)) + if (!skb_make_writable(pskb, (*pskb)->len)) return -1; - tcplen = skb->len - tcphoff; - tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + tcplen = (*pskb)->len - tcphoff; + tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); /* Since it passed flags test in tcp match, we know it is is not a fragment, and has data >= tcp header length. SYN @@ -64,19 +64,19 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (tcplen != tcph->doff*4) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n", - skb->len); + (*pskb)->len); return -1; } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu(skb->dst) <= minlen) { + if (dst_mtu((*pskb)->dst) <= minlen) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", - dst_mtu(skb->dst)); + dst_mtu((*pskb)->dst)); return -1; } - newmss = dst_mtu(skb->dst) - minlen; + newmss = dst_mtu((*pskb)->dst) - minlen; } else newmss = info->mss; @@ -95,7 +95,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = newmss & 0x00ff; - nf_proto_csum_replace2(&tcph->check, skb, + nf_proto_csum_replace2(&tcph->check, *pskb, htons(oldmss), htons(newmss), 0); return 0; } @@ -104,53 +104,57 @@ tcpmss_mangle_packet(struct sk_buff *skb, /* * MSS Option not found ?! add it.. 
*/ - if (skb_tailroom(skb) < TCPOLEN_MSS) { - if (pskb_expand_head(skb, 0, - TCPOLEN_MSS - skb_tailroom(skb), - GFP_ATOMIC)) + if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { + struct sk_buff *newskb; + + newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), + TCPOLEN_MSS, GFP_ATOMIC); + if (!newskb) return -1; - tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + kfree_skb(*pskb); + *pskb = newskb; + tcph = (struct tcphdr *)(skb_network_header(*pskb) + tcphoff); } - skb_put(skb, TCPOLEN_MSS); + skb_put((*pskb), TCPOLEN_MSS); opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - nf_proto_csum_replace2(&tcph->check, skb, + nf_proto_csum_replace2(&tcph->check, *pskb, htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1); opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = newmss & 0x00ff; - nf_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0); + nf_proto_csum_replace4(&tcph->check, *pskb, 0, *((__be32 *)opt), 0); oldval = ((__be16 *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - nf_proto_csum_replace2(&tcph->check, skb, + nf_proto_csum_replace2(&tcph->check, *pskb, oldval, ((__be16 *)tcph)[6], 0); return TCPOLEN_MSS; } static unsigned int -xt_tcpmss_target4(struct sk_buff *skb, +xt_tcpmss_target4(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct iphdr *iph = ip_hdr(skb); + struct iphdr *iph = ip_hdr(*pskb); __be16 newlen; int ret; - ret = tcpmss_mangle_packet(skb, targinfo, iph->ihl * 4, + ret = tcpmss_mangle_packet(pskb, targinfo, iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - iph = ip_hdr(skb); + iph = ip_hdr(*pskb); newlen = htons(ntohs(iph->tot_len) + ret); nf_csum_replace2(&iph->check, iph->tot_len, newlen); iph->tot_len = newlen; @@ -160,30 +164,30 @@ xt_tcpmss_target4(struct sk_buff *skb, #if 
defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -xt_tcpmss_target6(struct sk_buff *skb, +xt_tcpmss_target6(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct ipv6hdr *ipv6h = ipv6_hdr(*pskb); u8 nexthdr; int tcphoff; int ret; nexthdr = ipv6h->nexthdr; - tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); + tcphoff = ipv6_skip_exthdr(*pskb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) { WARN_ON(1); return NF_DROP; } - ret = tcpmss_mangle_packet(skb, targinfo, tcphoff, + ret = tcpmss_mangle_packet(pskb, targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); if (ret < 0) return NF_DROP; if (ret > 0) { - ipv6h = ipv6_hdr(skb); + ipv6h = ipv6_hdr(*pskb); ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + ret); } return XT_CONTINUE; diff --git a/trunk/net/netfilter/xt_TRACE.c b/trunk/net/netfilter/xt_TRACE.c index 26c5d08ab2c2..4df2dedcc0b5 100644 --- a/trunk/net/netfilter/xt_TRACE.c +++ b/trunk/net/netfilter/xt_TRACE.c @@ -10,14 +10,14 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -target(struct sk_buff *skb, +target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, const void *targinfo) { - skb->nf_trace = 1; + (*pskb)->nf_trace = 1; return XT_CONTINUE; } diff --git a/trunk/net/netlink/af_netlink.c b/trunk/net/netlink/af_netlink.c index 98e313e5e594..c776bcd9f825 100644 --- a/trunk/net/netlink/af_netlink.c +++ b/trunk/net/netlink/af_netlink.c @@ -1378,8 +1378,6 @@ netlink_kernel_create(struct net *net, int unit, unsigned int groups, nl_table[unit].cb_mutex = cb_mutex; nl_table[unit].module = module; nl_table[unit].registered = 1; - } else { - kfree(listeners); } netlink_table_ungrab(); diff --git a/trunk/net/sched/act_ipt.c 
b/trunk/net/sched/act_ipt.c index fa006e06ce33..6b407ece953c 100644 --- a/trunk/net/sched/act_ipt.c +++ b/trunk/net/sched/act_ipt.c @@ -202,7 +202,11 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, + + /* iptables targets take a double skb pointer in case the skb + * needs to be replaced. We don't own the skb, so this must not + * happen. The pskb_expand_head above should make sure of this */ + ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, ipt->tcfi_hook, ipt->tcfi_t->u.kernel.target, ipt->tcfi_t->data); diff --git a/trunk/net/sched/sch_ingress.c b/trunk/net/sched/sch_ingress.c index 3f8335e6ea2e..2d32fd27496e 100644 --- a/trunk/net/sched/sch_ingress.c +++ b/trunk/net/sched/sch_ingress.c @@ -205,19 +205,20 @@ static unsigned int ingress_drop(struct Qdisc *sch) #ifndef CONFIG_NET_CLS_ACT #ifdef CONFIG_NETFILTER static unsigned int -ing_hook(unsigned int hook, struct sk_buff *skb, +ing_hook(unsigned int hook, struct sk_buff **pskb, const struct net_device *indev, const struct net_device *outdev, int (*okfn)(struct sk_buff *)) { struct Qdisc *q; + struct sk_buff *skb = *pskb; struct net_device *dev = skb->dev; int fwres=NF_ACCEPT; DPRINTK("ing_hook: skb %s dev=%s len=%u\n", skb->sk ? "(owned)" : "(unowned)", - skb->dev ? skb->dev->name : "(no dev)", + skb->dev ? (*pskb)->dev->name : "(no dev)", skb->len); if (dev->qdisc_ingress) { diff --git a/trunk/net/sctp/ipv6.c b/trunk/net/sctp/ipv6.c index eb4deaf58914..9de3ddaa2768 100644 --- a/trunk/net/sctp/ipv6.c +++ b/trunk/net/sctp/ipv6.c @@ -954,9 +954,9 @@ static struct inet_protosw sctpv6_stream_protosw = { .flags = SCTP_PROTOSW_FLAG, }; -static int sctp6_rcv(struct sk_buff *skb) +static int sctp6_rcv(struct sk_buff **pskb) { - return sctp_rcv(skb) ? -1 : 0; + return sctp_rcv(*pskb) ? 
-1 : 0; } static struct inet6_protocol sctpv6_protocol = { diff --git a/trunk/net/sunrpc/Makefile b/trunk/net/sunrpc/Makefile index 5c69a725e530..8ebfc4db7f51 100644 --- a/trunk/net/sunrpc/Makefile +++ b/trunk/net/sunrpc/Makefile @@ -5,7 +5,6 @@ obj-$(CONFIG_SUNRPC) += sunrpc.o obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ -obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma/ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ auth.o auth_null.o auth_unix.o \ diff --git a/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c b/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c index 8bd074df27d3..42b3220bed39 100644 --- a/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/trunk/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -42,7 +42,7 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) { u8 *ptr; u8 pad; - size_t len = buf->len; + int len = buf->len; if (len <= buf->head[0].iov_len) { pad = *(u8 *)(buf->head[0].iov_base + len - 1); @@ -53,9 +53,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize) } else len -= buf->head[0].iov_len; if (len <= buf->page_len) { - unsigned int last = (buf->page_base + len - 1) + int last = (buf->page_base + len - 1) >>PAGE_CACHE_SHIFT; - unsigned int offset = (buf->page_base + len - 1) + int offset = (buf->page_base + len - 1) & (PAGE_CACHE_SIZE - 1); ptr = kmap_atomic(buf->pages[last], KM_USER0); pad = *(ptr + offset); diff --git a/trunk/net/sunrpc/clnt.c b/trunk/net/sunrpc/clnt.c index 76be83ee4b04..52429b1ffcc1 100644 --- a/trunk/net/sunrpc/clnt.c +++ b/trunk/net/sunrpc/clnt.c @@ -127,14 +127,7 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s struct rpc_clnt *clnt = NULL; struct rpc_auth *auth; int err; - size_t len; - - /* sanity check the name before trying to print it */ - err = -EINVAL; - len = strlen(servname); - if (len > RPC_MAXNETNAMELEN) - goto out_no_rpciod; - len++; + int len; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -155,6 +148,7 @@ static struct rpc_clnt * 
rpc_new_client(struct rpc_xprt *xprt, char *servname, s clnt->cl_parent = clnt; clnt->cl_server = clnt->cl_inline_name; + len = strlen(servname) + 1; if (len > sizeof(clnt->cl_inline_name)) { char *buf = kmalloc(len, GFP_KERNEL); if (buf != 0) @@ -240,8 +234,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) { struct rpc_xprt *xprt; struct rpc_clnt *clnt; - struct xprt_create xprtargs = { - .ident = args->protocol, + struct rpc_xprtsock_create xprtargs = { + .proto = args->protocol, .srcaddr = args->saddress, .dstaddr = args->address, .addrlen = args->addrsize, @@ -259,7 +253,7 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) */ if (args->servername == NULL) { struct sockaddr_in *addr = - (struct sockaddr_in *) args->address; + (struct sockaddr_in *) &args->address; snprintf(servername, sizeof(servername), NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); args->servername = servername; @@ -275,6 +269,9 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) if (args->flags & RPC_CLNT_CREATE_NONPRIVPORT) xprt->resvport = 0; + dprintk("RPC: creating %s client for %s (xprt %p)\n", + args->program->name, args->servername, xprt); + clnt = rpc_new_client(xprt, args->servername, args->program, args->version, args->authflavor); if (IS_ERR(clnt)) @@ -442,7 +439,7 @@ rpc_release_client(struct rpc_clnt *clnt) */ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *old, struct rpc_program *program, - u32 vers) + int vers) { struct rpc_clnt *clnt; struct rpc_version *version; @@ -846,7 +843,8 @@ call_allocate(struct rpc_task *task) dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); if (RPC_IS_ASYNC(task) || !signalled()) { - task->tk_action = call_allocate; + xprt_release(task); + task->tk_action = call_reserve; rpc_delay(task, HZ>>4); return; } @@ -873,7 +871,6 @@ rpc_xdr_buf_init(struct xdr_buf *buf, void *start, size_t len) buf->head[0].iov_len = len; buf->tail[0].iov_len = 0; buf->page_len = 0; - buf->flags = 0; buf->len = 0; 
buf->buflen = len; } @@ -940,7 +937,7 @@ call_bind(struct rpc_task *task) static void call_bind_status(struct rpc_task *task) { - int status = -EIO; + int status = -EACCES; if (task->tk_status >= 0) { dprint_status(task); @@ -950,20 +947,9 @@ call_bind_status(struct rpc_task *task) } switch (task->tk_status) { - case -EAGAIN: - dprintk("RPC: %5u rpcbind waiting for another request " - "to finish\n", task->tk_pid); - /* avoid busy-waiting here -- could be a network outage. */ - rpc_delay(task, 5*HZ); - goto retry_timeout; case -EACCES: dprintk("RPC: %5u remote rpcbind: RPC program/version " "unavailable\n", task->tk_pid); - /* fail immediately if this is an RPC ping */ - if (task->tk_msg.rpc_proc->p_proc == 0) { - status = -EOPNOTSUPP; - break; - } rpc_delay(task, 3*HZ); goto retry_timeout; case -ETIMEDOUT: @@ -971,7 +957,6 @@ call_bind_status(struct rpc_task *task) task->tk_pid); goto retry_timeout; case -EPFNOSUPPORT: - /* server doesn't support any rpcbind version we know of */ dprintk("RPC: %5u remote rpcbind service unavailable\n", task->tk_pid); break; @@ -984,6 +969,7 @@ call_bind_status(struct rpc_task *task) default: dprintk("RPC: %5u unrecognized rpcbind error (%d)\n", task->tk_pid, -task->tk_status); + status = -EIO; } rpc_exit(task, status); @@ -1271,6 +1257,7 @@ call_refresh(struct rpc_task *task) { dprint_status(task); + xprt_release(task); /* Must do to obtain new XID */ task->tk_action = call_refreshresult; task->tk_status = 0; task->tk_client->cl_stats->rpcauthrefresh++; @@ -1388,8 +1375,6 @@ call_verify(struct rpc_task *task) dprintk("RPC: %5u %s: retry stale creds\n", task->tk_pid, __FUNCTION__); rpcauth_invalcred(task); - /* Ensure we obtain a new XID! 
*/ - xprt_release(task); task->tk_action = call_refresh; goto out_retry; case RPC_AUTH_BADCRED: @@ -1538,18 +1523,13 @@ void rpc_show_tasks(void) spin_lock(&clnt->cl_lock); list_for_each_entry(t, &clnt->cl_tasks, tk_task) { const char *rpc_waitq = "none"; - int proc; - - if (t->tk_msg.rpc_proc) - proc = t->tk_msg.rpc_proc->p_proc; - else - proc = -1; if (RPC_IS_QUEUED(t)) rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); printk("%5u %04d %04x %6d %8p %6d %8p %8ld %8s %8p %8p\n", - t->tk_pid, proc, + t->tk_pid, + (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), t->tk_flags, t->tk_status, t->tk_client, (t->tk_client ? t->tk_client->cl_prog : 0), diff --git a/trunk/net/sunrpc/rpc_pipe.c b/trunk/net/sunrpc/rpc_pipe.c index c8433e8865aa..669e12a4ed18 100644 --- a/trunk/net/sunrpc/rpc_pipe.c +++ b/trunk/net/sunrpc/rpc_pipe.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -329,7 +329,6 @@ rpc_show_info(struct seq_file *m, void *v) clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); seq_printf(m, "protocol: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PROTO)); - seq_printf(m, "port: %s\n", rpc_peeraddr2str(clnt, RPC_DISPLAY_PORT)); return 0; } @@ -586,7 +585,6 @@ rpc_populate(struct dentry *parent, if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); - fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -608,7 +606,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - fsnotify_mkdir(dir, dentry); + inode_dir_notify(dir, DN_CREATE); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -750,7 +748,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->flags = flags; rpci->ops = ops; rpci->nkern_readwriters = 1; - fsnotify_create(dir, dentry); + inode_dir_notify(dir, DN_CREATE); dget(dentry); 
out: mutex_unlock(&dir->i_mutex); diff --git a/trunk/net/sunrpc/rpcb_clnt.c b/trunk/net/sunrpc/rpcb_clnt.c index a05493aedb68..d1740dbab991 100644 --- a/trunk/net/sunrpc/rpcb_clnt.c +++ b/trunk/net/sunrpc/rpcb_clnt.c @@ -16,14 +16,11 @@ #include #include -#include -#include #include #include #include #include -#include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_BIND @@ -93,6 +90,26 @@ enum { */ #define RPCB_MAXADDRLEN (128u) +/* + * r_netid + * + * Quoting RFC 3530, section 2.2: + * + * For TCP over IPv4 the value of r_netid is the string "tcp". For UDP + * over IPv4 the value of r_netid is the string "udp". + * + * ... + * + * For TCP over IPv6 the value of r_netid is the string "tcp6". For UDP + * over IPv6 the value of r_netid is the string "udp6". + */ +#define RPCB_NETID_UDP "\165\144\160" /* "udp" */ +#define RPCB_NETID_TCP "\164\143\160" /* "tcp" */ +#define RPCB_NETID_UDP6 "\165\144\160\066" /* "udp6" */ +#define RPCB_NETID_TCP6 "\164\143\160\066" /* "tcp6" */ + +#define RPCB_MAXNETIDLEN (4u) + /* * r_owner * @@ -103,7 +120,7 @@ enum { #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); -static struct rpc_program rpcb_program; +extern struct rpc_program rpcb_program; struct rpcbind_args { struct rpc_xprt * r_xprt; @@ -120,13 +137,10 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; -struct rpcb_info { +static struct rpcb_info { int rpc_vers; struct rpc_procinfo * rpc_proc; -}; - -static struct rpcb_info rpcb_next_version[]; -static struct rpcb_info rpcb_next_version6[]; +} rpcb_next_version[]; static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) { @@ -176,17 +190,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, RPC_CLNT_CREATE_INTR), }; - switch (srvaddr->sa_family) { - case AF_INET: - ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); - break; - case AF_INET6: 
- ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); - break; - default: - return NULL; - } - + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); if (!privileged) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); @@ -230,7 +234,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) prog, vers, prot, port); rpcb_clnt = rpcb_create("localhost", (struct sockaddr *) &sin, - XPRT_TRANSPORT_UDP, 2, 1); + IPPROTO_UDP, 2, 1); if (IS_ERR(rpcb_clnt)) return PTR_ERR(rpcb_clnt); @@ -312,7 +316,6 @@ void rpcb_getport_async(struct rpc_task *task) struct rpc_task *child; struct sockaddr addr; int status; - struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, @@ -322,7 +325,7 @@ void rpcb_getport_async(struct rpc_task *task) BUG_ON(clnt->cl_parent != clnt); if (xprt_test_and_set_binding(xprt)) { - status = -EAGAIN; /* tell caller to check again */ + status = -EACCES; /* tell caller to check again */ dprintk("RPC: %5u %s: waiting for another binder\n", task->tk_pid, __FUNCTION__); goto bailout_nowake; @@ -340,43 +343,18 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); - - /* Don't ever use rpcbind v2 for AF_INET6 requests */ - switch (addr.sa_family) { - case AF_INET: - info = rpcb_next_version; - break; - case AF_INET6: - info = rpcb_next_version6; - break; - default: - status = -EAFNOSUPPORT; - dprintk("RPC: %5u %s: bad address family\n", - task->tk_pid, __FUNCTION__); - goto bailout_nofree; - } - if (info[xprt->bind_index].rpc_proc == NULL) { + if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; - status = -EPFNOSUPPORT; + status = -EACCES; /* tell caller to try again later */ dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } - bind_version = info[xprt->bind_index].rpc_vers; + bind_version = 
rpcb_next_version[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, - bind_version, 0); - if (IS_ERR(rpcb_clnt)) { - status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); - goto bailout_nofree; - } - map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; @@ -389,19 +367,28 @@ void rpcb_getport_async(struct rpc_task *task) map->r_prot = xprt->prot; map->r_port = 0; map->r_xprt = xprt_get(xprt); - map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); - memcpy(map->r_addr, - rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), - sizeof(map->r_addr)); + map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : + RPCB_NETID_UDP; + memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), + sizeof(map->r_addr)); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); + goto bailout; + } + child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); - goto bailout; + goto bailout_nofree; } rpc_put_task(child); @@ -416,7 +403,6 @@ void rpcb_getport_async(struct rpc_task *task) bailout_nowake: task->tk_status = status; } -EXPORT_SYMBOL_GPL(rpcb_getport_async); /* * Rpcbind child task calls this callback via tk_exit. 
@@ -427,10 +413,6 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) struct rpc_xprt *xprt = map->r_xprt; int status = child->tk_status; - /* Garbage reply: retry with a lesser rpcbind version */ - if (status == -EIO) - status = -EPROTONOSUPPORT; - /* rpcbind server doesn't support this rpcbind protocol version */ if (status == -EPROTONOSUPPORT) xprt->bind_index++; @@ -508,24 +490,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { char *addr; - u32 addr_len; - int c, i, f, first, val; + int addr_len, c, i, f, first, val; *portp = 0; - addr_len = ntohl(*p++); - - /* - * Simple sanity check. The smallest possible universal - * address is an IPv4 address string containing 11 bytes. - */ - if (addr_len < 11 || addr_len > RPCB_MAXADDRLEN) - goto out_err; - - /* - * Start at the end and walk backwards until the first dot - * is encountered. When the second dot is found, we have - * both parts of the port number. - */ + addr_len = (unsigned int) ntohl(*p++); + if (addr_len > RPCB_MAXADDRLEN) /* sanity */ + return -EINVAL; + + dprintk("RPC: rpcb_decode_getaddr returned string: '%s'\n", + (char *) p); + addr = (char *)p; val = 0; first = 1; @@ -547,19 +521,8 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, } } - /* - * Simple sanity check. If we never saw a dot in the reply, - * then this was probably just garbage. 
- */ - if (first) - goto out_err; - dprintk("RPC: rpcb_decode_getaddr port=%u\n", *portp); return 0; - -out_err: - dprintk("RPC: rpcbind server returned malformed reply\n"); - return -EIO; } #define RPCB_program_sz (1u) @@ -568,7 +531,7 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, #define RPCB_port_sz (1u) #define RPCB_boolean_sz (1u) -#define RPCB_netid_sz (1+XDR_QUADLEN(RPCBIND_MAXNETIDLEN)) +#define RPCB_netid_sz (1+XDR_QUADLEN(RPCB_MAXNETIDLEN)) #define RPCB_addr_sz (1+XDR_QUADLEN(RPCB_MAXADDRLEN)) #define RPCB_ownerstring_sz (1+XDR_QUADLEN(RPCB_MAXOWNERLEN)) @@ -630,14 +593,6 @@ static struct rpcb_info rpcb_next_version[] = { { 0, NULL }, }; -static struct rpcb_info rpcb_next_version6[] = { -#ifdef CONFIG_SUNRPC_BIND34 - { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, - { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, -#endif - { 0, NULL }, -}; - static struct rpc_version rpcb_version2 = { .number = 2, .nrprocs = RPCB_HIGHPROC_2, @@ -666,7 +621,7 @@ static struct rpc_version *rpcb_version[] = { static struct rpc_stat rpcb_stats; -static struct rpc_program rpcb_program = { +struct rpc_program rpcb_program = { .name = "rpcbind", .number = RPCBIND_PROGRAM, .nrvers = ARRAY_SIZE(rpcb_version), diff --git a/trunk/net/sunrpc/sched.c b/trunk/net/sunrpc/sched.c index 3c773c53e12e..954d7ec86c7e 100644 --- a/trunk/net/sunrpc/sched.c +++ b/trunk/net/sunrpc/sched.c @@ -777,7 +777,6 @@ void *rpc_malloc(struct rpc_task *task, size_t size) task->tk_pid, size, buf); return &buf->data; } -EXPORT_SYMBOL_GPL(rpc_malloc); /** * rpc_free - free buffer allocated via rpc_malloc @@ -803,7 +802,6 @@ void rpc_free(void *buffer) else kfree(buf); } -EXPORT_SYMBOL_GPL(rpc_free); /* * Creation and deletion of RPC task structures diff --git a/trunk/net/sunrpc/socklib.c b/trunk/net/sunrpc/socklib.c index 97ac45f034d6..1d377d1ab7f4 100644 --- a/trunk/net/sunrpc/socklib.c +++ b/trunk/net/sunrpc/socklib.c @@ -34,7 +34,6 @@ size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, 
void *to, size_t len) desc->offset += len; return len; } -EXPORT_SYMBOL_GPL(xdr_skb_read_bits); /** * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer @@ -138,7 +137,6 @@ ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct out: return copied; } -EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb); /** * csum_partial_copy_to_xdr - checksum and copy data @@ -181,4 +179,3 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; return 0; } -EXPORT_SYMBOL_GPL(csum_partial_copy_to_xdr); diff --git a/trunk/net/sunrpc/sunrpc_syms.c b/trunk/net/sunrpc/sunrpc_syms.c index 33d89e842c85..384c4ad5ab86 100644 --- a/trunk/net/sunrpc/sunrpc_syms.c +++ b/trunk/net/sunrpc/sunrpc_syms.c @@ -20,7 +20,7 @@ #include #include #include -#include + /* RPC scheduler */ EXPORT_SYMBOL(rpc_execute); diff --git a/trunk/net/sunrpc/timer.c b/trunk/net/sunrpc/timer.c index 31becbf09263..8142fdb8a930 100644 --- a/trunk/net/sunrpc/timer.c +++ b/trunk/net/sunrpc/timer.c @@ -17,7 +17,6 @@ #include #include -#include #include @@ -41,7 +40,6 @@ rpc_init_rtt(struct rpc_rtt *rt, unsigned long timeo) rt->ntimeouts[i] = 0; } } -EXPORT_SYMBOL_GPL(rpc_init_rtt); /* * NB: When computing the smoothed RTT and standard deviation, @@ -77,7 +75,6 @@ rpc_update_rtt(struct rpc_rtt *rt, unsigned timer, long m) if (*sdrtt < RPC_RTO_MIN) *sdrtt = RPC_RTO_MIN; } -EXPORT_SYMBOL_GPL(rpc_update_rtt); /* * Estimate rto for an nfs rpc sent via. an unreliable datagram. 
@@ -106,4 +103,3 @@ rpc_calc_rto(struct rpc_rtt *rt, unsigned timer) return res; } -EXPORT_SYMBOL_GPL(rpc_calc_rto); diff --git a/trunk/net/sunrpc/xprt.c b/trunk/net/sunrpc/xprt.c index 282a9a2ec90c..c8c2edccad7e 100644 --- a/trunk/net/sunrpc/xprt.c +++ b/trunk/net/sunrpc/xprt.c @@ -62,9 +62,6 @@ static inline void do_xprt_reserve(struct rpc_task *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); -static spinlock_t xprt_list_lock = SPIN_LOCK_UNLOCKED; -static LIST_HEAD(xprt_list); - /* * The transport code maintains an estimate on the maximum number of out- * standing RPC requests, using a smoothed version of the congestion @@ -83,78 +80,6 @@ static LIST_HEAD(xprt_list); #define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd) -/** - * xprt_register_transport - register a transport implementation - * @transport: transport to register - * - * If a transport implementation is loaded as a kernel module, it can - * call this interface to make itself known to the RPC client. 
- * - * Returns: - * 0: transport successfully registered - * -EEXIST: transport already registered - * -EINVAL: transport module being unloaded - */ -int xprt_register_transport(struct xprt_class *transport) -{ - struct xprt_class *t; - int result; - - result = -EEXIST; - spin_lock(&xprt_list_lock); - list_for_each_entry(t, &xprt_list, list) { - /* don't register the same transport class twice */ - if (t->ident == transport->ident) - goto out; - } - - result = -EINVAL; - if (try_module_get(THIS_MODULE)) { - list_add_tail(&transport->list, &xprt_list); - printk(KERN_INFO "RPC: Registered %s transport module.\n", - transport->name); - result = 0; - } - -out: - spin_unlock(&xprt_list_lock); - return result; -} -EXPORT_SYMBOL_GPL(xprt_register_transport); - -/** - * xprt_unregister_transport - unregister a transport implementation - * transport: transport to unregister - * - * Returns: - * 0: transport successfully unregistered - * -ENOENT: transport never registered - */ -int xprt_unregister_transport(struct xprt_class *transport) -{ - struct xprt_class *t; - int result; - - result = 0; - spin_lock(&xprt_list_lock); - list_for_each_entry(t, &xprt_list, list) { - if (t == transport) { - printk(KERN_INFO - "RPC: Unregistered %s transport module.\n", - transport->name); - list_del_init(&transport->list); - module_put(THIS_MODULE); - goto out; - } - } - result = -ENOENT; - -out: - spin_unlock(&xprt_list_lock); - return result; -} -EXPORT_SYMBOL_GPL(xprt_unregister_transport); - /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport @@ -193,7 +118,6 @@ int xprt_reserve_xprt(struct rpc_task *task) rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } -EXPORT_SYMBOL_GPL(xprt_reserve_xprt); static void xprt_clear_locked(struct rpc_xprt *xprt) { @@ -243,7 +167,6 @@ int xprt_reserve_xprt_cong(struct rpc_task *task) rpc_sleep_on(&xprt->sending, task, NULL, NULL); return 0; } 
-EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -323,7 +246,6 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next(xprt); } } -EXPORT_SYMBOL_GPL(xprt_release_xprt); /** * xprt_release_xprt_cong - allow other requests to use a transport @@ -340,7 +262,6 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) __xprt_lock_write_next_cong(xprt); } } -EXPORT_SYMBOL_GPL(xprt_release_xprt_cong); static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { @@ -393,7 +314,6 @@ void xprt_release_rqst_cong(struct rpc_task *task) { __xprt_put_cong(task->tk_xprt, task->tk_rqstp); } -EXPORT_SYMBOL_GPL(xprt_release_rqst_cong); /** * xprt_adjust_cwnd - adjust transport congestion window @@ -425,7 +345,6 @@ void xprt_adjust_cwnd(struct rpc_task *task, int result) xprt->cwnd = cwnd; __xprt_put_cong(xprt, req); } -EXPORT_SYMBOL_GPL(xprt_adjust_cwnd); /** * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue @@ -440,7 +359,6 @@ void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status) else rpc_wake_up(&xprt->pending); } -EXPORT_SYMBOL_GPL(xprt_wake_pending_tasks); /** * xprt_wait_for_buffer_space - wait for transport output buffer to clear @@ -455,7 +373,6 @@ void xprt_wait_for_buffer_space(struct rpc_task *task) task->tk_timeout = req->rq_timeout; rpc_sleep_on(&xprt->pending, task, NULL, NULL); } -EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space); /** * xprt_write_space - wake the task waiting for transport output buffer space @@ -476,7 +393,6 @@ void xprt_write_space(struct rpc_xprt *xprt) } spin_unlock_bh(&xprt->transport_lock); } -EXPORT_SYMBOL_GPL(xprt_write_space); /** * xprt_set_retrans_timeout_def - set a request's retransmit timeout @@ -490,7 +406,6 @@ void xprt_set_retrans_timeout_def(struct rpc_task *task) { task->tk_timeout = task->tk_rqstp->rq_timeout; } 
-EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_def); /* * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout @@ -510,7 +425,6 @@ void xprt_set_retrans_timeout_rtt(struct rpc_task *task) if (task->tk_timeout > max_timeout || task->tk_timeout == 0) task->tk_timeout = max_timeout; } -EXPORT_SYMBOL_GPL(xprt_set_retrans_timeout_rtt); static void xprt_reset_majortimeo(struct rpc_rqst *req) { @@ -586,7 +500,6 @@ void xprt_disconnect(struct rpc_xprt *xprt) xprt_wake_pending_tasks(xprt, -ENOTCONN); spin_unlock_bh(&xprt->transport_lock); } -EXPORT_SYMBOL_GPL(xprt_disconnect); static void xprt_init_autodisconnect(unsigned long data) @@ -697,7 +610,6 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) xprt->stat.bad_xids++; return NULL; } -EXPORT_SYMBOL_GPL(xprt_lookup_rqst); /** * xprt_update_rtt - update an RPC client's RTT state after receiving a reply @@ -717,7 +629,6 @@ void xprt_update_rtt(struct rpc_task *task) rpc_set_timeo(rtt, timer, req->rq_ntrans - 1); } } -EXPORT_SYMBOL_GPL(xprt_update_rtt); /** * xprt_complete_rqst - called when reply processing is complete @@ -742,7 +653,6 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) req->rq_received = req->rq_private_buf.len = copied; rpc_wake_up_task(task); } -EXPORT_SYMBOL_GPL(xprt_complete_rqst); static void xprt_timer(struct rpc_task *task) { @@ -979,25 +889,23 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i * @args: rpc transport creation arguments * */ -struct rpc_xprt *xprt_create_transport(struct xprt_create *args) +struct rpc_xprt *xprt_create_transport(struct rpc_xprtsock_create *args) { struct rpc_xprt *xprt; struct rpc_rqst *req; - struct xprt_class *t; - spin_lock(&xprt_list_lock); - list_for_each_entry(t, &xprt_list, list) { - if (t->ident == args->ident) { - spin_unlock(&xprt_list_lock); - goto found; - } + switch (args->proto) { + case IPPROTO_UDP: + xprt = xs_setup_udp(args); + break; + case IPPROTO_TCP: + xprt = 
xs_setup_tcp(args); + break; + default: + printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n", + args->proto); + return ERR_PTR(-EIO); } - spin_unlock(&xprt_list_lock); - printk(KERN_ERR "RPC: transport (%d) not supported\n", args->ident); - return ERR_PTR(-EIO); - -found: - xprt = t->setup(args); if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); diff --git a/trunk/net/sunrpc/xprtrdma/Makefile b/trunk/net/sunrpc/xprtrdma/Makefile deleted file mode 100644 index 264f0feeb513..000000000000 --- a/trunk/net/sunrpc/xprtrdma/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -obj-$(CONFIG_SUNRPC_XPRT_RDMA) += xprtrdma.o - -xprtrdma-y := transport.o rpc_rdma.o verbs.o diff --git a/trunk/net/sunrpc/xprtrdma/rpc_rdma.c b/trunk/net/sunrpc/xprtrdma/rpc_rdma.c deleted file mode 100644 index 12db63580427..000000000000 --- a/trunk/net/sunrpc/xprtrdma/rpc_rdma.c +++ /dev/null @@ -1,868 +0,0 @@ -/* - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the BSD-type - * license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * Neither the name of the Network Appliance, Inc. 
nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * rpc_rdma.c - * - * This file contains the guts of the RPC RDMA protocol, and - * does marshaling/unmarshaling, etc. It is also where interfacing - * to the Linux RPC framework lives. - */ - -#include "xprt_rdma.h" - -#include - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -enum rpcrdma_chunktype { - rpcrdma_noch = 0, - rpcrdma_readch, - rpcrdma_areadch, - rpcrdma_writech, - rpcrdma_replych -}; - -#ifdef RPC_DEBUG -static const char transfertypes[][12] = { - "pure inline", /* no chunks */ - " read chunk", /* some argument via rdma read */ - "*read chunk", /* entire request via rdma read */ - "write chunk", /* some result via rdma write */ - "reply chunk" /* entire reply via rdma write */ -}; -#endif - -/* - * Chunk assembly from upper layer xdr_buf. - * - * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk - * elements. Segments are then coalesced when registered, if possible - * within the selected memreg mode. 
- * - * Note, this routine is never called if the connection's memory - * registration strategy is 0 (bounce buffers). - */ - -static int -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, int pos, - enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg, int nsegs) -{ - int len, n = 0, p; - - if (pos == 0 && xdrbuf->head[0].iov_len) { - seg[n].mr_page = NULL; - seg[n].mr_offset = xdrbuf->head[0].iov_base; - seg[n].mr_len = xdrbuf->head[0].iov_len; - pos += xdrbuf->head[0].iov_len; - ++n; - } - - if (xdrbuf->page_len && (xdrbuf->pages[0] != NULL)) { - if (n == nsegs) - return 0; - seg[n].mr_page = xdrbuf->pages[0]; - seg[n].mr_offset = (void *)(unsigned long) xdrbuf->page_base; - seg[n].mr_len = min_t(u32, - PAGE_SIZE - xdrbuf->page_base, xdrbuf->page_len); - len = xdrbuf->page_len - seg[n].mr_len; - pos += len; - ++n; - p = 1; - while (len > 0) { - if (n == nsegs) - return 0; - seg[n].mr_page = xdrbuf->pages[p]; - seg[n].mr_offset = NULL; - seg[n].mr_len = min_t(u32, PAGE_SIZE, len); - len -= seg[n].mr_len; - ++n; - ++p; - } - } - - if (pos < xdrbuf->len && xdrbuf->tail[0].iov_len) { - if (n == nsegs) - return 0; - seg[n].mr_page = NULL; - seg[n].mr_offset = xdrbuf->tail[0].iov_base; - seg[n].mr_len = xdrbuf->tail[0].iov_len; - pos += xdrbuf->tail[0].iov_len; - ++n; - } - - if (pos < xdrbuf->len) - dprintk("RPC: %s: marshaled only %d of %d\n", - __func__, pos, xdrbuf->len); - - return n; -} - -/* - * Create read/write chunk lists, and reply chunks, for RDMA - * - * Assume check against THRESHOLD has been done, and chunks are required. - * Assume only encoding one list entry for read|write chunks. The NFSv3 - * protocol is simple enough to allow this as it only has a single "bulk - * result" in each procedure - complicated NFSv4 COMPOUNDs are not. (The - * RDMA/Sessions NFSv4 proposal addresses this for future v4 revs.) 
- * - * When used for a single reply chunk (which is a special write - * chunk used for the entire reply, rather than just the data), it - * is used primarily for READDIR and READLINK which would otherwise - * be severely size-limited by a small rdma inline read max. The server - * response will come back as an RDMA Write, followed by a message - * of type RDMA_NOMSG carrying the xid and length. As a result, reply - * chunks do not provide data alignment, however they do not require - * "fixup" (moving the response to the upper layer buffer) either. - * - * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): - * - * Read chunklist (a linked list): - * N elements, position P (same P for all chunks of same arg!): - * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 - * - * Write chunklist (a list of (one) counted array): - * N elements: - * 1 - N - HLOO - HLOO - ... - HLOO - 0 - * - * Reply chunk (a counted array): - * N elements: - * 1 - N - HLOO - HLOO - ... - HLOO - */ - -static unsigned int -rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target, - struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type) -{ - struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_task->tk_xprt); - int nsegs, nchunks = 0; - int pos; - struct rpcrdma_mr_seg *seg = req->rl_segments; - struct rpcrdma_read_chunk *cur_rchunk = NULL; - struct rpcrdma_write_array *warray = NULL; - struct rpcrdma_write_chunk *cur_wchunk = NULL; - u32 *iptr = headerp->rm_body.rm_chunks; - - if (type == rpcrdma_readch || type == rpcrdma_areadch) { - /* a read chunk - server will RDMA Read our memory */ - cur_rchunk = (struct rpcrdma_read_chunk *) iptr; - } else { - /* a write or reply chunk - server will RDMA Write our memory */ - *iptr++ = xdr_zero; /* encode a NULL read chunk list */ - if (type == rpcrdma_replych) - *iptr++ = xdr_zero; /* a NULL write chunk list */ - warray = (struct rpcrdma_write_array *) iptr; - cur_wchunk = 
(struct rpcrdma_write_chunk *) (warray + 1); - } - - if (type == rpcrdma_replych || type == rpcrdma_areadch) - pos = 0; - else - pos = target->head[0].iov_len; - - nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS); - if (nsegs == 0) - return 0; - - do { - /* bind/register the memory, then build chunk from result. */ - int n = rpcrdma_register_external(seg, nsegs, - cur_wchunk != NULL, r_xprt); - if (n <= 0) - goto out; - if (cur_rchunk) { /* read */ - cur_rchunk->rc_discrim = xdr_one; - /* all read chunks have the same "position" */ - cur_rchunk->rc_position = htonl(pos); - cur_rchunk->rc_target.rs_handle = htonl(seg->mr_rkey); - cur_rchunk->rc_target.rs_length = htonl(seg->mr_len); - xdr_encode_hyper( - (u32 *)&cur_rchunk->rc_target.rs_offset, - seg->mr_base); - dprintk("RPC: %s: read chunk " - "elem %d@0x%llx:0x%x pos %d (%s)\n", __func__, - seg->mr_len, seg->mr_base, seg->mr_rkey, pos, - n < nsegs ? "more" : "last"); - cur_rchunk++; - r_xprt->rx_stats.read_chunk_count++; - } else { /* write/reply */ - cur_wchunk->wc_target.rs_handle = htonl(seg->mr_rkey); - cur_wchunk->wc_target.rs_length = htonl(seg->mr_len); - xdr_encode_hyper( - (u32 *)&cur_wchunk->wc_target.rs_offset, - seg->mr_base); - dprintk("RPC: %s: %s chunk " - "elem %d@0x%llx:0x%x (%s)\n", __func__, - (type == rpcrdma_replych) ? "reply" : "write", - seg->mr_len, seg->mr_base, seg->mr_rkey, - n < nsegs ? "more" : "last"); - cur_wchunk++; - if (type == rpcrdma_replych) - r_xprt->rx_stats.reply_chunk_count++; - else - r_xprt->rx_stats.write_chunk_count++; - r_xprt->rx_stats.total_rdma_request += seg->mr_len; - } - nchunks++; - seg += n; - nsegs -= n; - } while (nsegs); - - /* success. all failures return above */ - req->rl_nchunks = nchunks; - - BUG_ON(nchunks == 0); - - /* - * finish off header. If write, marshal discrim and nchunks. 
- */ - if (cur_rchunk) { - iptr = (u32 *) cur_rchunk; - *iptr++ = xdr_zero; /* finish the read chunk list */ - *iptr++ = xdr_zero; /* encode a NULL write chunk list */ - *iptr++ = xdr_zero; /* encode a NULL reply chunk */ - } else { - warray->wc_discrim = xdr_one; - warray->wc_nchunks = htonl(nchunks); - iptr = (u32 *) cur_wchunk; - if (type == rpcrdma_writech) { - *iptr++ = xdr_zero; /* finish the write chunk list */ - *iptr++ = xdr_zero; /* encode a NULL reply chunk */ - } - } - - /* - * Return header size. - */ - return (unsigned char *)iptr - (unsigned char *)headerp; - -out: - for (pos = 0; nchunks--;) - pos += rpcrdma_deregister_external( - &req->rl_segments[pos], r_xprt, NULL); - return 0; -} - -/* - * Copy write data inline. - * This function is used for "small" requests. Data which is passed - * to RPC via iovecs (or page list) is copied directly into the - * pre-registered memory buffer for this request. For small amounts - * of data, this is efficient. The cutoff value is tunable. - */ -static int -rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) -{ - int i, npages, curlen; - int copy_len; - unsigned char *srcp, *destp; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); - - destp = rqst->rq_svec[0].iov_base; - curlen = rqst->rq_svec[0].iov_len; - destp += curlen; - /* - * Do optional padding where it makes sense. Alignment of write - * payload can help the server, if our setting is accurate. 
- */ - pad -= (curlen + 36/*sizeof(struct rpcrdma_msg_padded)*/); - if (pad < 0 || rqst->rq_slen - curlen < RPCRDMA_INLINE_PAD_THRESH) - pad = 0; /* don't pad this request */ - - dprintk("RPC: %s: pad %d destp 0x%p len %d hdrlen %d\n", - __func__, pad, destp, rqst->rq_slen, curlen); - - copy_len = rqst->rq_snd_buf.page_len; - r_xprt->rx_stats.pullup_copy_count += copy_len; - npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; - for (i = 0; copy_len && i < npages; i++) { - if (i == 0) - curlen = PAGE_SIZE - rqst->rq_snd_buf.page_base; - else - curlen = PAGE_SIZE; - if (curlen > copy_len) - curlen = copy_len; - dprintk("RPC: %s: page %d destp 0x%p len %d curlen %d\n", - __func__, i, destp, copy_len, curlen); - srcp = kmap_atomic(rqst->rq_snd_buf.pages[i], - KM_SKB_SUNRPC_DATA); - if (i == 0) - memcpy(destp, srcp+rqst->rq_snd_buf.page_base, curlen); - else - memcpy(destp, srcp, curlen); - kunmap_atomic(srcp, KM_SKB_SUNRPC_DATA); - rqst->rq_svec[0].iov_len += curlen; - destp += curlen; - copy_len -= curlen; - } - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp != rqst->rq_snd_buf.tail[0].iov_base) { - memcpy(destp, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; - } - dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", - __func__, destp, copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; - } - /* header now contains entire send message */ - return pad; -} - -/* - * Marshal a request: the primary job of this routine is to choose - * the transfer modes. See comments below. - * - * Uses multiple RDMA IOVs for a request: - * [0] -- RPC RDMA header, which uses memory from the *start* of the - * preregistered buffer that already holds the RPC data in - * its middle. - * [1] -- the RPC header/data, marshaled by RPC and the NFS protocol. - * [2] -- optional padding. - * [3] -- if padded, header only in [1] and data here. 
- */ - -int -rpcrdma_marshal_req(struct rpc_rqst *rqst) -{ - struct rpc_xprt *xprt = rqst->rq_task->tk_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - char *base; - size_t hdrlen, rpclen, padlen; - enum rpcrdma_chunktype rtype, wtype; - struct rpcrdma_msg *headerp; - - /* - * rpclen gets amount of data in first buffer, which is the - * pre-registered buffer. - */ - base = rqst->rq_svec[0].iov_base; - rpclen = rqst->rq_svec[0].iov_len; - - /* build RDMA header in private area at front */ - headerp = (struct rpcrdma_msg *) req->rl_base; - /* don't htonl XID, it's already done in request */ - headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = xdr_one; - headerp->rm_credit = htonl(r_xprt->rx_buf.rb_max_requests); - headerp->rm_type = __constant_htonl(RDMA_MSG); - - /* - * Chunks needed for results? - * - * o If the expected result is under the inline threshold, all ops - * return as inline (but see later). - * o Large non-read ops return as a single reply chunk. - * o Large read ops return data as write chunk(s), header as inline. - * - * Note: the NFS code sending down multiple result segments implies - * the op is one of read, readdir[plus], readlink or NFSv4 getacl. - */ - - /* - * This code can handle read chunks, write chunks OR reply - * chunks -- only one type. If the request is too big to fit - * inline, then we will choose read chunks. If the request is - * a READ, then use write chunks to separate the file data - * into pages; otherwise use reply chunks. - */ - if (rqst->rq_rcv_buf.buflen <= RPCRDMA_INLINE_READ_THRESHOLD(rqst)) - wtype = rpcrdma_noch; - else if (rqst->rq_rcv_buf.page_len == 0) - wtype = rpcrdma_replych; - else if (rqst->rq_rcv_buf.flags & XDRBUF_READ) - wtype = rpcrdma_writech; - else - wtype = rpcrdma_replych; - - /* - * Chunks needed for arguments? - * - * o If the total request is under the inline threshold, all ops - * are sent as inline. 
- * o Large non-write ops are sent with the entire message as a - * single read chunk (protocol 0-position special case). - * o Large write ops transmit data as read chunk(s), header as - * inline. - * - * Note: the NFS code sending down multiple argument segments - * implies the op is a write. - * TBD check NFSv4 setacl - */ - if (rqst->rq_snd_buf.len <= RPCRDMA_INLINE_WRITE_THRESHOLD(rqst)) - rtype = rpcrdma_noch; - else if (rqst->rq_snd_buf.page_len == 0) - rtype = rpcrdma_areadch; - else - rtype = rpcrdma_readch; - - /* The following simplification is not true forever */ - if (rtype != rpcrdma_noch && wtype == rpcrdma_replych) - wtype = rpcrdma_noch; - BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch); - - if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS && - (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) { - /* forced to "pure inline"? */ - dprintk("RPC: %s: too much data (%d/%d) for inline\n", - __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len); - return -1; - } - - hdrlen = 28; /*sizeof *headerp;*/ - padlen = 0; - - /* - * Pull up any extra send data into the preregistered buffer. - * When padding is in use and applies to the transfer, insert - * it and change the message type. 
- */ - if (rtype == rpcrdma_noch) { - - padlen = rpcrdma_inline_pullup(rqst, - RPCRDMA_INLINE_PAD_VALUE(rqst)); - - if (padlen) { - headerp->rm_type = __constant_htonl(RDMA_MSGP); - headerp->rm_body.rm_padded.rm_align = - htonl(RPCRDMA_INLINE_PAD_VALUE(rqst)); - headerp->rm_body.rm_padded.rm_thresh = - __constant_htonl(RPCRDMA_INLINE_PAD_THRESH); - headerp->rm_body.rm_padded.rm_pempty[0] = xdr_zero; - headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero; - headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero; - hdrlen += 2 * sizeof(u32); /* extra words in padhdr */ - BUG_ON(wtype != rpcrdma_noch); - - } else { - headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero; - headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero; - headerp->rm_body.rm_nochunks.rm_empty[2] = xdr_zero; - /* new length after pullup */ - rpclen = rqst->rq_svec[0].iov_len; - /* - * Currently we try to not actually use read inline. - * Reply chunks have the desirable property that - * they land, packed, directly in the target buffers - * without headers, so they require no fixup. The - * additional RDMA Write op sends the same amount - * of data, streams on-the-wire and adds no overhead - * on receive. Therefore, we request a reply chunk - * for non-writes wherever feasible and efficient. - */ - if (wtype == rpcrdma_noch && - r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER) - wtype = rpcrdma_replych; - } - } - - /* - * Marshal chunks. This routine will return the header length - * consumed by marshaling. 
- */ - if (rtype != rpcrdma_noch) { - hdrlen = rpcrdma_create_chunks(rqst, - &rqst->rq_snd_buf, headerp, rtype); - wtype = rtype; /* simplify dprintk */ - - } else if (wtype != rpcrdma_noch) { - hdrlen = rpcrdma_create_chunks(rqst, - &rqst->rq_rcv_buf, headerp, wtype); - } - - if (hdrlen == 0) - return -1; - - dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" - " headerp 0x%p base 0x%p lkey 0x%x\n", - __func__, transfertypes[wtype], hdrlen, rpclen, padlen, - headerp, base, req->rl_iov.lkey); - - /* - * initialize send_iov's - normally only two: rdma chunk header and - * single preregistered RPC header buffer, but if padding is present, - * then use a preregistered (and zeroed) pad buffer between the RPC - * header and any write data. In all non-rdma cases, any following - * data has been copied into the RPC header buffer. - */ - req->rl_send_iov[0].addr = req->rl_iov.addr; - req->rl_send_iov[0].length = hdrlen; - req->rl_send_iov[0].lkey = req->rl_iov.lkey; - - req->rl_send_iov[1].addr = req->rl_iov.addr + (base - req->rl_base); - req->rl_send_iov[1].length = rpclen; - req->rl_send_iov[1].lkey = req->rl_iov.lkey; - - req->rl_niovs = 2; - - if (padlen) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - - req->rl_send_iov[2].addr = ep->rep_pad.addr; - req->rl_send_iov[2].length = padlen; - req->rl_send_iov[2].lkey = ep->rep_pad.lkey; - - req->rl_send_iov[3].addr = req->rl_send_iov[1].addr + rpclen; - req->rl_send_iov[3].length = rqst->rq_slen - rpclen; - req->rl_send_iov[3].lkey = req->rl_iov.lkey; - - req->rl_niovs = 4; - } - - return 0; -} - -/* - * Chase down a received write or reply chunklist to get length - * RDMA'd by server. See map at rpcrdma_create_chunks()! 
:-) - */ -static int -rpcrdma_count_chunks(struct rpcrdma_rep *rep, int max, int wrchunk, u32 **iptrp) -{ - unsigned int i, total_len; - struct rpcrdma_write_chunk *cur_wchunk; - - i = ntohl(**iptrp); /* get array count */ - if (i > max) - return -1; - cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); - total_len = 0; - while (i--) { - struct rpcrdma_segment *seg = &cur_wchunk->wc_target; - ifdebug(FACILITY) { - u64 off; - xdr_decode_hyper((u32 *)&seg->rs_offset, &off); - dprintk("RPC: %s: chunk %d@0x%llx:0x%x\n", - __func__, - ntohl(seg->rs_length), - off, - ntohl(seg->rs_handle)); - } - total_len += ntohl(seg->rs_length); - ++cur_wchunk; - } - /* check and adjust for properly terminated write chunk */ - if (wrchunk) { - u32 *w = (u32 *) cur_wchunk; - if (*w++ != xdr_zero) - return -1; - cur_wchunk = (struct rpcrdma_write_chunk *) w; - } - if ((char *) cur_wchunk > rep->rr_base + rep->rr_len) - return -1; - - *iptrp = (u32 *) cur_wchunk; - return total_len; -} - -/* - * Scatter inline received data back into provided iov's. 
- */ -static void -rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) -{ - int i, npages, curlen, olen; - char *destp; - - curlen = rqst->rq_rcv_buf.head[0].iov_len; - if (curlen > copy_len) { /* write chunk header fixup */ - curlen = copy_len; - rqst->rq_rcv_buf.head[0].iov_len = curlen; - } - - dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n", - __func__, srcp, copy_len, curlen); - - /* Shift pointer for first receive segment only */ - rqst->rq_rcv_buf.head[0].iov_base = srcp; - srcp += curlen; - copy_len -= curlen; - - olen = copy_len; - i = 0; - rpcx_to_rdmax(rqst->rq_xprt)->rx_stats.fixup_copy_count += olen; - if (copy_len && rqst->rq_rcv_buf.page_len) { - npages = PAGE_ALIGN(rqst->rq_rcv_buf.page_base + - rqst->rq_rcv_buf.page_len) >> PAGE_SHIFT; - for (; i < npages; i++) { - if (i == 0) - curlen = PAGE_SIZE - rqst->rq_rcv_buf.page_base; - else - curlen = PAGE_SIZE; - if (curlen > copy_len) - curlen = copy_len; - dprintk("RPC: %s: page %d" - " srcp 0x%p len %d curlen %d\n", - __func__, i, srcp, copy_len, curlen); - destp = kmap_atomic(rqst->rq_rcv_buf.pages[i], - KM_SKB_SUNRPC_DATA); - if (i == 0) - memcpy(destp + rqst->rq_rcv_buf.page_base, - srcp, curlen); - else - memcpy(destp, srcp, curlen); - flush_dcache_page(rqst->rq_rcv_buf.pages[i]); - kunmap_atomic(destp, KM_SKB_SUNRPC_DATA); - srcp += curlen; - copy_len -= curlen; - if (copy_len == 0) - break; - } - rqst->rq_rcv_buf.page_len = olen - copy_len; - } else - rqst->rq_rcv_buf.page_len = 0; - - if (copy_len && rqst->rq_rcv_buf.tail[0].iov_len) { - curlen = copy_len; - if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) - curlen = rqst->rq_rcv_buf.tail[0].iov_len; - if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) - memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); - dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", - __func__, srcp, copy_len, curlen); - rqst->rq_rcv_buf.tail[0].iov_len = curlen; - copy_len -= curlen; ++i; - } else - rqst->rq_rcv_buf.tail[0].iov_len = 0; - - if 
(copy_len) - dprintk("RPC: %s: %d bytes in" - " %d extra segments (%d lost)\n", - __func__, olen, i, copy_len); - - /* TBD avoid a warning from call_decode() */ - rqst->rq_private_buf = rqst->rq_rcv_buf; -} - -/* - * This function is called when an async event is posted to - * the connection which changes the connection state. All it - * does at this point is mark the connection up/down, the rpc - * timers do the rest. - */ -void -rpcrdma_conn_func(struct rpcrdma_ep *ep) -{ - struct rpc_xprt *xprt = ep->rep_xprt; - - spin_lock_bh(&xprt->transport_lock); - if (ep->rep_connected > 0) { - if (!xprt_test_and_set_connected(xprt)) - xprt_wake_pending_tasks(xprt, 0); - } else { - if (xprt_test_and_clear_connected(xprt)) - xprt_wake_pending_tasks(xprt, ep->rep_connected); - } - spin_unlock_bh(&xprt->transport_lock); -} - -/* - * This function is called when memory window unbind which we are waiting - * for completes. Just use rr_func (zeroed by upcall) to signal completion. - */ -static void -rpcrdma_unbind_func(struct rpcrdma_rep *rep) -{ - wake_up(&rep->rr_unbind); -} - -/* - * Called as a tasklet to do req/reply match and complete a request - * Errors must result in the RPC task either being awakened, or - * allowed to timeout, to discover the errors at that time. - */ -void -rpcrdma_reply_handler(struct rpcrdma_rep *rep) -{ - struct rpcrdma_msg *headerp; - struct rpcrdma_req *req; - struct rpc_rqst *rqst; - struct rpc_xprt *xprt = rep->rr_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - u32 *iptr; - int i, rdmalen, status; - - /* Check status. 
If bad, signal disconnect and return rep to pool */ - if (rep->rr_len == ~0U) { - rpcrdma_recv_buffer_put(rep); - if (r_xprt->rx_ep.rep_connected == 1) { - r_xprt->rx_ep.rep_connected = -EIO; - rpcrdma_conn_func(&r_xprt->rx_ep); - } - return; - } - if (rep->rr_len < 28) { - dprintk("RPC: %s: short/invalid reply\n", __func__); - goto repost; - } - headerp = (struct rpcrdma_msg *) rep->rr_base; - if (headerp->rm_vers != xdr_one) { - dprintk("RPC: %s: invalid version %d\n", - __func__, ntohl(headerp->rm_vers)); - goto repost; - } - - /* Get XID and try for a match. */ - spin_lock(&xprt->transport_lock); - rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); - if (rqst == NULL) { - spin_unlock(&xprt->transport_lock); - dprintk("RPC: %s: reply 0x%p failed " - "to match any request xid 0x%08x len %d\n", - __func__, rep, headerp->rm_xid, rep->rr_len); -repost: - r_xprt->rx_stats.bad_reply_count++; - rep->rr_func = rpcrdma_reply_handler; - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, &r_xprt->rx_ep, rep)) - rpcrdma_recv_buffer_put(rep); - - return; - } - - /* get request object */ - req = rpcr_to_rdmar(rqst); - - dprintk("RPC: %s: reply 0x%p completes request 0x%p\n" - " RPC request 0x%p xid 0x%08x\n", - __func__, rep, req, rqst, headerp->rm_xid); - - BUG_ON(!req || req->rl_reply); - - /* from here on, the reply is no longer an orphan */ - req->rl_reply = rep; - - /* check for expected message types */ - /* The order of some of these tests is important. 
*/ - switch (headerp->rm_type) { - case __constant_htonl(RDMA_MSG): - /* never expect read chunks */ - /* never expect reply chunks (two ways to check) */ - /* never expect write chunks without having offered RDMA */ - if (headerp->rm_body.rm_chunks[0] != xdr_zero || - (headerp->rm_body.rm_chunks[1] == xdr_zero && - headerp->rm_body.rm_chunks[2] != xdr_zero) || - (headerp->rm_body.rm_chunks[1] != xdr_zero && - req->rl_nchunks == 0)) - goto badheader; - if (headerp->rm_body.rm_chunks[1] != xdr_zero) { - /* count any expected write chunks in read reply */ - /* start at write chunk array count */ - iptr = &headerp->rm_body.rm_chunks[2]; - rdmalen = rpcrdma_count_chunks(rep, - req->rl_nchunks, 1, &iptr); - /* check for validity, and no reply chunk after */ - if (rdmalen < 0 || *iptr++ != xdr_zero) - goto badheader; - rep->rr_len -= - ((unsigned char *)iptr - (unsigned char *)headerp); - status = rep->rr_len + rdmalen; - r_xprt->rx_stats.total_rdma_reply += rdmalen; - } else { - /* else ordinary inline */ - iptr = (u32 *)((unsigned char *)headerp + 28); - rep->rr_len -= 28; /*sizeof *headerp;*/ - status = rep->rr_len; - } - /* Fix up the rpc results for upper layer */ - rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); - break; - - case __constant_htonl(RDMA_NOMSG): - /* never expect read or write chunks, always reply chunks */ - if (headerp->rm_body.rm_chunks[0] != xdr_zero || - headerp->rm_body.rm_chunks[1] != xdr_zero || - headerp->rm_body.rm_chunks[2] != xdr_one || - req->rl_nchunks == 0) - goto badheader; - iptr = (u32 *)((unsigned char *)headerp + 28); - rdmalen = rpcrdma_count_chunks(rep, req->rl_nchunks, 0, &iptr); - if (rdmalen < 0) - goto badheader; - r_xprt->rx_stats.total_rdma_reply += rdmalen; - /* Reply chunk buffer already is the reply vector - no fixup. 
*/ - status = rdmalen; - break; - -badheader: - default: - dprintk("%s: invalid rpcrdma reply header (type %d):" - " chunks[012] == %d %d %d" - " expected chunks <= %d\n", - __func__, ntohl(headerp->rm_type), - headerp->rm_body.rm_chunks[0], - headerp->rm_body.rm_chunks[1], - headerp->rm_body.rm_chunks[2], - req->rl_nchunks); - status = -EIO; - r_xprt->rx_stats.bad_reply_count++; - break; - } - - /* If using mw bind, start the deregister process now. */ - /* (Note: if mr_free(), cannot perform it here, in tasklet context) */ - if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) { - case RPCRDMA_MEMWINDOWS: - for (i = 0; req->rl_nchunks-- > 1;) - i += rpcrdma_deregister_external( - &req->rl_segments[i], r_xprt, NULL); - /* Optionally wait (not here) for unbinds to complete */ - rep->rr_func = rpcrdma_unbind_func; - (void) rpcrdma_deregister_external(&req->rl_segments[i], - r_xprt, rep); - break; - case RPCRDMA_MEMWINDOWS_ASYNC: - for (i = 0; req->rl_nchunks--;) - i += rpcrdma_deregister_external(&req->rl_segments[i], - r_xprt, NULL); - break; - default: - break; - } - - dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", - __func__, xprt, rqst, status); - xprt_complete_rqst(rqst->rq_task, status); - spin_unlock(&xprt->transport_lock); -} diff --git a/trunk/net/sunrpc/xprtrdma/transport.c b/trunk/net/sunrpc/xprtrdma/transport.c deleted file mode 100644 index dc55cc974c90..000000000000 --- a/trunk/net/sunrpc/xprtrdma/transport.c +++ /dev/null @@ -1,800 +0,0 @@ -/* - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. 
You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the BSD-type - * license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * Neither the name of the Network Appliance, Inc. nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * transport.c - * - * This file contains the top-level implementation of an RPC RDMA - * transport. - * - * Naming convention: functions beginning with xprt_ are part of the - * transport switch. All others are RPC RDMA internal. 
- */ - -#include -#include -#include - -#include "xprt_rdma.h" - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -MODULE_LICENSE("Dual BSD/GPL"); - -MODULE_DESCRIPTION("RPC/RDMA Transport for Linux kernel NFS"); -MODULE_AUTHOR("Network Appliance, Inc."); - -/* - * tunables - */ - -static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; -static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; -static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; -static unsigned int xprt_rdma_inline_write_padding; -#if !RPCRDMA_PERSISTENT_REGISTRATION -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ -#else -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; -#endif - -#ifdef RPC_DEBUG - -static unsigned int min_slot_table_size = RPCRDMA_MIN_SLOT_TABLE; -static unsigned int max_slot_table_size = RPCRDMA_MAX_SLOT_TABLE; -static unsigned int zero; -static unsigned int max_padding = PAGE_SIZE; -static unsigned int min_memreg = RPCRDMA_BOUNCEBUFFERS; -static unsigned int max_memreg = RPCRDMA_LAST - 1; - -static struct ctl_table_header *sunrpc_table_header; - -static ctl_table xr_tunables_table[] = { - { - .ctl_name = CTL_SLOTTABLE_RDMA, - .procname = "rdma_slot_table_entries", - .data = &xprt_rdma_slot_table_entries, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_slot_table_size, - .extra2 = &max_slot_table_size - }, - { - .ctl_name = CTL_RDMA_MAXINLINEREAD, - .procname = "rdma_max_inline_read", - .data = &xprt_rdma_max_inline_read, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, - }, - { - .ctl_name = CTL_RDMA_MAXINLINEWRITE, - .procname = "rdma_max_inline_write", - .data = &xprt_rdma_max_inline_write, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec, - }, - 
{ - .ctl_name = CTL_RDMA_WRITEPADDING, - .procname = "rdma_inline_write_padding", - .data = &xprt_rdma_inline_write_padding, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &zero, - .extra2 = &max_padding, - }, - { - .ctl_name = CTL_RDMA_MEMREG, - .procname = "rdma_memreg_strategy", - .data = &xprt_rdma_memreg_strategy, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &min_memreg, - .extra2 = &max_memreg, - }, - { - .ctl_name = 0, - }, -}; - -static ctl_table sunrpc_table[] = { - { - .ctl_name = CTL_SUNRPC, - .procname = "sunrpc", - .mode = 0555, - .child = xr_tunables_table - }, - { - .ctl_name = 0, - }, -}; - -#endif - -static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */ - -static void -xprt_rdma_format_addresses(struct rpc_xprt *xprt) -{ - struct sockaddr_in *addr = (struct sockaddr_in *) - &rpcx_to_rdmad(xprt).addr; - char *buf; - - buf = kzalloc(20, GFP_KERNEL); - if (buf) - snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%u", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_PORT] = buf; - - xprt->address_strings[RPC_DISPLAY_PROTO] = "rdma"; - - buf = kzalloc(48, GFP_KERNEL); - if (buf) - snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", - NIPQUAD(addr->sin_addr.s_addr), - ntohs(addr->sin_port), "rdma"); - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) - snprintf(buf, 8, "%4hx", ntohs(addr->sin_port)); - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) - snprintf(buf, 30, 
NIPQUAD_FMT".%u.%u", - NIPQUAD(addr->sin_addr.s_addr), - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - - /* netid */ - xprt->address_strings[RPC_DISPLAY_NETID] = "rdma"; -} - -static void -xprt_rdma_free_addresses(struct rpc_xprt *xprt) -{ - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); - kfree(xprt->address_strings[RPC_DISPLAY_HEX_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_HEX_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR]); -} - -static void -xprt_rdma_connect_worker(struct work_struct *work) -{ - struct rpcrdma_xprt *r_xprt = - container_of(work, struct rpcrdma_xprt, rdma_connect.work); - struct rpc_xprt *xprt = &r_xprt->xprt; - int rc = 0; - - if (!xprt->shutdown) { - xprt_clear_connected(xprt); - - dprintk("RPC: %s: %sconnect\n", __func__, - r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); - rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - goto out; - } - goto out_clear; - -out: - xprt_wake_pending_tasks(xprt, rc); - -out_clear: - dprintk("RPC: %s: exit\n", __func__); - xprt_clear_connecting(xprt); -} - -/* - * xprt_rdma_destroy - * - * Destroy the xprt. - * Free all memory associated with the object, including its own. - * NOTE: none of the *destroy methods free memory for their top-level - * objects, even though they may have allocated it (they do free - * private memory). It's up to the caller to handle it. In this - * case (RDMA transport), all structure memory is inlined with the - * struct rpcrdma_xprt. 
- */ -static void -xprt_rdma_destroy(struct rpc_xprt *xprt) -{ - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int rc; - - dprintk("RPC: %s: called\n", __func__); - - cancel_delayed_work(&r_xprt->rdma_connect); - flush_scheduled_work(); - - xprt_clear_connected(xprt); - - rpcrdma_buffer_destroy(&r_xprt->rx_buf); - rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia); - if (rc) - dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n", - __func__, rc); - rpcrdma_ia_close(&r_xprt->rx_ia); - - xprt_rdma_free_addresses(xprt); - - kfree(xprt->slot); - xprt->slot = NULL; - kfree(xprt); - - dprintk("RPC: %s: returning\n", __func__); - - module_put(THIS_MODULE); -} - -/** - * xprt_setup_rdma - Set up transport to use RDMA - * - * @args: rpc transport arguments - */ -static struct rpc_xprt * -xprt_setup_rdma(struct xprt_create *args) -{ - struct rpcrdma_create_data_internal cdata; - struct rpc_xprt *xprt; - struct rpcrdma_xprt *new_xprt; - struct rpcrdma_ep *new_ep; - struct sockaddr_in *sin; - int rc; - - if (args->addrlen > sizeof(xprt->addr)) { - dprintk("RPC: %s: address too large\n", __func__); - return ERR_PTR(-EBADF); - } - - xprt = kzalloc(sizeof(struct rpcrdma_xprt), GFP_KERNEL); - if (xprt == NULL) { - dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", - __func__); - return ERR_PTR(-ENOMEM); - } - - xprt->max_reqs = xprt_rdma_slot_table_entries; - xprt->slot = kcalloc(xprt->max_reqs, - sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) { - kfree(xprt); - dprintk("RPC: %s: couldn't allocate %d slots\n", - __func__, xprt->max_reqs); - return ERR_PTR(-ENOMEM); - } - - /* 60 second timeout, no retries */ - xprt_set_timeout(&xprt->timeout, 0, 60UL * HZ); - xprt->bind_timeout = (60U * HZ); - xprt->connect_timeout = (60U * HZ); - xprt->reestablish_timeout = (5U * HZ); - xprt->idle_timeout = (5U * 60 * HZ); - - xprt->resvport = 0; /* privileged port not needed */ - xprt->tsh_size = 0; /* RPC-RDMA handles framing */ - xprt->max_payload = 
RPCRDMA_MAX_DATA_SEGS * PAGE_SIZE; - xprt->ops = &xprt_rdma_procs; - - /* - * Set up RDMA-specific connect data. - */ - - /* Put server RDMA address in local cdata */ - memcpy(&cdata.addr, args->dstaddr, args->addrlen); - - /* Ensure xprt->addr holds valid server TCP (not RDMA) - * address, for any side protocols which peek at it */ - xprt->prot = IPPROTO_TCP; - xprt->addrlen = args->addrlen; - memcpy(&xprt->addr, &cdata.addr, xprt->addrlen); - - sin = (struct sockaddr_in *)&cdata.addr; - if (ntohs(sin->sin_port) != 0) - xprt_set_bound(xprt); - - dprintk("RPC: %s: %u.%u.%u.%u:%u\n", __func__, - NIPQUAD(sin->sin_addr.s_addr), ntohs(sin->sin_port)); - - /* Set max requests */ - cdata.max_requests = xprt->max_reqs; - - /* Set some length limits */ - cdata.rsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA write max */ - cdata.wsize = RPCRDMA_MAX_SEGS * PAGE_SIZE; /* RDMA read max */ - - cdata.inline_wsize = xprt_rdma_max_inline_write; - if (cdata.inline_wsize > cdata.wsize) - cdata.inline_wsize = cdata.wsize; - - cdata.inline_rsize = xprt_rdma_max_inline_read; - if (cdata.inline_rsize > cdata.rsize) - cdata.inline_rsize = cdata.rsize; - - cdata.padding = xprt_rdma_inline_write_padding; - - /* - * Create new transport instance, which includes initialized - * o ia - * o endpoint - * o buffers - */ - - new_xprt = rpcx_to_rdmax(xprt); - - rc = rpcrdma_ia_open(new_xprt, (struct sockaddr *) &cdata.addr, - xprt_rdma_memreg_strategy); - if (rc) - goto out1; - - /* - * initialize and create ep - */ - new_xprt->rx_data = cdata; - new_ep = &new_xprt->rx_ep; - new_ep->rep_remote_addr = cdata.addr; - - rc = rpcrdma_ep_create(&new_xprt->rx_ep, - &new_xprt->rx_ia, &new_xprt->rx_data); - if (rc) - goto out2; - - /* - * Allocate pre-registered send and receive buffers for headers and - * any inline data. Also specify any padding which will be provided - * from a preregistered zero buffer. 
- */ - rc = rpcrdma_buffer_create(&new_xprt->rx_buf, new_ep, &new_xprt->rx_ia, - &new_xprt->rx_data); - if (rc) - goto out3; - - /* - * Register a callback for connection events. This is necessary because - * connection loss notification is async. We also catch connection loss - * when reaping receives. - */ - INIT_DELAYED_WORK(&new_xprt->rdma_connect, xprt_rdma_connect_worker); - new_ep->rep_func = rpcrdma_conn_func; - new_ep->rep_xprt = xprt; - - xprt_rdma_format_addresses(xprt); - - if (!try_module_get(THIS_MODULE)) - goto out4; - - return xprt; - -out4: - xprt_rdma_free_addresses(xprt); - rc = -EINVAL; -out3: - (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia); -out2: - rpcrdma_ia_close(&new_xprt->rx_ia); -out1: - kfree(xprt->slot); - kfree(xprt); - return ERR_PTR(rc); -} - -/* - * Close a connection, during shutdown or timeout/reconnect - */ -static void -xprt_rdma_close(struct rpc_xprt *xprt) -{ - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - - dprintk("RPC: %s: closing\n", __func__); - xprt_disconnect(xprt); - (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); -} - -static void -xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port) -{ - struct sockaddr_in *sap; - - sap = (struct sockaddr_in *)&xprt->addr; - sap->sin_port = htons(port); - sap = (struct sockaddr_in *)&rpcx_to_rdmad(xprt).addr; - sap->sin_port = htons(port); - dprintk("RPC: %s: %u\n", __func__, port); -} - -static void -xprt_rdma_connect(struct rpc_task *task) -{ - struct rpc_xprt *xprt = (struct rpc_xprt *)task->tk_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - - if (!xprt_test_and_set_connecting(xprt)) { - if (r_xprt->rx_ep.rep_connected != 0) { - /* Reconnect */ - schedule_delayed_work(&r_xprt->rdma_connect, - xprt->reestablish_timeout); - } else { - schedule_delayed_work(&r_xprt->rdma_connect, 0); - if (!RPC_IS_ASYNC(task)) - flush_scheduled_work(); - } - } -} - -static int -xprt_rdma_reserve_xprt(struct rpc_task *task) -{ - struct rpc_xprt *xprt = 
task->tk_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - int credits = atomic_read(&r_xprt->rx_buf.rb_credits); - - /* == RPC_CWNDSCALE @ init, but *after* setup */ - if (r_xprt->rx_buf.rb_cwndscale == 0UL) { - r_xprt->rx_buf.rb_cwndscale = xprt->cwnd; - dprintk("RPC: %s: cwndscale %lu\n", __func__, - r_xprt->rx_buf.rb_cwndscale); - BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); - } - xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; - return xprt_reserve_xprt_cong(task); -} - -/* - * The RDMA allocate/free functions need the task structure as a place - * to hide the struct rpcrdma_req, which is necessary for the actual send/recv - * sequence. For this reason, the recv buffers are attached to send - * buffers for portions of the RPC. Note that the RPC layer allocates - * both send and receive buffers in the same call. We may register - * the receive buffer portion when using reply chunks. - */ -static void * -xprt_rdma_allocate(struct rpc_task *task, size_t size) -{ - struct rpc_xprt *xprt = task->tk_xprt; - struct rpcrdma_req *req, *nreq; - - req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf); - BUG_ON(NULL == req); - - if (size > req->rl_size) { - dprintk("RPC: %s: size %zd too large for buffer[%zd]: " - "prog %d vers %d proc %d\n", - __func__, size, req->rl_size, - task->tk_client->cl_prog, task->tk_client->cl_vers, - task->tk_msg.rpc_proc->p_proc); - /* - * Outgoing length shortage. Our inline write max must have - * been configured to perform direct i/o. - * - * This is therefore a large metadata operation, and the - * allocate call was made on the maximum possible message, - * e.g. containing long filename(s) or symlink data. In - * fact, while these metadata operations *might* carry - * large outgoing payloads, they rarely *do*. However, we - * have to commit to the request here, so reallocate and - * register it now. The data path will never require this - * reallocation. 
- * - * If the allocation or registration fails, the RPC framework - * will (doggedly) retry. - */ - if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy == - RPCRDMA_BOUNCEBUFFERS) { - /* forced to "pure inline" */ - dprintk("RPC: %s: too much data (%zd) for inline " - "(r/w max %d/%d)\n", __func__, size, - rpcx_to_rdmad(xprt).inline_rsize, - rpcx_to_rdmad(xprt).inline_wsize); - size = req->rl_size; - rpc_exit(task, -EIO); /* fail the operation */ - rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; - goto out; - } - if (task->tk_flags & RPC_TASK_SWAPPER) - nreq = kmalloc(sizeof *req + size, GFP_ATOMIC); - else - nreq = kmalloc(sizeof *req + size, GFP_NOFS); - if (nreq == NULL) - goto outfail; - - if (rpcrdma_register_internal(&rpcx_to_rdmax(xprt)->rx_ia, - nreq->rl_base, size + sizeof(struct rpcrdma_req) - - offsetof(struct rpcrdma_req, rl_base), - &nreq->rl_handle, &nreq->rl_iov)) { - kfree(nreq); - goto outfail; - } - rpcx_to_rdmax(xprt)->rx_stats.hardway_register_count += size; - nreq->rl_size = size; - nreq->rl_niovs = 0; - nreq->rl_nchunks = 0; - nreq->rl_buffer = (struct rpcrdma_buffer *)req; - nreq->rl_reply = req->rl_reply; - memcpy(nreq->rl_segments, - req->rl_segments, sizeof nreq->rl_segments); - /* flag the swap with an unused field */ - nreq->rl_iov.length = 0; - req->rl_reply = NULL; - req = nreq; - } - dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); -out: - return req->rl_xdr_buf; - -outfail: - rpcrdma_buffer_put(req); - rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++; - return NULL; -} - -/* - * This function returns all RDMA resources to the pool. 
- */ -static void -xprt_rdma_free(void *buffer) -{ - struct rpcrdma_req *req; - struct rpcrdma_xprt *r_xprt; - struct rpcrdma_rep *rep; - int i; - - if (buffer == NULL) - return; - - req = container_of(buffer, struct rpcrdma_req, rl_xdr_buf[0]); - r_xprt = container_of(req->rl_buffer, struct rpcrdma_xprt, rx_buf); - rep = req->rl_reply; - - dprintk("RPC: %s: called on 0x%p%s\n", - __func__, rep, (rep && rep->rr_func) ? " (with waiter)" : ""); - - /* - * Finish the deregistration. When using mw bind, this was - * begun in rpcrdma_reply_handler(). In all other modes, we - * do it here, in thread context. The process is considered - * complete when the rr_func vector becomes NULL - this - * was put in place during rpcrdma_reply_handler() - the wait - * call below will not block if the dereg is "done". If - * interrupted, our framework will clean up. - */ - for (i = 0; req->rl_nchunks;) { - --req->rl_nchunks; - i += rpcrdma_deregister_external( - &req->rl_segments[i], r_xprt, NULL); - } - - if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) { - rep->rr_func = NULL; /* abandon the callback */ - req->rl_reply = NULL; - } - - if (req->rl_iov.length == 0) { /* see allocate above */ - struct rpcrdma_req *oreq = (struct rpcrdma_req *)req->rl_buffer; - oreq->rl_reply = req->rl_reply; - (void) rpcrdma_deregister_internal(&r_xprt->rx_ia, - req->rl_handle, - &req->rl_iov); - kfree(req); - req = oreq; - } - - /* Put back request+reply buffers */ - rpcrdma_buffer_put(req); -} - -/* - * send_request invokes the meat of RPC RDMA. It must do the following: - * 1. Marshal the RPC request into an RPC RDMA request, which means - * putting a header in front of data, and creating IOVs for RDMA - * from those in the request. - * 2. In marshaling, detect opportunities for RDMA, and use them. - * 3. Post a recv message to set up asynch completion, then send - * the request (rpcrdma_ep_post). - * 4. No partial sends are possible in the RPC-RDMA protocol (as in UDP). 
- */ - -static int -xprt_rdma_send_request(struct rpc_task *task) -{ - struct rpc_rqst *rqst = task->tk_rqstp; - struct rpc_xprt *xprt = task->tk_xprt; - struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - - /* marshal the send itself */ - if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) { - r_xprt->rx_stats.failed_marshal_count++; - dprintk("RPC: %s: rpcrdma_marshal_req failed\n", - __func__); - return -EIO; - } - - if (req->rl_reply == NULL) /* e.g. reconnection */ - rpcrdma_recv_buffer_get(req); - - if (req->rl_reply) { - req->rl_reply->rr_func = rpcrdma_reply_handler; - /* this need only be done once, but... */ - req->rl_reply->rr_xprt = xprt; - } - - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { - xprt_disconnect(xprt); - return -ENOTCONN; /* implies disconnect */ - } - - rqst->rq_bytes_sent = 0; - return 0; -} - -static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) -{ - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - long idle_time = 0; - - if (xprt_connected(xprt)) - idle_time = (long)(jiffies - xprt->last_used) / HZ; - - seq_printf(seq, - "\txprt:\trdma %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu " - "%lu %lu %lu %Lu %Lu %Lu %Lu %lu %lu %lu\n", - - 0, /* need a local port? 
*/ - xprt->stat.bind_count, - xprt->stat.connect_count, - xprt->stat.connect_time, - idle_time, - xprt->stat.sends, - xprt->stat.recvs, - xprt->stat.bad_xids, - xprt->stat.req_u, - xprt->stat.bklog_u, - - r_xprt->rx_stats.read_chunk_count, - r_xprt->rx_stats.write_chunk_count, - r_xprt->rx_stats.reply_chunk_count, - r_xprt->rx_stats.total_rdma_request, - r_xprt->rx_stats.total_rdma_reply, - r_xprt->rx_stats.pullup_copy_count, - r_xprt->rx_stats.fixup_copy_count, - r_xprt->rx_stats.hardway_register_count, - r_xprt->rx_stats.failed_marshal_count, - r_xprt->rx_stats.bad_reply_count); -} - -/* - * Plumbing for rpc transport switch and kernel module - */ - -static struct rpc_xprt_ops xprt_rdma_procs = { - .reserve_xprt = xprt_rdma_reserve_xprt, - .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ - .release_request = xprt_release_rqst_cong, /* ditto */ - .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ - .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ - .set_port = xprt_rdma_set_port, - .connect = xprt_rdma_connect, - .buf_alloc = xprt_rdma_allocate, - .buf_free = xprt_rdma_free, - .send_request = xprt_rdma_send_request, - .close = xprt_rdma_close, - .destroy = xprt_rdma_destroy, - .print_stats = xprt_rdma_print_stats -}; - -static struct xprt_class xprt_rdma = { - .list = LIST_HEAD_INIT(xprt_rdma.list), - .name = "rdma", - .owner = THIS_MODULE, - .ident = XPRT_TRANSPORT_RDMA, - .setup = xprt_setup_rdma, -}; - -static void __exit xprt_rdma_cleanup(void) -{ - int rc; - - dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); -#ifdef RPC_DEBUG - if (sunrpc_table_header) { - unregister_sysctl_table(sunrpc_table_header); - sunrpc_table_header = NULL; - } -#endif - rc = xprt_unregister_transport(&xprt_rdma); - if (rc) - dprintk("RPC: %s: xprt_unregister returned %i\n", - __func__, rc); -} - -static int __init xprt_rdma_init(void) -{ - int rc; - - rc = xprt_register_transport(&xprt_rdma); - - if (rc) - return rc; - - 
dprintk(KERN_INFO "RPCRDMA Module Init, register RPC RDMA transport\n"); - - dprintk(KERN_INFO "Defaults:\n"); - dprintk(KERN_INFO "\tSlots %d\n" - "\tMaxInlineRead %d\n\tMaxInlineWrite %d\n", - xprt_rdma_slot_table_entries, - xprt_rdma_max_inline_read, xprt_rdma_max_inline_write); - dprintk(KERN_INFO "\tPadding %d\n\tMemreg %d\n", - xprt_rdma_inline_write_padding, xprt_rdma_memreg_strategy); - -#ifdef RPC_DEBUG - if (!sunrpc_table_header) - sunrpc_table_header = register_sysctl_table(sunrpc_table); -#endif - return 0; -} - -module_init(xprt_rdma_init); -module_exit(xprt_rdma_cleanup); diff --git a/trunk/net/sunrpc/xprtrdma/verbs.c b/trunk/net/sunrpc/xprtrdma/verbs.c deleted file mode 100644 index 9ec8ca4f6028..000000000000 --- a/trunk/net/sunrpc/xprtrdma/verbs.c +++ /dev/null @@ -1,1626 +0,0 @@ -/* - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the BSD-type - * license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * Neither the name of the Network Appliance, Inc. nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * verbs.c - * - * Encapsulates the major functions managing: - * o adapters - * o endpoints - * o connections - * o buffer memory - */ - -#include /* for Tavor hack below */ - -#include "xprt_rdma.h" - -/* - * Globals/Macros - */ - -#ifdef RPC_DEBUG -# define RPCDBG_FACILITY RPCDBG_TRANS -#endif - -/* - * internal functions - */ - -/* - * handle replies in tasklet context, using a single, global list - * rdma tasklet function -- just turn around and call the func - * for all replies on the list - */ - -static DEFINE_SPINLOCK(rpcrdma_tk_lock_g); -static LIST_HEAD(rpcrdma_tasklets_g); - -static void -rpcrdma_run_tasklet(unsigned long data) -{ - struct rpcrdma_rep *rep; - void (*func)(struct rpcrdma_rep *); - unsigned long flags; - - data = data; - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - while (!list_empty(&rpcrdma_tasklets_g)) { - rep = list_entry(rpcrdma_tasklets_g.next, - struct rpcrdma_rep, rr_list); - list_del(&rep->rr_list); - func = rep->rr_func; - rep->rr_func = NULL; - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - - if (func) - func(rep); - else - rpcrdma_recv_buffer_put(rep); - - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - } - 
spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); -} - -static DECLARE_TASKLET(rpcrdma_tasklet_g, rpcrdma_run_tasklet, 0UL); - -static inline void -rpcrdma_schedule_tasklet(struct rpcrdma_rep *rep) -{ - unsigned long flags; - - spin_lock_irqsave(&rpcrdma_tk_lock_g, flags); - list_add_tail(&rep->rr_list, &rpcrdma_tasklets_g); - spin_unlock_irqrestore(&rpcrdma_tk_lock_g, flags); - tasklet_schedule(&rpcrdma_tasklet_g); -} - -static void -rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) -{ - struct rpcrdma_ep *ep = context; - - dprintk("RPC: %s: QP error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); - if (ep->rep_connected == 1) { - ep->rep_connected = -EIO; - ep->rep_func(ep); - wake_up_all(&ep->rep_connect_wait); - } -} - -static void -rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context) -{ - struct rpcrdma_ep *ep = context; - - dprintk("RPC: %s: CQ error %X on device %s ep %p\n", - __func__, event->event, event->device->name, context); - if (ep->rep_connected == 1) { - ep->rep_connected = -EIO; - ep->rep_func(ep); - wake_up_all(&ep->rep_connect_wait); - } -} - -static inline -void rpcrdma_event_process(struct ib_wc *wc) -{ - struct rpcrdma_rep *rep = - (struct rpcrdma_rep *)(unsigned long) wc->wr_id; - - dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n", - __func__, rep, wc->status, wc->opcode, wc->byte_len); - - if (!rep) /* send or bind completion that we don't care about */ - return; - - if (IB_WC_SUCCESS != wc->status) { - dprintk("RPC: %s: %s WC status %X, connection lost\n", - __func__, (wc->opcode & IB_WC_RECV) ? 
"recv" : "send", - wc->status); - rep->rr_len = ~0U; - rpcrdma_schedule_tasklet(rep); - return; - } - - switch (wc->opcode) { - case IB_WC_RECV: - rep->rr_len = wc->byte_len; - ib_dma_sync_single_for_cpu( - rdmab_to_ia(rep->rr_buffer)->ri_id->device, - rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE); - /* Keep (only) the most recent credits, after check validity */ - if (rep->rr_len >= 16) { - struct rpcrdma_msg *p = - (struct rpcrdma_msg *) rep->rr_base; - unsigned int credits = ntohl(p->rm_credit); - if (credits == 0) { - dprintk("RPC: %s: server" - " dropped credits to 0!\n", __func__); - /* don't deadlock */ - credits = 1; - } else if (credits > rep->rr_buffer->rb_max_requests) { - dprintk("RPC: %s: server" - " over-crediting: %d (%d)\n", - __func__, credits, - rep->rr_buffer->rb_max_requests); - credits = rep->rr_buffer->rb_max_requests; - } - atomic_set(&rep->rr_buffer->rb_credits, credits); - } - /* fall through */ - case IB_WC_BIND_MW: - rpcrdma_schedule_tasklet(rep); - break; - default: - dprintk("RPC: %s: unexpected WC event %X\n", - __func__, wc->opcode); - break; - } -} - -static inline int -rpcrdma_cq_poll(struct ib_cq *cq) -{ - struct ib_wc wc; - int rc; - - for (;;) { - rc = ib_poll_cq(cq, 1, &wc); - if (rc < 0) { - dprintk("RPC: %s: ib_poll_cq failed %i\n", - __func__, rc); - return rc; - } - if (rc == 0) - break; - - rpcrdma_event_process(&wc); - } - - return 0; -} - -/* - * rpcrdma_cq_event_upcall - * - * This upcall handles recv, send, bind and unbind events. - * It is reentrant but processes single events in order to maintain - * ordering of receives to keep server credits. - * - * It is the responsibility of the scheduled tasklet to return - * recv buffers to the pool. NOTE: this affects synchronization of - * connection shutdown. That is, the structures required for - * the completion of the reply handler must remain intact until - * all memory has been reclaimed. - * - * Note that send events are suppressed and do not result in an upcall. 
- */ -static void -rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context) -{ - int rc; - - rc = rpcrdma_cq_poll(cq); - if (rc) - return; - - rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); - if (rc) { - dprintk("RPC: %s: ib_req_notify_cq failed %i\n", - __func__, rc); - return; - } - - rpcrdma_cq_poll(cq); -} - -#ifdef RPC_DEBUG -static const char * const conn[] = { - "address resolved", - "address error", - "route resolved", - "route error", - "connect request", - "connect response", - "connect error", - "unreachable", - "rejected", - "established", - "disconnected", - "device removal" -}; -#endif - -static int -rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) -{ - struct rpcrdma_xprt *xprt = id->context; - struct rpcrdma_ia *ia = &xprt->rx_ia; - struct rpcrdma_ep *ep = &xprt->rx_ep; - struct sockaddr_in *addr = (struct sockaddr_in *) &ep->rep_remote_addr; - struct ib_qp_attr attr; - struct ib_qp_init_attr iattr; - int connstate = 0; - - switch (event->event) { - case RDMA_CM_EVENT_ADDR_RESOLVED: - case RDMA_CM_EVENT_ROUTE_RESOLVED: - complete(&ia->ri_done); - break; - case RDMA_CM_EVENT_ADDR_ERROR: - ia->ri_async_rc = -EHOSTUNREACH; - dprintk("RPC: %s: CM address resolution error, ep 0x%p\n", - __func__, ep); - complete(&ia->ri_done); - break; - case RDMA_CM_EVENT_ROUTE_ERROR: - ia->ri_async_rc = -ENETUNREACH; - dprintk("RPC: %s: CM route resolution error, ep 0x%p\n", - __func__, ep); - complete(&ia->ri_done); - break; - case RDMA_CM_EVENT_ESTABLISHED: - connstate = 1; - ib_query_qp(ia->ri_id->qp, &attr, - IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC, - &iattr); - dprintk("RPC: %s: %d responder resources" - " (%d initiator)\n", - __func__, attr.max_dest_rd_atomic, attr.max_rd_atomic); - goto connected; - case RDMA_CM_EVENT_CONNECT_ERROR: - connstate = -ENOTCONN; - goto connected; - case RDMA_CM_EVENT_UNREACHABLE: - connstate = -ENETDOWN; - goto connected; - case RDMA_CM_EVENT_REJECTED: - connstate = -ECONNREFUSED; - goto connected; - 
case RDMA_CM_EVENT_DISCONNECTED: - connstate = -ECONNABORTED; - goto connected; - case RDMA_CM_EVENT_DEVICE_REMOVAL: - connstate = -ENODEV; -connected: - dprintk("RPC: %s: %s: %u.%u.%u.%u:%u" - " (ep 0x%p event 0x%x)\n", - __func__, - (event->event <= 11) ? conn[event->event] : - "unknown connection error", - NIPQUAD(addr->sin_addr.s_addr), - ntohs(addr->sin_port), - ep, event->event); - atomic_set(&rpcx_to_rdmax(ep->rep_xprt)->rx_buf.rb_credits, 1); - dprintk("RPC: %s: %sconnected\n", - __func__, connstate > 0 ? "" : "dis"); - ep->rep_connected = connstate; - ep->rep_func(ep); - wake_up_all(&ep->rep_connect_wait); - break; - default: - ia->ri_async_rc = -EINVAL; - dprintk("RPC: %s: unexpected CM event %X\n", - __func__, event->event); - complete(&ia->ri_done); - break; - } - - return 0; -} - -static struct rdma_cm_id * -rpcrdma_create_id(struct rpcrdma_xprt *xprt, - struct rpcrdma_ia *ia, struct sockaddr *addr) -{ - struct rdma_cm_id *id; - int rc; - - id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); - if (IS_ERR(id)) { - rc = PTR_ERR(id); - dprintk("RPC: %s: rdma_create_id() failed %i\n", - __func__, rc); - return id; - } - - ia->ri_async_rc = 0; - rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); - if (rc) { - dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", - __func__, rc); - goto out; - } - wait_for_completion(&ia->ri_done); - rc = ia->ri_async_rc; - if (rc) - goto out; - - ia->ri_async_rc = 0; - rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); - if (rc) { - dprintk("RPC: %s: rdma_resolve_route() failed %i\n", - __func__, rc); - goto out; - } - wait_for_completion(&ia->ri_done); - rc = ia->ri_async_rc; - if (rc) - goto out; - - return id; - -out: - rdma_destroy_id(id); - return ERR_PTR(rc); -} - -/* - * Drain any cq, prior to teardown. 
- */ -static void -rpcrdma_clean_cq(struct ib_cq *cq) -{ - struct ib_wc wc; - int count = 0; - - while (1 == ib_poll_cq(cq, 1, &wc)) - ++count; - - if (count) - dprintk("RPC: %s: flushed %d events (last 0x%x)\n", - __func__, count, wc.opcode); -} - -/* - * Exported functions. - */ - -/* - * Open and initialize an Interface Adapter. - * o initializes fields of struct rpcrdma_ia, including - * interface and provider attributes and protection zone. - */ -int -rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) -{ - int rc; - struct rpcrdma_ia *ia = &xprt->rx_ia; - - init_completion(&ia->ri_done); - - ia->ri_id = rpcrdma_create_id(xprt, ia, addr); - if (IS_ERR(ia->ri_id)) { - rc = PTR_ERR(ia->ri_id); - goto out1; - } - - ia->ri_pd = ib_alloc_pd(ia->ri_id->device); - if (IS_ERR(ia->ri_pd)) { - rc = PTR_ERR(ia->ri_pd); - dprintk("RPC: %s: ib_alloc_pd() failed %i\n", - __func__, rc); - goto out2; - } - - /* - * Optionally obtain an underlying physical identity mapping in - * order to do a memory window-based bind. This base registration - * is protected from remote access - that is enabled only by binding - * for the specific bytes targeted during each RPC operation, and - * revoked after the corresponding completion similar to a storage - * adapter. 
- */ - if (memreg > RPCRDMA_REGISTER) { - int mem_priv = IB_ACCESS_LOCAL_WRITE; - switch (memreg) { -#if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: - mem_priv |= IB_ACCESS_REMOTE_WRITE; - mem_priv |= IB_ACCESS_REMOTE_READ; - break; -#endif - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - mem_priv |= IB_ACCESS_MW_BIND; - break; - default: - break; - } - ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); - if (IS_ERR(ia->ri_bind_mem)) { - printk(KERN_ALERT "%s: ib_get_dma_mr for " - "phys register failed with %lX\n\t" - "Will continue with degraded performance\n", - __func__, PTR_ERR(ia->ri_bind_mem)); - memreg = RPCRDMA_REGISTER; - ia->ri_bind_mem = NULL; - } - } - - /* Else will do memory reg/dereg for each chunk */ - ia->ri_memreg_strategy = memreg; - - return 0; -out2: - rdma_destroy_id(ia->ri_id); -out1: - return rc; -} - -/* - * Clean up/close an IA. - * o if event handles and PD have been initialized, free them. - * o close the IA - */ -void -rpcrdma_ia_close(struct rpcrdma_ia *ia) -{ - int rc; - - dprintk("RPC: %s: entering\n", __func__); - if (ia->ri_bind_mem != NULL) { - rc = ib_dereg_mr(ia->ri_bind_mem); - dprintk("RPC: %s: ib_dereg_mr returned %i\n", - __func__, rc); - } - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) - rdma_destroy_qp(ia->ri_id); - if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { - rc = ib_dealloc_pd(ia->ri_pd); - dprintk("RPC: %s: ib_dealloc_pd returned %i\n", - __func__, rc); - } - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) - rdma_destroy_id(ia->ri_id); -} - -/* - * Create unconnected endpoint. 
- */ -int -rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, - struct rpcrdma_create_data_internal *cdata) -{ - struct ib_device_attr devattr; - int rc; - - rc = ib_query_device(ia->ri_id->device, &devattr); - if (rc) { - dprintk("RPC: %s: ib_query_device failed %d\n", - __func__, rc); - return rc; - } - - /* check provider's send/recv wr limits */ - if (cdata->max_requests > devattr.max_qp_wr) - cdata->max_requests = devattr.max_qp_wr; - - ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall; - ep->rep_attr.qp_context = ep; - /* send_cq and recv_cq initialized below */ - ep->rep_attr.srq = NULL; - ep->rep_attr.cap.max_send_wr = cdata->max_requests; - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - /* Add room for mw_binds+unbinds - overkill! */ - ep->rep_attr.cap.max_send_wr++; - ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS); - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) - return -EINVAL; - break; - default: - break; - } - ep->rep_attr.cap.max_recv_wr = cdata->max_requests; - ep->rep_attr.cap.max_send_sge = (cdata->padding ? 
4 : 2); - ep->rep_attr.cap.max_recv_sge = 1; - ep->rep_attr.cap.max_inline_data = 0; - ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; - ep->rep_attr.qp_type = IB_QPT_RC; - ep->rep_attr.port_num = ~0; - - dprintk("RPC: %s: requested max: dtos: send %d recv %d; " - "iovs: send %d recv %d\n", - __func__, - ep->rep_attr.cap.max_send_wr, - ep->rep_attr.cap.max_recv_wr, - ep->rep_attr.cap.max_send_sge, - ep->rep_attr.cap.max_recv_sge); - - /* set trigger for requesting send completion */ - ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/; - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - ep->rep_cqinit -= RPCRDMA_MAX_SEGS; - break; - default: - break; - } - if (ep->rep_cqinit <= 2) - ep->rep_cqinit = 0; - INIT_CQCOUNT(ep); - ep->rep_ia = ia; - init_waitqueue_head(&ep->rep_connect_wait); - - /* - * Create a single cq for receive dto and mw_bind (only ever - * care about unbind, really). Send completions are suppressed. - * Use single threaded tasklet upcalls to maintain ordering. 
- */ - ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall, - rpcrdma_cq_async_error_upcall, NULL, - ep->rep_attr.cap.max_recv_wr + - ep->rep_attr.cap.max_send_wr + 1, 0); - if (IS_ERR(ep->rep_cq)) { - rc = PTR_ERR(ep->rep_cq); - dprintk("RPC: %s: ib_create_cq failed: %i\n", - __func__, rc); - goto out1; - } - - rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP); - if (rc) { - dprintk("RPC: %s: ib_req_notify_cq failed: %i\n", - __func__, rc); - goto out2; - } - - ep->rep_attr.send_cq = ep->rep_cq; - ep->rep_attr.recv_cq = ep->rep_cq; - - /* Initialize cma parameters */ - - /* RPC/RDMA does not use private data */ - ep->rep_remote_cma.private_data = NULL; - ep->rep_remote_cma.private_data_len = 0; - - /* Client offers RDMA Read but does not initiate */ - switch (ia->ri_memreg_strategy) { - case RPCRDMA_BOUNCEBUFFERS: - ep->rep_remote_cma.responder_resources = 0; - break; - case RPCRDMA_MTHCAFMR: - case RPCRDMA_REGISTER: - ep->rep_remote_cma.responder_resources = cdata->max_requests * - (RPCRDMA_MAX_DATA_SEGS / 8); - break; - case RPCRDMA_MEMWINDOWS: - case RPCRDMA_MEMWINDOWS_ASYNC: -#if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: -#endif - ep->rep_remote_cma.responder_resources = cdata->max_requests * - (RPCRDMA_MAX_DATA_SEGS / 2); - break; - default: - break; - } - if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) - ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; - ep->rep_remote_cma.initiator_depth = 0; - - ep->rep_remote_cma.retry_count = 7; - ep->rep_remote_cma.flow_control = 0; - ep->rep_remote_cma.rnr_retry_count = 0; - - return 0; - -out2: - if (ib_destroy_cq(ep->rep_cq)) - ; -out1: - return rc; -} - -/* - * rpcrdma_ep_destroy - * - * Disconnect and destroy endpoint. After this, the only - * valid operations on the ep are to free it (if dynamically - * allocated) or re-create it. - * - * The caller's error handling must be sure to not leak the endpoint - * if this function fails. 
- */ -int -rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) -{ - int rc; - - dprintk("RPC: %s: entering, connected is %d\n", - __func__, ep->rep_connected); - - if (ia->ri_id->qp) { - rc = rpcrdma_ep_disconnect(ep, ia); - if (rc) - dprintk("RPC: %s: rpcrdma_ep_disconnect" - " returned %i\n", __func__, rc); - } - - ep->rep_func = NULL; - - /* padding - could be done in rpcrdma_buffer_destroy... */ - if (ep->rep_pad_mr) { - rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); - ep->rep_pad_mr = NULL; - } - - if (ia->ri_id->qp) { - rdma_destroy_qp(ia->ri_id); - ia->ri_id->qp = NULL; - } - - rpcrdma_clean_cq(ep->rep_cq); - rc = ib_destroy_cq(ep->rep_cq); - if (rc) - dprintk("RPC: %s: ib_destroy_cq returned %i\n", - __func__, rc); - - return rc; -} - -/* - * Connect unconnected endpoint. - */ -int -rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) -{ - struct rdma_cm_id *id; - int rc = 0; - int retry_count = 0; - int reconnect = (ep->rep_connected != 0); - - if (reconnect) { - struct rpcrdma_xprt *xprt; -retry: - rc = rpcrdma_ep_disconnect(ep, ia); - if (rc && rc != -ENOTCONN) - dprintk("RPC: %s: rpcrdma_ep_disconnect" - " status %i\n", __func__, rc); - rpcrdma_clean_cq(ep->rep_cq); - - xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - id = rpcrdma_create_id(xprt, ia, - (struct sockaddr *)&xprt->rx_data.addr); - if (IS_ERR(id)) { - rc = PTR_ERR(id); - goto out; - } - /* TEMP TEMP TEMP - fail if new device: - * Deregister/remarshal *all* requests! - * Close and recreate adapter, pd, etc! - * Re-determine all attributes still sane! - * More stuff I haven't thought of! - * Rrrgh! 
- */ - if (ia->ri_id->device != id->device) { - printk("RPC: %s: can't reconnect on " - "different device!\n", __func__); - rdma_destroy_id(id); - rc = -ENETDOWN; - goto out; - } - /* END TEMP */ - rdma_destroy_id(ia->ri_id); - ia->ri_id = id; - } - - rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); - if (rc) { - dprintk("RPC: %s: rdma_create_qp failed %i\n", - __func__, rc); - goto out; - } - -/* XXX Tavor device performs badly with 2K MTU! */ -if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { - struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device); - if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR && - (pcid->vendor == PCI_VENDOR_ID_MELLANOX || - pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) { - struct ib_qp_attr attr = { - .path_mtu = IB_MTU_1024 - }; - rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU); - } -} - - /* Theoretically a client initiator_depth > 0 is not needed, - * but many peers fail to complete the connection unless they - * == responder_resources! */ - if (ep->rep_remote_cma.initiator_depth != - ep->rep_remote_cma.responder_resources) - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; - - ep->rep_connected = 0; - - rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); - if (rc) { - dprintk("RPC: %s: rdma_connect() failed with %i\n", - __func__, rc); - goto out; - } - - if (reconnect) - return 0; - - wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); - - /* - * Check state. A non-peer reject indicates no listener - * (ECONNREFUSED), which may be a transient state. All - * others indicate a transport condition which has already - * undergone a best-effort. 
- */ - if (ep->rep_connected == -ECONNREFUSED - && ++retry_count <= RDMA_CONNECT_RETRY_MAX) { - dprintk("RPC: %s: non-peer_reject, retry\n", __func__); - goto retry; - } - if (ep->rep_connected <= 0) { - /* Sometimes, the only way to reliably connect to remote - * CMs is to use same nonzero values for ORD and IRD. */ - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; - if (ep->rep_remote_cma.initiator_depth == 0) - ++ep->rep_remote_cma.initiator_depth; - if (ep->rep_remote_cma.responder_resources == 0) - ++ep->rep_remote_cma.responder_resources; - if (retry_count++ == 0) - goto retry; - rc = ep->rep_connected; - } else { - dprintk("RPC: %s: connected\n", __func__); - } - -out: - if (rc) - ep->rep_connected = rc; - return rc; -} - -/* - * rpcrdma_ep_disconnect - * - * This is separate from destroy to facilitate the ability - * to reconnect without recreating the endpoint. - * - * This call is not reentrant, and must not be made in parallel - * on the same endpoint. - */ -int -rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) -{ - int rc; - - rpcrdma_clean_cq(ep->rep_cq); - rc = rdma_disconnect(ia->ri_id); - if (!rc) { - /* returns without wait if not connected */ - wait_event_interruptible(ep->rep_connect_wait, - ep->rep_connected != 1); - dprintk("RPC: %s: after wait, %sconnected\n", __func__, - (ep->rep_connected == 1) ? "still " : "dis"); - } else { - dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc); - ep->rep_connected = rc; - } - return rc; -} - -/* - * Initialize buffer memory - */ -int -rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, - struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata) -{ - char *p; - size_t len; - int i, rc; - - buf->rb_max_requests = cdata->max_requests; - spin_lock_init(&buf->rb_lock); - atomic_set(&buf->rb_credits, 1); - - /* Need to allocate: - * 1. arrays for send and recv pointers - * 2. arrays of struct rpcrdma_req to fill in pointers - * 3. 
array of struct rpcrdma_rep for replies - * 4. padding, if any - * 5. mw's, if any - * Send/recv buffers in req/rep need to be registered - */ - - len = buf->rb_max_requests * - (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); - len += cdata->padding; - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MTHCAFMR: - /* TBD we are perhaps overallocating here */ - len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * - sizeof(struct rpcrdma_mw); - break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * - sizeof(struct rpcrdma_mw); - break; - default: - break; - } - - /* allocate 1, 4 and 5 in one shot */ - p = kzalloc(len, GFP_KERNEL); - if (p == NULL) { - dprintk("RPC: %s: req_t/rep_t/pad kzalloc(%zd) failed\n", - __func__, len); - rc = -ENOMEM; - goto out; - } - buf->rb_pool = p; /* for freeing it later */ - - buf->rb_send_bufs = (struct rpcrdma_req **) p; - p = (char *) &buf->rb_send_bufs[buf->rb_max_requests]; - buf->rb_recv_bufs = (struct rpcrdma_rep **) p; - p = (char *) &buf->rb_recv_bufs[buf->rb_max_requests]; - - /* - * Register the zeroed pad buffer, if any. - */ - if (cdata->padding) { - rc = rpcrdma_register_internal(ia, p, cdata->padding, - &ep->rep_pad_mr, &ep->rep_pad); - if (rc) - goto out; - } - p += cdata->padding; - - /* - * Allocate the fmr's, or mw's for mw_bind chunk registration. - * We "cycle" the mw's in order to minimize rkey reuse, - * and also reduce unbind-to-bind collision. 
- */ - INIT_LIST_HEAD(&buf->rb_mws); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MTHCAFMR: - { - struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; - struct ib_fmr_attr fa = { - RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT - }; - /* TBD we are perhaps overallocating here */ - for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - r->r.fmr = ib_alloc_fmr(ia->ri_pd, - IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, - &fa); - if (IS_ERR(r->r.fmr)) { - rc = PTR_ERR(r->r.fmr); - dprintk("RPC: %s: ib_alloc_fmr" - " failed %i\n", __func__, rc); - goto out; - } - list_add(&r->mw_list, &buf->rb_mws); - ++r; - } - } - break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - { - struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; - /* Allocate one extra request's worth, for full cycling */ - for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { - r->r.mw = ib_alloc_mw(ia->ri_pd); - if (IS_ERR(r->r.mw)) { - rc = PTR_ERR(r->r.mw); - dprintk("RPC: %s: ib_alloc_mw" - " failed %i\n", __func__, rc); - goto out; - } - list_add(&r->mw_list, &buf->rb_mws); - ++r; - } - } - break; - default: - break; - } - - /* - * Allocate/init the request/reply buffers. Doing this - * using kmalloc for now -- one for each buf. - */ - for (i = 0; i < buf->rb_max_requests; i++) { - struct rpcrdma_req *req; - struct rpcrdma_rep *rep; - - len = cdata->inline_wsize + sizeof(struct rpcrdma_req); - /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! 
*/ - /* Typical ~2400b, so rounding up saves work later */ - if (len < 4096) - len = 4096; - req = kmalloc(len, GFP_KERNEL); - if (req == NULL) { - dprintk("RPC: %s: request buffer %d alloc" - " failed\n", __func__, i); - rc = -ENOMEM; - goto out; - } - memset(req, 0, sizeof(struct rpcrdma_req)); - buf->rb_send_bufs[i] = req; - buf->rb_send_bufs[i]->rl_buffer = buf; - - rc = rpcrdma_register_internal(ia, req->rl_base, - len - offsetof(struct rpcrdma_req, rl_base), - &buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - if (rc) - goto out; - - buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req); - - len = cdata->inline_rsize + sizeof(struct rpcrdma_rep); - rep = kmalloc(len, GFP_KERNEL); - if (rep == NULL) { - dprintk("RPC: %s: reply buffer %d alloc failed\n", - __func__, i); - rc = -ENOMEM; - goto out; - } - memset(rep, 0, sizeof(struct rpcrdma_rep)); - buf->rb_recv_bufs[i] = rep; - buf->rb_recv_bufs[i]->rr_buffer = buf; - init_waitqueue_head(&rep->rr_unbind); - - rc = rpcrdma_register_internal(ia, rep->rr_base, - len - offsetof(struct rpcrdma_rep, rr_base), - &buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - if (rc) - goto out; - - } - dprintk("RPC: %s: max_requests %d\n", - __func__, buf->rb_max_requests); - /* done */ - return 0; -out: - rpcrdma_buffer_destroy(buf); - return rc; -} - -/* - * Unregister and destroy buffer memory. Need to deal with - * partial initialization, so it's callable from failed create. - * Must be called before destroying endpoint, as registrations - * reference it. - */ -void -rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) -{ - int rc, i; - struct rpcrdma_ia *ia = rdmab_to_ia(buf); - - /* clean up in reverse order from create - * 1. recv mr memory (mr free, then kfree) - * 1a. bind mw memory - * 2. send mr memory (mr free, then kfree) - * 3. padding (if any) [moved to rpcrdma_ep_destroy] - * 4. 
arrays - */ - dprintk("RPC: %s: entering\n", __func__); - - for (i = 0; i < buf->rb_max_requests; i++) { - if (buf->rb_recv_bufs && buf->rb_recv_bufs[i]) { - rpcrdma_deregister_internal(ia, - buf->rb_recv_bufs[i]->rr_handle, - &buf->rb_recv_bufs[i]->rr_iov); - kfree(buf->rb_recv_bufs[i]); - } - if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { - while (!list_empty(&buf->rb_mws)) { - struct rpcrdma_mw *r; - r = list_entry(buf->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MTHCAFMR: - rc = ib_dealloc_fmr(r->r.fmr); - if (rc) - dprintk("RPC: %s:" - " ib_dealloc_fmr" - " failed %i\n", - __func__, rc); - break; - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - rc = ib_dealloc_mw(r->r.mw); - if (rc) - dprintk("RPC: %s:" - " ib_dealloc_mw" - " failed %i\n", - __func__, rc); - break; - default: - break; - } - } - rpcrdma_deregister_internal(ia, - buf->rb_send_bufs[i]->rl_handle, - &buf->rb_send_bufs[i]->rl_iov); - kfree(buf->rb_send_bufs[i]); - } - } - - kfree(buf->rb_pool); -} - -/* - * Get a set of request/reply buffers. - * - * Reply buffer (if needed) is attached to send buffer upon return. - * Rule: - * rb_send_index and rb_recv_index MUST always be pointing to the - * *next* available buffer (non-NULL). They are incremented after - * removing buffers, and decremented *before* returning them. 
- */ -struct rpcrdma_req * -rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) -{ - struct rpcrdma_req *req; - unsigned long flags; - - spin_lock_irqsave(&buffers->rb_lock, flags); - if (buffers->rb_send_index == buffers->rb_max_requests) { - spin_unlock_irqrestore(&buffers->rb_lock, flags); - dprintk("RPC: %s: out of request buffers\n", __func__); - return ((struct rpcrdma_req *)NULL); - } - - req = buffers->rb_send_bufs[buffers->rb_send_index]; - if (buffers->rb_send_index < buffers->rb_recv_index) { - dprintk("RPC: %s: %d extra receives outstanding (ok)\n", - __func__, - buffers->rb_recv_index - buffers->rb_send_index); - req->rl_reply = NULL; - } else { - req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; - buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; - } - buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; - if (!list_empty(&buffers->rb_mws)) { - int i = RPCRDMA_MAX_SEGS - 1; - do { - struct rpcrdma_mw *r; - r = list_entry(buffers->rb_mws.next, - struct rpcrdma_mw, mw_list); - list_del(&r->mw_list); - req->rl_segments[i].mr_chunk.rl_mw = r; - } while (--i >= 0); - } - spin_unlock_irqrestore(&buffers->rb_lock, flags); - return req; -} - -/* - * Put request/reply buffers back into pool. - * Pre-decrement counter/array index. 
- */ -void -rpcrdma_buffer_put(struct rpcrdma_req *req) -{ - struct rpcrdma_buffer *buffers = req->rl_buffer; - struct rpcrdma_ia *ia = rdmab_to_ia(buffers); - int i; - unsigned long flags; - - BUG_ON(req->rl_nchunks != 0); - spin_lock_irqsave(&buffers->rb_lock, flags); - buffers->rb_send_bufs[--buffers->rb_send_index] = req; - req->rl_niovs = 0; - if (req->rl_reply) { - buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply; - init_waitqueue_head(&req->rl_reply->rr_unbind); - req->rl_reply->rr_func = NULL; - req->rl_reply = NULL; - } - switch (ia->ri_memreg_strategy) { - case RPCRDMA_MTHCAFMR: - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - /* - * Cycle mw's back in reverse order, and "spin" them. - * This delays and scrambles reuse as much as possible. - */ - i = 1; - do { - struct rpcrdma_mw **mw; - mw = &req->rl_segments[i].mr_chunk.rl_mw; - list_add_tail(&(*mw)->mw_list, &buffers->rb_mws); - *mw = NULL; - } while (++i < RPCRDMA_MAX_SEGS); - list_add_tail(&req->rl_segments[0].mr_chunk.rl_mw->mw_list, - &buffers->rb_mws); - req->rl_segments[0].mr_chunk.rl_mw = NULL; - break; - default: - break; - } - spin_unlock_irqrestore(&buffers->rb_lock, flags); -} - -/* - * Recover reply buffers from pool. - * This happens when recovering from error conditions. - * Post-increment counter/array index. - */ -void -rpcrdma_recv_buffer_get(struct rpcrdma_req *req) -{ - struct rpcrdma_buffer *buffers = req->rl_buffer; - unsigned long flags; - - if (req->rl_iov.length == 0) /* special case xprt_rdma_allocate() */ - buffers = ((struct rpcrdma_req *) buffers)->rl_buffer; - spin_lock_irqsave(&buffers->rb_lock, flags); - if (buffers->rb_recv_index < buffers->rb_max_requests) { - req->rl_reply = buffers->rb_recv_bufs[buffers->rb_recv_index]; - buffers->rb_recv_bufs[buffers->rb_recv_index++] = NULL; - } - spin_unlock_irqrestore(&buffers->rb_lock, flags); -} - -/* - * Put reply buffers back into pool when not attached to - * request. 
This happens in error conditions, and when - * aborting unbinds. Pre-decrement counter/array index. - */ -void -rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep) -{ - struct rpcrdma_buffer *buffers = rep->rr_buffer; - unsigned long flags; - - rep->rr_func = NULL; - spin_lock_irqsave(&buffers->rb_lock, flags); - buffers->rb_recv_bufs[--buffers->rb_recv_index] = rep; - spin_unlock_irqrestore(&buffers->rb_lock, flags); -} - -/* - * Wrappers for internal-use kmalloc memory registration, used by buffer code. - */ - -int -rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, - struct ib_mr **mrp, struct ib_sge *iov) -{ - struct ib_phys_buf ipb; - struct ib_mr *mr; - int rc; - - /* - * All memory passed here was kmalloc'ed, therefore phys-contiguous. - */ - iov->addr = ib_dma_map_single(ia->ri_id->device, - va, len, DMA_BIDIRECTIONAL); - iov->length = len; - - if (ia->ri_bind_mem != NULL) { - *mrp = NULL; - iov->lkey = ia->ri_bind_mem->lkey; - return 0; - } - - ipb.addr = iov->addr; - ipb.size = iov->length; - mr = ib_reg_phys_mr(ia->ri_pd, &ipb, 1, - IB_ACCESS_LOCAL_WRITE, &iov->addr); - - dprintk("RPC: %s: phys convert: 0x%llx " - "registered 0x%llx length %d\n", - __func__, ipb.addr, iov->addr, len); - - if (IS_ERR(mr)) { - *mrp = NULL; - rc = PTR_ERR(mr); - dprintk("RPC: %s: failed with %i\n", __func__, rc); - } else { - *mrp = mr; - iov->lkey = mr->lkey; - rc = 0; - } - - return rc; -} - -int -rpcrdma_deregister_internal(struct rpcrdma_ia *ia, - struct ib_mr *mr, struct ib_sge *iov) -{ - int rc; - - ib_dma_unmap_single(ia->ri_id->device, - iov->addr, iov->length, DMA_BIDIRECTIONAL); - - if (NULL == mr) - return 0; - - rc = ib_dereg_mr(mr); - if (rc) - dprintk("RPC: %s: ib_dereg_mr failed %i\n", __func__, rc); - return rc; -} - -/* - * Wrappers for chunk registration, shared by read/write chunk code. - */ - -static void -rpcrdma_map_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg, int writing) -{ - seg->mr_dir = writing ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE; - seg->mr_dmalen = seg->mr_len; - if (seg->mr_page) - seg->mr_dma = ib_dma_map_page(ia->ri_id->device, - seg->mr_page, offset_in_page(seg->mr_offset), - seg->mr_dmalen, seg->mr_dir); - else - seg->mr_dma = ib_dma_map_single(ia->ri_id->device, - seg->mr_offset, - seg->mr_dmalen, seg->mr_dir); -} - -static void -rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) -{ - if (seg->mr_page) - ib_dma_unmap_page(ia->ri_id->device, - seg->mr_dma, seg->mr_dmalen, seg->mr_dir); - else - ib_dma_unmap_single(ia->ri_id->device, - seg->mr_dma, seg->mr_dmalen, seg->mr_dir); -} - -int -rpcrdma_register_external(struct rpcrdma_mr_seg *seg, - int nsegs, int writing, struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : - IB_ACCESS_REMOTE_READ); - struct rpcrdma_mr_seg *seg1 = seg; - int i; - int rc = 0; - - switch (ia->ri_memreg_strategy) { - -#if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: - rpcrdma_map_one(ia, seg, writing); - seg->mr_rkey = ia->ri_bind_mem->rkey; - seg->mr_base = seg->mr_dma; - seg->mr_nsegs = 1; - nsegs = 1; - break; -#endif - - /* Registration using fast memory registration */ - case RPCRDMA_MTHCAFMR: - { - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; - int len, pageoff = offset_in_page(seg->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; - if (nsegs > RPCRDMA_MAX_DATA_SEGS) - nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < nsegs;) { - rpcrdma_map_one(ia, seg, writing); - physaddrs[i] = seg->mr_dma; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - nsegs = i; - rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, - physaddrs, nsegs, seg1->mr_dma); - if (rc) { - dprintk("RPC: %s: failed ib_map_phys_fmr " - "%u@0x%llx+%i (%d)... 
status %i\n", __func__, - len, (unsigned long long)seg1->mr_dma, - pageoff, nsegs, rc); - while (nsegs--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = nsegs; - seg1->mr_len = len; - } - } - break; - - /* Registration using memory windows */ - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - { - struct ib_mw_bind param; - rpcrdma_map_one(ia, seg, writing); - param.mr = ia->ri_bind_mem; - param.wr_id = 0ULL; /* no send cookie */ - param.addr = seg->mr_dma; - param.length = seg->mr_len; - param.send_flags = 0; - param.mw_access_flags = mem_priv; - - DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_bind_mw(ia->ri_id->qp, - seg->mr_chunk.rl_mw->r.mw, ¶m); - if (rc) { - dprintk("RPC: %s: failed ib_bind_mw " - "%u@0x%llx status %i\n", - __func__, seg->mr_len, - (unsigned long long)seg->mr_dma, rc); - rpcrdma_unmap_one(ia, seg); - } else { - seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.addr; - seg->mr_nsegs = 1; - nsegs = 1; - } - } - break; - - /* Default registration each time */ - default: - { - struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; - int len = 0; - if (nsegs > RPCRDMA_MAX_DATA_SEGS) - nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < nsegs;) { - rpcrdma_map_one(ia, seg, writing); - ipb[i].addr = seg->mr_dma; - ipb[i].size = seg->mr_len; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - nsegs = i; - seg1->mr_base = seg1->mr_dma; - seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, - ipb, nsegs, mem_priv, &seg1->mr_base); - if (IS_ERR(seg1->mr_chunk.rl_mr)) { - rc = PTR_ERR(seg1->mr_chunk.rl_mr); - dprintk("RPC: %s: failed ib_reg_phys_mr " - "%u@0x%llx (%d)... 
status %i\n", - __func__, len, - (unsigned long long)seg1->mr_dma, nsegs, rc); - while (nsegs--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; - seg1->mr_nsegs = nsegs; - seg1->mr_len = len; - } - } - break; - } - if (rc) - return -1; - - return nsegs; -} - -int -rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, - struct rpcrdma_xprt *r_xprt, void *r) -{ - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_mr_seg *seg1 = seg; - int nsegs = seg->mr_nsegs, rc; - - switch (ia->ri_memreg_strategy) { - -#if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: - BUG_ON(nsegs != 1); - rpcrdma_unmap_one(ia, seg); - rc = 0; - break; -#endif - - case RPCRDMA_MTHCAFMR: - { - LIST_HEAD(l); - list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); - rc = ib_unmap_fmr(&l); - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - } - if (rc) - dprintk("RPC: %s: failed ib_unmap_fmr," - " status %i\n", __func__, rc); - break; - - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - { - struct ib_mw_bind param; - BUG_ON(nsegs != 1); - param.mr = ia->ri_bind_mem; - param.addr = 0ULL; /* unbind */ - param.length = 0; - param.mw_access_flags = 0; - if (r) { - param.wr_id = (u64) (unsigned long) r; - param.send_flags = IB_SEND_SIGNALED; - INIT_CQCOUNT(&r_xprt->rx_ep); - } else { - param.wr_id = 0ULL; - param.send_flags = 0; - DECR_CQCOUNT(&r_xprt->rx_ep); - } - rc = ib_bind_mw(ia->ri_id->qp, - seg->mr_chunk.rl_mw->r.mw, ¶m); - rpcrdma_unmap_one(ia, seg); - } - if (rc) - dprintk("RPC: %s: failed ib_(un)bind_mw," - " status %i\n", __func__, rc); - else - r = NULL; /* will upcall on completion */ - break; - - default: - rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); - seg1->mr_chunk.rl_mr = NULL; - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - if (rc) - dprintk("RPC: %s: failed ib_dereg_mr," - " status %i\n", __func__, rc); - break; - } - if (r) { - struct rpcrdma_rep *rep = r; - void (*func)(struct rpcrdma_rep *) = 
rep->rr_func; - rep->rr_func = NULL; - func(rep); /* dereg done, callback now */ - } - return nsegs; -} - -/* - * Prepost any receive buffer, then post send. - * - * Receive buffer is donated to hardware, reclaimed upon recv completion. - */ -int -rpcrdma_ep_post(struct rpcrdma_ia *ia, - struct rpcrdma_ep *ep, - struct rpcrdma_req *req) -{ - struct ib_send_wr send_wr, *send_wr_fail; - struct rpcrdma_rep *rep = req->rl_reply; - int rc; - - if (rep) { - rc = rpcrdma_ep_post_recv(ia, ep, rep); - if (rc) - goto out; - req->rl_reply = NULL; - } - - send_wr.next = NULL; - send_wr.wr_id = 0ULL; /* no send cookie */ - send_wr.sg_list = req->rl_send_iov; - send_wr.num_sge = req->rl_niovs; - send_wr.opcode = IB_WR_SEND; - send_wr.imm_data = 0; - if (send_wr.num_sge == 4) /* no need to sync any pad (constant) */ - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[3].addr, req->rl_send_iov[3].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[1].addr, req->rl_send_iov[1].length, - DMA_TO_DEVICE); - ib_dma_sync_single_for_device(ia->ri_id->device, - req->rl_send_iov[0].addr, req->rl_send_iov[0].length, - DMA_TO_DEVICE); - - if (DECR_CQCOUNT(ep) > 0) - send_wr.send_flags = 0; - else { /* Provider must take a send completion every now and then */ - INIT_CQCOUNT(ep); - send_wr.send_flags = IB_SEND_SIGNALED; - } - - rc = ib_post_send(ia->ri_id->qp, &send_wr, &send_wr_fail); - if (rc) - dprintk("RPC: %s: ib_post_send returned %i\n", __func__, - rc); -out: - return rc; -} - -/* - * (Re)post a receive buffer. 
- */ -int -rpcrdma_ep_post_recv(struct rpcrdma_ia *ia, - struct rpcrdma_ep *ep, - struct rpcrdma_rep *rep) -{ - struct ib_recv_wr recv_wr, *recv_wr_fail; - int rc; - - recv_wr.next = NULL; - recv_wr.wr_id = (u64) (unsigned long) rep; - recv_wr.sg_list = &rep->rr_iov; - recv_wr.num_sge = 1; - - ib_dma_sync_single_for_cpu(ia->ri_id->device, - rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL); - - DECR_CQCOUNT(ep); - rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail); - - if (rc) - dprintk("RPC: %s: ib_post_recv returned %i\n", __func__, - rc); - return rc; -} diff --git a/trunk/net/sunrpc/xprtrdma/xprt_rdma.h b/trunk/net/sunrpc/xprtrdma/xprt_rdma.h deleted file mode 100644 index 2427822f8bd4..000000000000 --- a/trunk/net/sunrpc/xprtrdma/xprt_rdma.h +++ /dev/null @@ -1,330 +0,0 @@ -/* - * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. - * - * This software is available to you under a choice of one of two - * licenses. You may choose to be licensed under the terms of the GNU - * General Public License (GPL) Version 2, available from the file - * COPYING in the main directory of this source tree, or the BSD-type - * license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following - * disclaimer in the documentation and/or other materials provided - * with the distribution. - * - * Neither the name of the Network Appliance, Inc. nor the names of - * its contributors may be used to endorse or promote products - * derived from this software without specific prior written - * permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _LINUX_SUNRPC_XPRT_RDMA_H -#define _LINUX_SUNRPC_XPRT_RDMA_H - -#include /* wait_queue_head_t, etc */ -#include /* spinlock_t, etc */ -#include /* atomic_t, etc */ - -#include /* RDMA connection api */ -#include /* RDMA verbs api */ - -#include /* rpc_xprt */ -#include /* RPC/RDMA protocol */ -#include /* xprt parameters */ - -/* - * Interface Adapter -- one per transport instance - */ -struct rpcrdma_ia { - struct rdma_cm_id *ri_id; - struct ib_pd *ri_pd; - struct ib_mr *ri_bind_mem; - struct completion ri_done; - int ri_async_rc; - enum rpcrdma_memreg ri_memreg_strategy; -}; - -/* - * RDMA Endpoint -- one per transport instance - */ - -struct rpcrdma_ep { - atomic_t rep_cqcount; - int rep_cqinit; - int rep_connected; - struct rpcrdma_ia *rep_ia; - struct ib_cq *rep_cq; - struct ib_qp_init_attr rep_attr; - wait_queue_head_t rep_connect_wait; - struct ib_sge rep_pad; /* holds zeroed pad */ - struct ib_mr *rep_pad_mr; /* holds zeroed pad */ - void (*rep_func)(struct rpcrdma_ep *); - struct rpc_xprt *rep_xprt; /* for rep_func */ - struct rdma_conn_param rep_remote_cma; - struct sockaddr_storage rep_remote_addr; -}; - -#define INIT_CQCOUNT(ep) 
atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit) -#define DECR_CQCOUNT(ep) atomic_sub_return(1, &(ep)->rep_cqcount) - -/* - * struct rpcrdma_rep -- this structure encapsulates state required to recv - * and complete a reply, asychronously. It needs several pieces of - * state: - * o recv buffer (posted to provider) - * o ib_sge (also donated to provider) - * o status of reply (length, success or not) - * o bookkeeping state to get run by tasklet (list, etc) - * - * These are allocated during initialization, per-transport instance; - * however, the tasklet execution list itself is global, as it should - * always be pretty short. - * - * N of these are associated with a transport instance, and stored in - * struct rpcrdma_buffer. N is the max number of outstanding requests. - */ - -/* temporary static scatter/gather max */ -#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ -#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ -#define MAX_RPCRDMAHDR (\ - /* max supported RPC/RDMA header */ \ - sizeof(struct rpcrdma_msg) + (2 * sizeof(u32)) + \ - (sizeof(struct rpcrdma_read_chunk) * RPCRDMA_MAX_SEGS) + sizeof(u32)) - -struct rpcrdma_buffer; - -struct rpcrdma_rep { - unsigned int rr_len; /* actual received reply length */ - struct rpcrdma_buffer *rr_buffer; /* home base for this structure */ - struct rpc_xprt *rr_xprt; /* needed for request/reply matching */ - void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */ - struct list_head rr_list; /* tasklet list */ - wait_queue_head_t rr_unbind; /* optional unbind wait */ - struct ib_sge rr_iov; /* for posting */ - struct ib_mr *rr_handle; /* handle for mem in rr_iov */ - char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */ -}; - -/* - * struct rpcrdma_req -- structure central to the request/reply sequence. - * - * N of these are associated with a transport instance, and stored in - * struct rpcrdma_buffer. N is the max number of outstanding requests. 
- * - * It includes pre-registered buffer memory for send AND recv. - * The recv buffer, however, is not owned by this structure, and - * is "donated" to the hardware when a recv is posted. When a - * reply is handled, the recv buffer used is given back to the - * struct rpcrdma_req associated with the request. - * - * In addition to the basic memory, this structure includes an array - * of iovs for send operations. The reason is that the iovs passed to - * ib_post_{send,recv} must not be modified until the work request - * completes. - * - * NOTES: - * o RPCRDMA_MAX_SEGS is the max number of addressible chunk elements we - * marshal. The number needed varies depending on the iov lists that - * are passed to us, the memory registration mode we are in, and if - * physical addressing is used, the layout. - */ - -struct rpcrdma_mr_seg { /* chunk descriptors */ - union { /* chunk memory handles */ - struct ib_mr *rl_mr; /* if registered directly */ - struct rpcrdma_mw { /* if registered from region */ - union { - struct ib_mw *mw; - struct ib_fmr *fmr; - } r; - struct list_head mw_list; - } *rl_mw; - } mr_chunk; - u64 mr_base; /* registration result */ - u32 mr_rkey; /* registration result */ - u32 mr_len; /* length of chunk or segment */ - int mr_nsegs; /* number of segments in chunk or 0 */ - enum dma_data_direction mr_dir; /* segment mapping direction */ - dma_addr_t mr_dma; /* segment mapping address */ - size_t mr_dmalen; /* segment mapping length */ - struct page *mr_page; /* owning page, if any */ - char *mr_offset; /* kva if no page, else offset */ -}; - -struct rpcrdma_req { - size_t rl_size; /* actual length of buffer */ - unsigned int rl_niovs; /* 0, 2 or 4 */ - unsigned int rl_nchunks; /* non-zero if chunks */ - struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ - struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ - struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ - struct ib_sge rl_send_iov[4]; /* for 
active requests */ - struct ib_sge rl_iov; /* for posting */ - struct ib_mr *rl_handle; /* handle for mem in rl_iov */ - char rl_base[MAX_RPCRDMAHDR]; /* start of actual buffer */ - __u32 rl_xdr_buf[0]; /* start of returned rpc rq_buffer */ -}; -#define rpcr_to_rdmar(r) \ - container_of((r)->rq_buffer, struct rpcrdma_req, rl_xdr_buf[0]) - -/* - * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for - * inline requests/replies, and client/server credits. - * - * One of these is associated with a transport instance - */ -struct rpcrdma_buffer { - spinlock_t rb_lock; /* protects indexes */ - atomic_t rb_credits; /* most recent server credits */ - unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ - int rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs */ - int rb_send_index; - struct rpcrdma_req **rb_send_bufs; - int rb_recv_index; - struct rpcrdma_rep **rb_recv_bufs; - char *rb_pool; -}; -#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia) - -/* - * Internal structure for transport instance creation. This - * exists primarily for modularity. 
- * - * This data should be set with mount options - */ -struct rpcrdma_create_data_internal { - struct sockaddr_storage addr; /* RDMA server address */ - unsigned int max_requests; /* max requests (slots) in flight */ - unsigned int rsize; /* mount rsize - max read hdr+data */ - unsigned int wsize; /* mount wsize - max write hdr+data */ - unsigned int inline_rsize; /* max non-rdma read data payload */ - unsigned int inline_wsize; /* max non-rdma write data payload */ - unsigned int padding; /* non-rdma write header padding */ -}; - -#define RPCRDMA_INLINE_READ_THRESHOLD(rq) \ - (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_rsize) - -#define RPCRDMA_INLINE_WRITE_THRESHOLD(rq)\ - (rpcx_to_rdmad(rq->rq_task->tk_xprt).inline_wsize) - -#define RPCRDMA_INLINE_PAD_VALUE(rq)\ - rpcx_to_rdmad(rq->rq_task->tk_xprt).padding - -/* - * Statistics for RPCRDMA - */ -struct rpcrdma_stats { - unsigned long read_chunk_count; - unsigned long write_chunk_count; - unsigned long reply_chunk_count; - - unsigned long long total_rdma_request; - unsigned long long total_rdma_reply; - - unsigned long long pullup_copy_count; - unsigned long long fixup_copy_count; - unsigned long hardway_register_count; - unsigned long failed_marshal_count; - unsigned long bad_reply_count; -}; - -/* - * RPCRDMA transport -- encapsulates the structures above for - * integration with RPC. - * - * The contained structures are embedded, not pointers, - * for convenience. This structure need not be visible externally. - * - * It is allocated and initialized during mount, and released - * during unmount. 
- */ -struct rpcrdma_xprt { - struct rpc_xprt xprt; - struct rpcrdma_ia rx_ia; - struct rpcrdma_ep rx_ep; - struct rpcrdma_buffer rx_buf; - struct rpcrdma_create_data_internal rx_data; - struct delayed_work rdma_connect; - struct rpcrdma_stats rx_stats; -}; - -#define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) -#define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) - -/* - * Interface Adapter calls - xprtrdma/verbs.c - */ -int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); -void rpcrdma_ia_close(struct rpcrdma_ia *); - -/* - * Endpoint calls - xprtrdma/verbs.c - */ -int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *); -int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *); -int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); -int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); - -int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, - struct rpcrdma_req *); -int rpcrdma_ep_post_recv(struct rpcrdma_ia *, struct rpcrdma_ep *, - struct rpcrdma_rep *); - -/* - * Buffer calls - xprtrdma/verbs.c - */ -int rpcrdma_buffer_create(struct rpcrdma_buffer *, struct rpcrdma_ep *, - struct rpcrdma_ia *, - struct rpcrdma_create_data_internal *); -void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); - -struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *); -void rpcrdma_buffer_put(struct rpcrdma_req *); -void rpcrdma_recv_buffer_get(struct rpcrdma_req *); -void rpcrdma_recv_buffer_put(struct rpcrdma_rep *); - -int rpcrdma_register_internal(struct rpcrdma_ia *, void *, int, - struct ib_mr **, struct ib_sge *); -int rpcrdma_deregister_internal(struct rpcrdma_ia *, - struct ib_mr *, struct ib_sge *); - -int rpcrdma_register_external(struct rpcrdma_mr_seg *, - int, int, struct rpcrdma_xprt *); -int rpcrdma_deregister_external(struct rpcrdma_mr_seg *, - struct rpcrdma_xprt *, void *); - -/* - * RPC/RDMA connection management calls - 
xprtrdma/rpc_rdma.c - */ -void rpcrdma_conn_func(struct rpcrdma_ep *); -void rpcrdma_reply_handler(struct rpcrdma_rep *); - -/* - * RPC/RDMA protocol calls - xprtrdma/rpc_rdma.c - */ -int rpcrdma_marshal_req(struct rpc_rqst *); - -#endif /* _LINUX_SUNRPC_XPRT_RDMA_H */ diff --git a/trunk/net/sunrpc/xprtsock.c b/trunk/net/sunrpc/xprtsock.c index 02298f529dad..282efd447a61 100644 --- a/trunk/net/sunrpc/xprtsock.c +++ b/trunk/net/sunrpc/xprtsock.c @@ -13,14 +13,10 @@ * (C) 1999 Trond Myklebust * * IP socket transport implementation, (C) 2005 Chuck Lever - * - * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. - * */ #include #include -#include #include #include #include @@ -32,7 +28,6 @@ #include #include #include -#include #include #include @@ -265,29 +260,14 @@ struct sock_xprt { #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) -{ - return (struct sockaddr *) &xprt->addr; -} - -static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) +static void xs_format_peer_addresses(struct rpc_xprt *xprt) { - return (struct sockaddr_in *) &xprt->addr; -} - -static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) -{ - return (struct sockaddr_in6 *) &xprt->addr; -} - -static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) -{ - struct sockaddr_in *addr = xs_addr_in(xprt); + struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, NIPQUAD_FMT, + snprintf(buf, 20, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -299,123 +279,26 @@ static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - if (xprt->prot == IPPROTO_UDP) - snprintf(buf, 8, "udp"); - else - snprintf(buf, 8, "tcp"); - } - 
xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + if (xprt->prot == IPPROTO_UDP) + xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; + else + xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", + snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(10, GFP_KERNEL); - if (buf) { - snprintf(buf, 10, "%02x%02x%02x%02x", - NIPQUAD(addr->sin_addr.s_addr)); - } - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(30, GFP_KERNEL); - if (buf) { - snprintf(buf, 30, NIPQUAD_FMT".%u.%u", - NIPQUAD(addr->sin_addr.s_addr), - ntohs(addr->sin_port) >> 8, - ntohs(addr->sin_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - - xprt->address_strings[RPC_DISPLAY_NETID] = - kstrdup(xprt->prot == IPPROTO_UDP ? 
- RPCBIND_NETID_UDP : RPCBIND_NETID_TCP, GFP_KERNEL); -} - -static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) -{ - struct sockaddr_in6 *addr = xs_addr_in6(xprt); - char *buf; - - buf = kzalloc(40, GFP_KERNEL); - if (buf) { - snprintf(buf, 40, NIP6_FMT, - NIP6(addr->sin6_addr)); - } - xprt->address_strings[RPC_DISPLAY_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%u", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_PORT] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - if (xprt->prot == IPPROTO_UDP) - snprintf(buf, 8, "udp"); - else - snprintf(buf, 8, "tcp"); - } - xprt->address_strings[RPC_DISPLAY_PROTO] = buf; - - buf = kzalloc(64, GFP_KERNEL); - if (buf) { - snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", - NIP6(addr->sin6_addr), - ntohs(addr->sin6_port), - xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); - } - xprt->address_strings[RPC_DISPLAY_ALL] = buf; - - buf = kzalloc(36, GFP_KERNEL); - if (buf) { - snprintf(buf, 36, NIP6_SEQFMT, - NIP6(addr->sin6_addr)); - } - xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; - - buf = kzalloc(8, GFP_KERNEL); - if (buf) { - snprintf(buf, 8, "%4hx", - ntohs(addr->sin6_port)); - } - xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; - - buf = kzalloc(50, GFP_KERNEL); - if (buf) { - snprintf(buf, 50, NIP6_FMT".%u.%u", - NIP6(addr->sin6_addr), - ntohs(addr->sin6_port) >> 8, - ntohs(addr->sin6_port) & 0xff); - } - xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; - - xprt->address_strings[RPC_DISPLAY_NETID] = - kstrdup(xprt->prot == IPPROTO_UDP ? 
- RPCBIND_NETID_UDP6 : RPCBIND_NETID_TCP6, GFP_KERNEL); } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - int i; - - for (i = 0; i < RPC_DISPLAY_MAX; i++) - kfree(xprt->address_strings[i]); + kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); + kfree(xprt->address_strings[RPC_DISPLAY_PORT]); + kfree(xprt->address_strings[RPC_DISPLAY_ALL]); } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -580,20 +463,19 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, - xs_addr(xprt), + (struct sockaddr *) &xprt->addr, xprt->addrlen, xdr, req->rq_bytes_sent); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); - if (status >= 0) { - task->tk_bytes_sent += status; - if (status >= req->rq_slen) - return 0; - /* Still some bytes left; set up for a retry later. */ + if (likely(status >= (int) req->rq_slen)) + return 0; + + /* Still some bytes left; set up for a retry later. */ + if (status > 0) status = -EAGAIN; - } switch (status) { case -ENETUNREACH: @@ -641,8 +523,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - int status; - unsigned int retry = 0; + int status, retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -780,7 +661,6 @@ static void xs_destroy(struct rpc_xprt *xprt) xs_free_peer_addresses(xprt); kfree(xprt->slot); kfree(xprt); - module_put(THIS_MODULE); } static inline struct rpc_xprt *xprt_from_sock(struct sock *sk) @@ -1259,23 +1139,14 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr *addr = xs_addr(xprt); + struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - switch (addr->sa_family) { - case AF_INET: - ((struct 
sockaddr_in *)addr)->sin_port = htons(port); - break; - case AF_INET6: - ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); - break; - default: - BUG(); - } + sap->sin_port = htons(port); } -static int xs_bind4(struct sock_xprt *transport, struct socket *sock) +static int xs_bind(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, @@ -1303,42 +1174,8 @@ static int xs_bind4(struct sock_xprt *transport, struct socket *sock) else port--; } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", - __FUNCTION__, NIPQUAD(myaddr.sin_addr), - port, err ? "failed" : "ok", err); - return err; -} - -static int xs_bind6(struct sock_xprt *transport, struct socket *sock) -{ - struct sockaddr_in6 myaddr = { - .sin6_family = AF_INET6, - }; - struct sockaddr_in6 *sa; - int err; - unsigned short port = transport->port; - - if (!transport->xprt.resvport) - port = 0; - sa = (struct sockaddr_in6 *)&transport->addr; - myaddr.sin6_addr = sa->sin6_addr; - do { - myaddr.sin6_port = htons(port); - err = kernel_bind(sock, (struct sockaddr *) &myaddr, - sizeof(myaddr)); - if (!transport->xprt.resvport) - break; - if (err == 0) { - transport->port = port; - break; - } - if (port <= xprt_min_resvport) - port = xprt_max_resvport; - else - port--; - } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", - NIP6(myaddr.sin6_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", + NIPQUAD(myaddr.sin_addr), port, err ? 
"failed" : "ok", err); return err; } @@ -1346,69 +1183,38 @@ static int xs_bind6(struct sock_xprt *transport, struct socket *sock) static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; -static inline void xs_reclassify_socket4(struct socket *sock) +static inline void xs_reclassify_socket(struct socket *sock) { struct sock *sk = sock->sk; - BUG_ON(sock_owned_by_user(sk)); - sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", - &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); -} + switch (sk->sk_family) { + case AF_INET: + sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", + &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); + break; -static inline void xs_reclassify_socket6(struct socket *sock) -{ - struct sock *sk = sock->sk; + case AF_INET6: + sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", + &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); + break; - BUG_ON(sock_owned_by_user(sk)); - sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", - &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); + default: + BUG(); + } } #else -static inline void xs_reclassify_socket4(struct socket *sock) -{ -} - -static inline void xs_reclassify_socket6(struct socket *sock) +static inline void xs_reclassify_socket(struct socket *sock) { } #endif -static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_udp_data_ready; - sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; - sk->sk_allocation = GFP_ATOMIC; - - xprt_set_connected(xprt); - - /* Reset to new socket */ - transport->sock = sock; - 
transport->inet = sk; - - write_unlock_bh(&sk->sk_callback_lock); - } - xs_udp_do_set_buffer_size(xprt); -} - /** - * xs_udp_connect_worker4 - set up a UDP socket + * xs_udp_connect_worker - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker4(struct work_struct *work) +static void xs_udp_connect_worker(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1426,9 +1232,9 @@ static void xs_udp_connect_worker4(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket4(sock); + xs_reclassify_socket(sock); - if (xs_bind4(transport, sock)) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1436,48 +1242,29 @@ static void xs_udp_connect_worker4(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - xs_udp_finish_connecting(xprt, sock); - status = 0; -out: - xprt_wake_pending_tasks(xprt, status); - xprt_clear_connecting(xprt); -} + if (!transport->inet) { + struct sock *sk = sock->sk; -/** - * xs_udp_connect_worker6 - set up a UDP socket - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. 
- */ -static void xs_udp_connect_worker6(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; + write_lock_bh(&sk->sk_callback_lock); - if (xprt->shutdown || !xprt_bound(xprt)) - goto out; + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; - /* Start by resetting any existing state */ - xs_close(xprt); + xprt_set_connected(xprt); - if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { - dprintk("RPC: can't create UDP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; + write_unlock_bh(&sk->sk_callback_lock); } - - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - - xs_udp_finish_connecting(xprt, sock); + xs_udp_do_set_buffer_size(xprt); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1508,52 +1295,13 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } -static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = 
xs_tcp_data_ready; - sk->sk_state_change = xs_tcp_state_change; - sk->sk_write_space = xs_tcp_write_space; - sk->sk_allocation = GFP_ATOMIC; - - /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - - xprt_clear_connected(xprt); - - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; - - write_unlock_bh(&sk->sk_callback_lock); - } - - /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; - return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); -} - /** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker4(struct work_struct *work) +static void xs_tcp_connect_worker(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1567,12 +1315,13 @@ static void xs_tcp_connect_worker4(struct work_struct *work) if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + dprintk("RPC: can't create TCP transport " + "socket (%d).\n", -err); goto out; } - xs_reclassify_socket4(sock); + xs_reclassify_socket(sock); - if (xs_bind4(transport, sock) < 0) { + if (xs_bind(transport, sock)) { sock_release(sock); goto out; } @@ -1583,70 +1332,43 @@ static void xs_tcp_connect_worker4(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - status = xs_tcp_finish_connecting(xprt, sock); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), - sock->sk->sk_state); - if 
(status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_close(xprt); - break; - } - } -out: - xprt_wake_pending_tasks(xprt, status); -out_clear: - xprt_clear_connecting(xprt); -} + if (!transport->inet) { + struct sock *sk = sock->sk; -/** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect - * - * Invoked by a work queue tasklet. - */ -static void xs_tcp_connect_worker6(struct work_struct *work) -{ - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; + write_lock_bh(&sk->sk_callback_lock); - if (xprt->shutdown || !xprt_bound(xprt)) - goto out; + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; - if (!sock) { - /* start from scratch */ - if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else - /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xprt_clear_connected(xprt); - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, 
xprt->address_strings[RPC_DISPLAY_ALL]); + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; - status = xs_tcp_finish_connecting(xprt, sock); + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, + xprt->addrlen, O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); + xprt, -status, xprt_connected(xprt), + sock->sk->sk_state); if (status < 0) { switch (status) { case -EINPROGRESS: @@ -1786,8 +1508,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, }; -static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, - unsigned int slot_table_size) +static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned int slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -1828,9 +1549,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args) +struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) { - struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1839,11 +1559,15 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) + xprt_set_bound(xprt); + xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. 
*/ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); + INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1856,37 +1580,11 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); - switch (addr->sa_family) { - case AF_INET: - if (((struct sockaddr_in *)addr)->sin_port != htons(0)) - xprt_set_bound(xprt); - - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt); - break; - case AF_INET6: - if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) - xprt_set_bound(xprt); - - INIT_DELAYED_WORK(&transport->connect_worker, - xs_udp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt); - break; - default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); - } - + xs_format_peer_addresses(xprt); dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - if (try_module_get(THIS_MODULE)) - return xprt; - - kfree(xprt->slot); - kfree(xprt); - return ERR_PTR(-EINVAL); + return xprt; } /** @@ -1894,9 +1592,8 @@ struct rpc_xprt *xs_setup_udp(struct xprt_create *args) * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) +struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) { - struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1905,10 +1602,14 @@ struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); + if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) + xprt_set_bound(xprt); + xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; 
xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1921,55 +1622,15 @@ struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); - switch (addr->sa_family) { - case AF_INET: - if (((struct sockaddr_in *)addr)->sin_port != htons(0)) - xprt_set_bound(xprt); - - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); - xs_format_ipv4_peer_addresses(xprt); - break; - case AF_INET6: - if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) - xprt_set_bound(xprt); - - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); - xs_format_ipv6_peer_addresses(xprt); - break; - default: - kfree(xprt); - return ERR_PTR(-EAFNOSUPPORT); - } - + xs_format_peer_addresses(xprt); dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); - if (try_module_get(THIS_MODULE)) - return xprt; - - kfree(xprt->slot); - kfree(xprt); - return ERR_PTR(-EINVAL); + return xprt; } -static struct xprt_class xs_udp_transport = { - .list = LIST_HEAD_INIT(xs_udp_transport.list), - .name = "udp", - .owner = THIS_MODULE, - .ident = IPPROTO_UDP, - .setup = xs_setup_udp, -}; - -static struct xprt_class xs_tcp_transport = { - .list = LIST_HEAD_INIT(xs_tcp_transport.list), - .name = "tcp", - .owner = THIS_MODULE, - .ident = IPPROTO_TCP, - .setup = xs_setup_tcp, -}; - /** - * init_socket_xprt - set up xprtsock's sysctls, register with RPC client + * init_socket_xprt - set up xprtsock's sysctls * */ int init_socket_xprt(void) @@ -1979,14 +1640,11 @@ int init_socket_xprt(void) sunrpc_table_header = register_sysctl_table(sunrpc_table); #endif - xprt_register_transport(&xs_udp_transport); - xprt_register_transport(&xs_tcp_transport); - return 0; } /** - * cleanup_socket_xprt - remove xprtsock's sysctls, unregister + * cleanup_socket_xprt - remove xprtsock's sysctls * */ void cleanup_socket_xprt(void) @@ -1997,7 +1655,4 @@ void cleanup_socket_xprt(void) 
sunrpc_table_header = NULL; } #endif - - xprt_unregister_transport(&xs_udp_transport); - xprt_unregister_transport(&xs_tcp_transport); } diff --git a/trunk/security/selinux/hooks.c b/trunk/security/selinux/hooks.c index cf76150e623e..3c3fff33d1ce 100644 --- a/trunk/security/selinux/hooks.c +++ b/trunk/security/selinux/hooks.c @@ -3932,7 +3932,7 @@ static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device * } static unsigned int selinux_ip_postroute_last(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *), @@ -3941,6 +3941,7 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; + struct sk_buff *skb = *pskb; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; struct sk_security_struct *sksec; @@ -3976,23 +3977,23 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, } static unsigned int selinux_ipv4_postroute_last(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET); + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static unsigned int selinux_ipv6_postroute_last(unsigned int hooknum, - struct sk_buff *skb, + struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return selinux_ip_postroute_last(hooknum, skb, in, out, okfn, PF_INET6); + return selinux_ip_postroute_last(hooknum, pskb, in, out, okfn, PF_INET6); } #endif /* IPV6 */