diff --git a/[refs] b/[refs] index c333d1516763..761a43800edf 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 302fe1758d85ad9c868e77625f61b7edad106381 +refs/heads/master: 5cd16ee934eafca74a6bb790328950cec68a8b78 diff --git a/trunk/include/asm-powerpc/page.h b/trunk/include/asm-powerpc/page.h new file mode 100644 index 000000000000..18c1e5ee81a3 --- /dev/null +++ b/trunk/include/asm-powerpc/page.h @@ -0,0 +1,179 @@ +#ifndef _ASM_POWERPC_PAGE_H +#define _ASM_POWERPC_PAGE_H + +/* + * Copyright (C) 2001,2005 IBM Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __KERNEL__ +#include +#include + +/* + * On PPC32 page size is 4K. For PPC64 we support either 4K or 64K software + * page size. When using 64K pages however, whether we are really supporting + * 64K pages in HW or not is irrelevant to those definitions. + */ +#ifdef CONFIG_PPC_64K_PAGES +#define PAGE_SHIFT 16 +#else +#define PAGE_SHIFT 12 +#endif + +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) + +/* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ +#define __HAVE_ARCH_GATE_AREA 1 + +/* + * Subtle: (1 << PAGE_SHIFT) is an int, not an unsigned long. So if we + * assign PAGE_MASK to a larger type it gets extended the way we want + * (i.e. with 1s in the high bits) + */ +#define PAGE_MASK (~((1 << PAGE_SHIFT) - 1)) + +#define PAGE_OFFSET ASM_CONST(CONFIG_KERNEL_START) +#define KERNELBASE PAGE_OFFSET + +#ifdef CONFIG_DISCONTIGMEM +#define page_to_pfn(page) discontigmem_page_to_pfn(page) +#define pfn_to_page(pfn) discontigmem_pfn_to_page(pfn) +#define pfn_valid(pfn) discontigmem_pfn_valid(pfn) +#endif + +#ifdef CONFIG_FLATMEM +#define pfn_to_page(pfn) (mem_map + (pfn)) +#define page_to_pfn(page) ((unsigned long)((page) - mem_map)) +#define pfn_valid(pfn) ((pfn) < max_mapnr) +#endif + +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) + +#define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + +/* + * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, + * and needs to be executable. This means the whole heap ends + * up being executable. + */ +#define VM_DATA_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_DATA_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#ifdef __powerpc64__ +#include +#else +#include +#endif + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#ifndef __ASSEMBLY__ + +#undef STRICT_MM_TYPECHECKS + +#ifdef STRICT_MM_TYPECHECKS +/* These are used to make use of C type-checking. */ + +/* PTE level */ +typedef struct { pte_basic_t pte; } pte_t; +#define pte_val(x) ((x).pte) +#define __pte(x) ((pte_t) { (x) }) + +/* 64k pages additionally define a bigger "real PTE" type that gathers + * the "second half" part of the PTE for pseudo 64k pages + */ +#ifdef CONFIG_PPC_64K_PAGES +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef struct { pte_t pte; } real_pte_t; +#endif + +/* PMD level */ +typedef struct { unsigned long pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) +#define __pmd(x) ((pmd_t) { (x) }) + +/* PUD level exusts only on 4k pages */ +#ifndef CONFIG_PPC_64K_PAGES +typedef struct { unsigned long pud; } pud_t; +#define pud_val(x) ((x).pud) +#define __pud(x) ((pud_t) { (x) }) +#endif + +/* PGD level */ +typedef struct { unsigned long pgd; } pgd_t; +#define pgd_val(x) ((x).pgd) +#define __pgd(x) ((pgd_t) { (x) }) + +/* Page protection bits */ +typedef struct { unsigned long pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) +#define __pgprot(x) ((pgprot_t) { (x) }) + +#else + +/* + * .. while these make it easier on the compiler + */ + +typedef pte_basic_t pte_t; +#define pte_val(x) (x) +#define __pte(x) (x) + +#ifdef CONFIG_PPC_64K_PAGES +typedef struct { pte_t pte; unsigned long hidx; } real_pte_t; +#else +typedef unsigned long real_pte_t; +#endif + + +typedef unsigned long pmd_t; +#define pmd_val(x) (x) +#define __pmd(x) (x) + +#ifndef CONFIG_PPC_64K_PAGES +typedef unsigned long pud_t; +#define pud_val(x) (x) +#define __pud(x) (x) +#endif + +typedef unsigned long pgd_t; +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +typedef unsigned long pgprot_t; +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif + +struct page; +extern void clear_user_page(void *page, unsigned long vaddr, struct page *pg); +extern void copy_user_page(void *to, void *from, unsigned long vaddr, + struct page *p); +extern int page_is_ram(unsigned long pfn); + +#endif /* __ASSEMBLY__ */ + +#endif /* __KERNEL__ */ + +#endif /* _ASM_POWERPC_PAGE_H */ diff --git a/trunk/include/asm-powerpc/page_32.h b/trunk/include/asm-powerpc/page_32.h new file mode 100644 index 000000000000..35221300a2ce --- /dev/null +++ b/trunk/include/asm-powerpc/page_32.h @@ -0,0 +1,38 @@ +#ifndef _ASM_POWERPC_PAGE_32_H +#define _ASM_POWERPC_PAGE_32_H + +#define VM_DATA_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS32 + +#ifndef __ASSEMBLY__ +/* + * The basic type of a PTE - 64 bits for those CPUs with > 32 bit + * physical addressing. For now this just the IBM PPC440. + */ +#ifdef CONFIG_PTE_64BIT +typedef unsigned long long pte_basic_t; +#define PTE_SHIFT (PAGE_SHIFT - 3) /* 512 ptes per page */ +#define PTE_FMT "%16Lx" +#else +typedef unsigned long pte_basic_t; +#define PTE_SHIFT (PAGE_SHIFT - 2) /* 1024 ptes per page */ +#define PTE_FMT "%.8lx" +#endif + +struct page; +extern void clear_pages(void *page, int order); +static inline void clear_page(void *page) { clear_pages(page, 0); } +extern void copy_page(void *to, void *from); + +/* Pure 2^n version of get_order */ +extern __inline__ int get_order(unsigned long size) +{ + int lz; + + size = (size-1) >> PAGE_SHIFT; + asm ("cntlzw %0,%1" : "=r" (lz) : "r" (size)); + return 32 - lz; +} + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_PAGE_32_H */ diff --git a/trunk/include/asm-powerpc/page_64.h b/trunk/include/asm-powerpc/page_64.h new file mode 100644 index 000000000000..c16f106b5373 --- /dev/null +++ b/trunk/include/asm-powerpc/page_64.h @@ -0,0 +1,174 @@ +#ifndef _ASM_POWERPC_PAGE_64_H +#define _ASM_POWERPC_PAGE_64_H + +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* + * We always define HW_PAGE_SHIFT to 12 as use of 64K pages remains Linux + * specific, every notion of page number shared with the firmware, TCEs, + * iommu, etc... still uses a page size of 4K. + */ +#define HW_PAGE_SHIFT 12 +#define HW_PAGE_SIZE (ASM_CONST(1) << HW_PAGE_SHIFT) +#define HW_PAGE_MASK (~(HW_PAGE_SIZE-1)) + +/* + * PAGE_FACTOR is the number of bits factor between PAGE_SHIFT and + * HW_PAGE_SHIFT, that is 4K pages. + */ +#define PAGE_FACTOR (PAGE_SHIFT - HW_PAGE_SHIFT) + +#define REGION_SIZE 4UL +#define REGION_SHIFT 60UL +#define REGION_MASK (((1UL<> REGION_SHIFT) +#define KERNEL_REGION_ID (KERNELBASE >> REGION_SHIFT) +#define USER_REGION_ID (0UL) +#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) + +/* Segment size */ +#define SID_SHIFT 28 +#define SID_MASK 0xfffffffffUL +#define ESID_MASK 0xfffffffff0000000UL +#define GET_ESID(x) (((x) >> SID_SHIFT) & SID_MASK) + +#ifndef __ASSEMBLY__ +#include + +typedef unsigned long pte_basic_t; + +static __inline__ void clear_page(void *addr) +{ + unsigned long lines, line_size; + + line_size = ppc64_caches.dline_size; + lines = ppc64_caches.dlines_per_page; + + __asm__ __volatile__( + "mtctr %1 # clear_page\n\ +1: dcbz 0,%0\n\ + add %0,%0,%3\n\ + bdnz+ 1b" + : "=r" (addr) + : "r" (lines), "0" (addr), "r" (line_size) + : "ctr", "memory"); +} + +extern void copy_4K_page(void *to, void *from); + +#ifdef CONFIG_PPC_64K_PAGES +static inline void copy_page(void *to, void *from) +{ + unsigned int i; + for (i=0; i < (1 << (PAGE_SHIFT - 12)); i++) { + copy_4K_page(to, from); + to += 4096; + from += 4096; + } +} +#else /* CONFIG_PPC_64K_PAGES */ +static inline void copy_page(void *to, void *from) +{ + copy_4K_page(to, from); +} +#endif /* CONFIG_PPC_64K_PAGES */ + +/* Log 2 of page table size */ +extern u64 ppc64_pft_size; + +/* Large pages size */ +extern unsigned int HPAGE_SHIFT; +#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT) +#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) + +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_HUGETLB_PAGE + +#define HTLB_AREA_SHIFT 40 +#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) +#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) + +#define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ + - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) + +#define ARCH_HAS_HUGEPAGE_ONLY_RANGE +#define ARCH_HAS_PREPARE_HUGEPAGE_RANGE +#define ARCH_HAS_SETCLEAR_HUGE_PTE + +#define touches_hugepage_low_range(mm, addr, len) \ + (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) +#define touches_hugepage_high_range(mm, addr, len) \ + (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) + +#define __within_hugepage_low_range(addr, len, segmask) \ + ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) +#define within_hugepage_low_range(addr, len) \ + __within_hugepage_low_range((addr), (len), \ + current->mm->context.low_htlb_areas) +#define __within_hugepage_high_range(addr, len, zonemask) \ + ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) +#define within_hugepage_high_range(addr, len) \ + __within_hugepage_high_range((addr), (len), \ + current->mm->context.high_htlb_areas) + +#define is_hugepage_only_range(mm, addr, len) \ + (touches_hugepage_high_range((mm), (addr), (len)) || \ + touches_hugepage_low_range((mm), (addr), (len))) +#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + +#define in_hugepage_area(context, addr) \ + (cpu_has_feature(CPU_FTR_16M_PAGE) && \ + ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ + ( ((addr) < 0x100000000L) && \ + ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) + +#else /* !CONFIG_HUGETLB_PAGE */ + +#define in_hugepage_area(mm, addr) 0 + +#endif /* !CONFIG_HUGETLB_PAGE */ + +#ifdef MODULE +#define __page_aligned __attribute__((__aligned__(PAGE_SIZE))) +#else +#define __page_aligned \ + __attribute__((__aligned__(PAGE_SIZE), \ + __section__(".data.page_aligned"))) +#endif + +#define VM_DATA_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_DATA_DEFAULT_FLAGS32 : VM_DATA_DEFAULT_FLAGS64) + +/* + * This is the default if a program doesn't have a PT_GNU_STACK + * program header entry. The PPC64 ELF ABI has a non executable stack + * stack by default, so in the absense of a PT_GNU_STACK program header + * we turn execute permission off. + */ +#define VM_STACK_DEFAULT_FLAGS32 (VM_READ | VM_WRITE | VM_EXEC | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_STACK_DEFAULT_FLAGS64 (VM_READ | VM_WRITE | \ + VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) + +#define VM_STACK_DEFAULT_FLAGS \ + (test_thread_flag(TIF_32BIT) ? \ + VM_STACK_DEFAULT_FLAGS32 : VM_STACK_DEFAULT_FLAGS64) + +#include + +#endif /* _ASM_POWERPC_PAGE_64_H */ diff --git a/trunk/include/linux/netfilter/nfnetlink.h b/trunk/include/linux/netfilter/nfnetlink.h index 934a2479f160..72975fa8795d 100644 --- a/trunk/include/linux/netfilter/nfnetlink.h +++ b/trunk/include/linux/netfilter/nfnetlink.h @@ -112,6 +112,7 @@ struct nfnl_callback { int (*call)(struct sock *nl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp); + kernel_cap_t cap_required; /* capabilities required for this msg */ u_int16_t attr_count; /* number of nfattr's */ }; @@ -153,14 +154,11 @@ extern void nfattr_parse(struct nfattr *tb[], int maxattr, #define nfattr_bad_size(tb, max, cta_min) \ ({ int __i, __res = 0; \ - for (__i=0; __imin_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); @@ -675,14 +667,6 @@ ctnetlink_parse_help(struct nfattr *attr, char **helper_name) return 0; } -static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) -}; - static int ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) @@ -694,9 +678,6 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - if (cda[CTA_TUPLE_ORIG-1]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); else if (cda[CTA_TUPLE_REPLY-1]) @@ -779,9 +760,6 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, return 0; } - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - if (cda[CTA_TUPLE_ORIG-1]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); else if (cda[CTA_TUPLE_REPLY-1]) @@ -1069,9 +1047,6 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - if (cda[CTA_TUPLE_ORIG-1]) { err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG); if (err < 0) @@ -1277,11 +1252,6 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) -}; - static int ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) @@ -1293,9 +1263,6 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - if (nlh->nlmsg_flags & NLM_F_DUMP) { struct nfgenmsg *msg = NLMSG_DATA(nlh); u32 rlen; @@ -1366,9 +1333,6 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct ip_conntrack_helper *h; int err; - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - if (cda[CTA_EXPECT_TUPLE-1]) { /* delete a single expect by tuple */ err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); @@ -1498,9 +1462,6 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, DEBUGP("entered %s\n", __FUNCTION__); - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - if (!cda[CTA_EXPECT_TUPLE-1] || !cda[CTA_EXPECT_MASK-1] || !cda[CTA_EXPECT_MASTER-1]) @@ -1543,22 +1504,29 @@ static struct notifier_block ctnl_notifier_exp = { static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, + .attr_count = CTA_MAX, + .cap_required = CAP_NET_ADMIN }, }; static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .cap_required = CAP_NET_ADMIN }, [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, - .attr_count = CTA_EXPECT_MAX, }, + .attr_count = CTA_EXPECT_MAX, + .cap_required = CAP_NET_ADMIN }, }; static struct nfnetlink_subsystem ctnl_subsys = { diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index ee3b7d6c4d2e..5b3f5220f289 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -357,10 +357,6 @@ static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, return -1; } -static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { - [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), -}; - static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) { struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; @@ -373,9 +369,6 @@ static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); - if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) - return -EINVAL; - if (!tb[CTA_PROTOINFO_TCP_STATE-1]) return -EINVAL; diff --git a/trunk/net/ipv6/netfilter/Kconfig b/trunk/net/ipv6/netfilter/Kconfig index 060d61202412..971ba60bf6e9 100644 --- a/trunk/net/ipv6/netfilter/Kconfig +++ b/trunk/net/ipv6/netfilter/Kconfig @@ -5,20 +5,10 @@ menu "IPv6: Netfilter Configuration (EXPERIMENTAL)" depends on INET && IPV6 && NETFILTER && EXPERIMENTAL -config NF_CONNTRACK_IPV6 - tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK - ---help--- - Connection tracking keeps a record of what packets have passed - through your machine, in order to figure out how they are related - into connections. - - This is IPv6 support on Layer 3 independent connection tracking. - Layer 3 independent connection tracking is experimental scheme - which generalize ip_conntrack to support other layer 3 protocols. - - To compile it as a module, choose M here. If unsure, say N. - +#tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP6_NF_CONNTRACK +#if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then +# dep_tristate ' FTP protocol support' CONFIG_IP6_NF_FTP $CONFIG_IP6_NF_CONNTRACK +#fi config IP6_NF_QUEUE tristate "IP6 Userspace queueing via NETLINK (OBSOLETE)" ---help--- @@ -124,6 +114,7 @@ config IP6_NF_MATCH_OWNER To compile it as a module, choose M here. If unsure, say N. +# dep_tristate ' MAC address match support' CONFIG_IP6_NF_MATCH_MAC $CONFIG_IP6_NF_IPTABLES config IP6_NF_MATCH_MARK tristate "netfilter MARK match support" depends on IP6_NF_IPTABLES @@ -179,6 +170,15 @@ config IP6_NF_MATCH_PHYSDEV To compile it as a module, choose M here. If unsure, say N. +# dep_tristate ' Multiple port match support' CONFIG_IP6_NF_MATCH_MULTIPORT $CONFIG_IP6_NF_IPTABLES +# dep_tristate ' TOS match support' CONFIG_IP6_NF_MATCH_TOS $CONFIG_IP6_NF_IPTABLES +# if [ "$CONFIG_IP6_NF_CONNTRACK" != "n" ]; then +# dep_tristate ' Connection state match support' CONFIG_IP6_NF_MATCH_STATE $CONFIG_IP6_NF_CONNTRACK $CONFIG_IP6_NF_IPTABLES +# fi +# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then +# dep_tristate ' Unclean match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_UNCLEAN $CONFIG_IP6_NF_IPTABLES +# dep_tristate ' Owner match support (EXPERIMENTAL)' CONFIG_IP6_NF_MATCH_OWNER $CONFIG_IP6_NF_IPTABLES +# fi # The targets config IP6_NF_FILTER tristate "Packet filtering" @@ -220,6 +220,12 @@ config IP6_NF_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. +# if [ "$CONFIG_IP6_NF_FILTER" != "n" ]; then +# dep_tristate ' REJECT target support' CONFIG_IP6_NF_TARGET_REJECT $CONFIG_IP6_NF_FILTER +# if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then +# dep_tristate ' MIRROR target support (EXPERIMENTAL)' CONFIG_IP6_NF_TARGET_MIRROR $CONFIG_IP6_NF_FILTER +# fi +# fi config IP6_NF_MANGLE tristate "Packet mangling" depends on IP6_NF_IPTABLES @@ -230,6 +236,7 @@ config IP6_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. +# dep_tristate ' TOS target support' CONFIG_IP6_NF_TARGET_TOS $CONFIG_IP_NF_MANGLE config IP6_NF_TARGET_MARK tristate "MARK target support" depends on IP6_NF_MANGLE @@ -259,6 +266,7 @@ config IP6_NF_TARGET_HL To compile it as a module, choose M here. If unsure, say N. +#dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on IP6_NF_IPTABLES @@ -270,5 +278,19 @@ config IP6_NF_RAW If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NF_CONNTRACK_IPV6 + tristate "IPv6 support for new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is IPv6 support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + endmenu diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 753a3ae8502b..e2c90b3a8074 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -339,8 +339,8 @@ extern unsigned long nf_ct_icmpv6_timeout; /* From nf_conntrack_frag6.c */ extern unsigned long nf_ct_frag6_timeout; -extern unsigned int nf_ct_frag6_low_thresh; -extern unsigned int nf_ct_frag6_high_thresh; +extern unsigned long nf_ct_frag6_low_thresh; +extern unsigned long nf_ct_frag6_high_thresh; static struct ctl_table_header *nf_ct_ipv6_sysctl_header; @@ -367,7 +367,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_frag6_low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, @@ -375,7 +375,7 @@ static ctl_table nf_ct_sysctl_table[] = { .data = &nf_ct_frag6_high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_dointvec, + .proc_handler = &proc_dointvec_jiffies, }, { .ctl_name = 0 } }; diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c b/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c index c2c52af9e560..7640b9bb7694 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -55,9 +55,9 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh = 256*1024; -unsigned int nf_ct_frag6_low_thresh = 192*1024; -unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; +int nf_ct_frag6_high_thresh = 256*1024; +int nf_ct_frag6_low_thresh = 192*1024; +int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; struct nf_ct_frag6_skb_cb { @@ -190,10 +190,8 @@ static void nf_ct_frag6_secret_rebuild(unsigned long dummy) atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); /* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) +static inline void frag_kfree_skb(struct sk_buff *skb) { - if (work) - *work -= skb->truesize; atomic_sub(skb->truesize, &nf_ct_frag6_mem); if (NFCT_FRAG6_CB(skb)->orig) kfree_skb(NFCT_FRAG6_CB(skb)->orig); @@ -201,11 +199,8 @@ static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work) kfree_skb(skb); } -static inline void frag_free_queue(struct nf_ct_frag6_queue *fq, - unsigned int *work) +static inline void frag_free_queue(struct nf_ct_frag6_queue *fq) { - if (work) - *work -= sizeof(struct nf_ct_frag6_queue); atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); kfree(fq); } @@ -223,8 +218,7 @@ static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) /* Destruction primitives. */ /* Complete destruction of fq. */ -static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, - unsigned int *work) +static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) { struct sk_buff *fp; @@ -236,17 +230,17 @@ static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq, while (fp) { struct sk_buff *xp = fp->next; - frag_kfree_skb(fp, work); + frag_kfree_skb(fp); fp = xp; } - frag_free_queue(fq, work); + frag_free_queue(fq); } -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work) +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) { if (atomic_dec_and_test(&fq->refcnt)) - nf_ct_frag6_destroy(fq, work); + nf_ct_frag6_destroy(fq); } /* Kill fq entry. It is not destroyed immediately, @@ -268,21 +262,16 @@ static void nf_ct_frag6_evictor(void) { struct nf_ct_frag6_queue *fq; struct list_head *tmp; - unsigned int work; - work = atomic_read(&nf_ct_frag6_mem); - if (work <= nf_ct_frag6_low_thresh) - return; - - work -= nf_ct_frag6_low_thresh; - while (work > 0) { + for (;;) { + if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh) + return; read_lock(&nf_ct_frag6_lock); if (list_empty(&nf_ct_frag6_lru_list)) { read_unlock(&nf_ct_frag6_lock); return; } tmp = nf_ct_frag6_lru_list.next; - BUG_ON(tmp == NULL); fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); atomic_inc(&fq->refcnt); read_unlock(&nf_ct_frag6_lock); @@ -292,7 +281,7 @@ static void nf_ct_frag6_evictor(void) fq_kill(fq); spin_unlock(&fq->lock); - fq_put(fq, &work); + fq_put(fq); } } @@ -309,7 +298,7 @@ static void nf_ct_frag6_expire(unsigned long data) out: spin_unlock(&fq->lock); - fq_put(fq, NULL); + fq_put(fq); } /* Creation primitives. */ @@ -329,7 +318,7 @@ static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, atomic_inc(&fq->refcnt); write_unlock(&nf_ct_frag6_lock); fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); + fq_put(fq_in); return fq; } } @@ -546,7 +535,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, fq->fragments = next; fq->meat -= free_it->len; - frag_kfree_skb(free_it, NULL); + frag_kfree_skb(free_it); } } @@ -822,7 +811,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock(&fq->lock); DEBUGP("Can't insert skb to queue\n"); - fq_put(fq, NULL); + fq_put(fq); goto ret_orig; } @@ -833,7 +822,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) } spin_unlock(&fq->lock); - fq_put(fq, NULL); + fq_put(fq); return ret_skb; ret_orig: @@ -892,6 +881,5 @@ int nf_ct_frag6_init(void) void nf_ct_frag6_cleanup(void) { del_timer(&nf_ct_frag6_secret_timer); - nf_ct_frag6_low_thresh = 0; nf_ct_frag6_evictor(); } diff --git a/trunk/net/netfilter/nf_conntrack_proto_tcp.c b/trunk/net/netfilter/nf_conntrack_proto_tcp.c index 5a6fcf349bdf..156680ddb042 100644 --- a/trunk/net/netfilter/nf_conntrack_proto_tcp.c +++ b/trunk/net/netfilter/nf_conntrack_proto_tcp.c @@ -970,12 +970,6 @@ static int tcp_packet(struct nf_conn *conntrack, conntrack->timeout.function((unsigned long) conntrack); return -NF_REPEAT; - } else { - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(pf, 0, skb, NULL, NULL, - NULL, "nf_ct_tcp: invalid SYN"); - return -NF_ACCEPT; } case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET diff --git a/trunk/net/netfilter/nfnetlink.c b/trunk/net/netfilter/nfnetlink.c index a60c59b97631..83f4c53030fc 100644 --- a/trunk/net/netfilter/nfnetlink.c +++ b/trunk/net/netfilter/nfnetlink.c @@ -223,12 +223,6 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, NFNL_SUBSYS_ID(nlh->nlmsg_type), NFNL_MSG_TYPE(nlh->nlmsg_type)); - if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { - DEBUGP("missing CAP_NET_ADMIN\n"); - *errp = -EPERM; - return -1; - } - /* Only requests are handled by kernel now. */ if (!(nlh->nlmsg_flags & NLM_F_REQUEST)) { DEBUGP("received non-request message\n"); @@ -246,12 +240,15 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, ss = nfnetlink_get_subsys(type); if (!ss) { #ifdef CONFIG_KMOD - /* don't call nfnl_shunlock, since it would reenter - * with further packet processing */ - up(&nfnl_sem); - request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); - nfnl_shlock(); - ss = nfnetlink_get_subsys(type); + if (cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) { + /* don't call nfnl_shunlock, since it would reenter + * with further packet processing */ + up(&nfnl_sem); + request_module("nfnetlink-subsys-%d", + NFNL_SUBSYS_ID(type)); + nfnl_shlock(); + ss = nfnetlink_get_subsys(type); + } if (!ss) #endif goto err_inval; @@ -263,6 +260,13 @@ static inline int nfnetlink_rcv_msg(struct sk_buff *skb, goto err_inval; } + if (nc->cap_required && + !cap_raised(NETLINK_CB(skb).eff_cap, nc->cap_required)) { + DEBUGP("permission denied for type %d\n", type); + *errp = -EPERM; + return -1; + } + { u_int16_t attr_count = ss->cb[NFNL_MSG_TYPE(nlh->nlmsg_type)].attr_count; diff --git a/trunk/net/netfilter/nfnetlink_log.c b/trunk/net/netfilter/nfnetlink_log.c index cba63729313d..d194676f3655 100644 --- a/trunk/net/netfilter/nfnetlink_log.c +++ b/trunk/net/netfilter/nfnetlink_log.c @@ -862,9 +862,11 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, static struct nfnl_callback nfulnl_cb[NFULNL_MSG_MAX] = { [NFULNL_MSG_PACKET] = { .call = nfulnl_recv_unsupp, - .attr_count = NFULA_MAX, }, + .attr_count = NFULA_MAX, + .cap_required = CAP_NET_ADMIN, }, [NFULNL_MSG_CONFIG] = { .call = nfulnl_recv_config, - .attr_count = NFULA_CFG_MAX, }, + .attr_count = NFULA_CFG_MAX, + .cap_required = CAP_NET_ADMIN }, }; static struct nfnetlink_subsystem nfulnl_subsys = { diff --git a/trunk/net/netfilter/nfnetlink_queue.c b/trunk/net/netfilter/nfnetlink_queue.c index f28460b61e47..f065a6c94953 100644 --- a/trunk/net/netfilter/nfnetlink_queue.c +++ b/trunk/net/netfilter/nfnetlink_queue.c @@ -931,11 +931,14 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, static struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = { [NFQNL_MSG_PACKET] = { .call = nfqnl_recv_unsupp, - .attr_count = NFQA_MAX, }, + .attr_count = NFQA_MAX, + .cap_required = CAP_NET_ADMIN }, [NFQNL_MSG_VERDICT] = { .call = nfqnl_recv_verdict, - .attr_count = NFQA_MAX, }, + .attr_count = NFQA_MAX, + .cap_required = CAP_NET_ADMIN }, [NFQNL_MSG_CONFIG] = { .call = nfqnl_recv_config, - .attr_count = NFQA_CFG_MAX, }, + .attr_count = NFQA_CFG_MAX, + .cap_required = CAP_NET_ADMIN }, }; static struct nfnetlink_subsystem nfqnl_subsys = {