From 2930425314152936a47356807e5efa1312bebd73 Mon Sep 17 00:00:00 2001
From: Roger Quadros
Date: Thu, 23 Apr 2009 14:50:54 +0300
Subject: [PATCH]

--- yaml ---
r: 144333
b: refs/heads/master
c: f3784d834c71689336fa272df420b45345cb6b84
h: refs/heads/master
i: 144331: 8c447e09f0c6807d92cda99a807a1cc6353c6c3e
v: v3
---
 [refs]                                    |    2 +-
 trunk/include/linux/netfilter/x_tables.h  |   73 +------------
 trunk/include/net/bluetooth/hci_core.h    |    3 +-
 trunk/net/bluetooth/hci_sysfs.c           |   37 +++----
 trunk/net/ipv4/netfilter/arp_tables.c     |  125 +++++++++++++++-------
 trunk/net/ipv4/netfilter/ip_tables.c      |  126 ++++++++++++++++-------
 trunk/net/ipv6/netfilter/ip6_tables.c     |  123 +++++++++++++++-------
 trunk/net/netfilter/x_tables.c            |   53 +++++-----
 8 files changed, 315 insertions(+), 227 deletions(-)

diff --git a/[refs] b/[refs]
index b0eff1a5282e..aee0d7c36fc6 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 942e4a2bd680c606af0211e64eb216be2e19bf61
+refs/heads/master: f3784d834c71689336fa272df420b45345cb6b84
diff --git a/trunk/include/linux/netfilter/x_tables.h b/trunk/include/linux/netfilter/x_tables.h
index 1b2e43502ef7..7b1a652066c0 100644
--- a/trunk/include/linux/netfilter/x_tables.h
+++ b/trunk/include/linux/netfilter/x_tables.h
@@ -354,6 +354,9 @@ struct xt_table
 	/* What hooks you will enter on */
 	unsigned int valid_hooks;
 
+	/* Lock for the curtain */
+	struct mutex lock;
+
 	/* Man behind the curtain... */
 	struct xt_table_info *private;
 
@@ -431,74 +434,8 @@ extern void xt_proto_fini(struct net *net, u_int8_t af);
 
 extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
 extern void xt_free_table_info(struct xt_table_info *info);
-
-/*
- * Per-CPU spinlock associated with per-cpu table entries, and
- * with a counter for the "reading" side that allows a recursive
- * reader to avoid taking the lock and deadlocking.
- *
- * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
- * It needs to ensure that the rules are not being changed while the packet
- * is being processed. In some cases, the read lock will be acquired
- * twice on the same CPU; this is okay because of the count.
- *
- * "writing" is used when reading counters.
- * During replace any readers that are using the old tables have to complete
- * before freeing the old table. This is handled by the write locking
- * necessary for reading the counters.
- */
-struct xt_info_lock {
-	spinlock_t lock;
-	unsigned char readers;
-};
-DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
-
-/*
- * Note: we need to ensure that preemption is disabled before acquiring
- * the per-cpu-variable, so we do it as a two step process rather than
- * using "spin_lock_bh()".
- *
- * We _also_ need to disable bottom half processing before updating our
- * nesting count, to make sure that the only kind of re-entrancy is this
- * code being called by itself: since the count+lock is not an atomic
- * operation, we can allow no races.
- *
- * _Only_ that special combination of being per-cpu and never getting
- * re-entered asynchronously means that the count is safe.
- */
-static inline void xt_info_rdlock_bh(void)
-{
-	struct xt_info_lock *lock;
-
-	local_bh_disable();
-	lock = &__get_cpu_var(xt_info_locks);
-	if (!lock->readers++)
-		spin_lock(&lock->lock);
-}
-
-static inline void xt_info_rdunlock_bh(void)
-{
-	struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
-
-	if (!--lock->readers)
-		spin_unlock(&lock->lock);
-	local_bh_enable();
-}
-
-/*
- * The "writer" side needs to get exclusive access to the lock,
- * regardless of readers. This must be called with bottom half
- * processing (and thus also preemption) disabled.
- */
-static inline void xt_info_wrlock(unsigned int cpu)
-{
-	spin_lock(&per_cpu(xt_info_locks, cpu).lock);
-}
-
-static inline void xt_info_wrunlock(unsigned int cpu)
-{
-	spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
-}
+extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
+				    struct xt_table_info *new);
 
 /*
  * This helper is performance critical and must be inlined
diff --git a/trunk/include/net/bluetooth/hci_core.h b/trunk/include/net/bluetooth/hci_core.h
index 01f9316b4c23..1224bba24bdd 100644
--- a/trunk/include/net/bluetooth/hci_core.h
+++ b/trunk/include/net/bluetooth/hci_core.h
@@ -180,7 +180,8 @@ struct hci_conn {
 	struct timer_list disc_timer;
 	struct timer_list idle_timer;
 
-	struct work_struct work;
+	struct work_struct work_add;
+	struct work_struct work_del;
 
 	struct device	dev;
 
diff --git a/trunk/net/bluetooth/hci_sysfs.c b/trunk/net/bluetooth/hci_sysfs.c
index ed82796d4a0f..b7c51082ddeb 100644
--- a/trunk/net/bluetooth/hci_sysfs.c
+++ b/trunk/net/bluetooth/hci_sysfs.c
@@ -9,8 +9,7 @@
 struct class *bt_class = NULL;
 EXPORT_SYMBOL_GPL(bt_class);
 
-static struct workqueue_struct *btaddconn;
-static struct workqueue_struct *btdelconn;
+static struct workqueue_struct *bluetooth;
 
 static inline char *link_typetostr(int type)
 {
@@ -88,9 +87,10 @@ static struct device_type bt_link = {
 
 static void add_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_conn *conn = container_of(work, struct hci_conn, work_add);
 
-	flush_workqueue(btdelconn);
+	/* ensure previous add/del is complete */
+	flush_workqueue(bluetooth);
 
 	if (device_add(&conn->dev) < 0) {
 		BT_ERR("Failed to register connection device");
@@ -114,9 +114,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 
 	device_initialize(&conn->dev);
 
-	INIT_WORK(&conn->work, add_conn);
+	INIT_WORK(&conn->work_add, add_conn);
 
-	queue_work(btaddconn, &conn->work);
+	queue_work(bluetooth, &conn->work_add);
 }
 
 /*
@@ -131,9 +131,12 @@ static int __match_tty(struct device *dev, void *data)
 
 static void del_conn(struct work_struct *work)
 {
-	struct hci_conn *conn = container_of(work, struct hci_conn, work);
+	struct hci_conn *conn = container_of(work, struct hci_conn, work_del);
 	struct hci_dev *hdev = conn->hdev;
 
+	/* ensure previous add/del is complete */
+	flush_workqueue(bluetooth);
+
 	while (1) {
 		struct device *dev;
 
@@ -156,9 +159,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn)
 	if (!device_is_registered(&conn->dev))
 		return;
 
-	INIT_WORK(&conn->work, del_conn);
+	INIT_WORK(&conn->work_del, del_conn);
 
-	queue_work(btdelconn, &conn->work);
+	queue_work(bluetooth, &conn->work_del);
 }
 
 static inline char *host_typetostr(int type)
@@ -435,20 +438,13 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	btaddconn = create_singlethread_workqueue("btaddconn");
-	if (!btaddconn)
-		return -ENOMEM;
-
-	btdelconn = create_singlethread_workqueue("btdelconn");
-	if (!btdelconn) {
-		destroy_workqueue(btaddconn);
+	bluetooth = create_singlethread_workqueue("bluetooth");
+	if (!bluetooth)
 		return -ENOMEM;
-	}
 
 	bt_class = class_create(THIS_MODULE, "bluetooth");
 	if (IS_ERR(bt_class)) {
-		destroy_workqueue(btdelconn);
-		destroy_workqueue(btaddconn);
+		destroy_workqueue(bluetooth);
 		return PTR_ERR(bt_class);
 	}
 
@@ -457,8 +453,7 @@ int __init bt_sysfs_init(void)
 
 void bt_sysfs_cleanup(void)
 {
-	destroy_workqueue(btaddconn);
-	destroy_workqueue(btdelconn);
+	destroy_workqueue(bluetooth);
 
 	class_destroy(bt_class);
 }
diff --git a/trunk/net/ipv4/netfilter/arp_tables.c b/trunk/net/ipv4/netfilter/arp_tables.c
index 831fe1879dc0..5ba533d234db 100644
--- a/trunk/net/ipv4/netfilter/arp_tables.c
+++ b/trunk/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	xt_info_rdlock_bh();
-	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	rcu_read_lock_bh();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
@@ -273,7 +273,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
 			hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
 				(2 * skb->dev->addr_len);
-			ADD_COUNTER(e->counters, hdr_len, 1);
 
 			t = arpt_get_target(e);
 
@@ -329,7 +328,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-	xt_info_rdunlock_bh();
+
+	rcu_read_unlock_bh();
 
 	if (hotdrop)
 		return NF_DROP;
@@ -711,12 +711,9 @@ static void get_counters(const struct xt_table_info *t,
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
 	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
+	 * We dont care about preemption here.
 	 */
-	local_bh_disable();
-	curcpu = smp_processor_id();
+	curcpu = raw_smp_processor_id();
 
 	i = 0;
 	ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -729,22 +726,73 @@ static void get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
-		xt_info_wrlock(cpu);
 		ARPT_ENTRY_ITERATE(t->entries[cpu],
				   t->size,
				   add_entry_to_counter,
				   counters,
				   &i);
-		xt_info_wrunlock(cpu);
 	}
+}
+
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[cpu],
			  t->size,
			  add_counter_to_entry,
			  counters,
			  &i);
 	local_bh_enable();
 }
 
+static inline int
+zero_entry_counter(struct arpt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
				  zero_entry_counter, NULL);
+	}
+}
+
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -754,11 +802,30 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
 
-	get_counters(private, counters);
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int copy_entries_to_user(unsigned int total_size,
@@ -1027,9 +1094,8 @@ static int __do_replace(struct net *net, const char *name,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters, and synchronize with replace */
+	/* Get the old counters. */
 	get_counters(oldinfo, counters);
-
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1099,23 +1165,10 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   int compat)
 {
-	unsigned int i, curcpu;
+	unsigned int i;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1171,26 +1224,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	local_bh_disable();
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	curcpu = smp_processor_id();
-	loc_cpu_entry = private->entries[curcpu];
-	xt_info_wrlock(curcpu);
+	loc_cpu_entry = private->entries[smp_processor_id()];
 	ARPT_ENTRY_ITERATE(loc_cpu_entry,
			   private->size,
			   add_counter_to_entry,
			   paddc,
			   &i);
-	xt_info_wrunlock(curcpu);
+	preempt_enable();
 unlock_up_free:
-	local_bh_enable();
+	mutex_unlock(&t->lock);
+
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
diff --git a/trunk/net/ipv4/netfilter/ip_tables.c b/trunk/net/ipv4/netfilter/ip_tables.c
index 2ec8d7290c40..810c0b62c7d4 100644
--- a/trunk/net/ipv4/netfilter/ip_tables.c
+++ b/trunk/net/ipv4/netfilter/ip_tables.c
@@ -338,9 +338,10 @@ ipt_do_table(struct sk_buff *skb,
 	tgpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	xt_info_rdlock_bh();
-	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+
+	rcu_read_lock_bh();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -435,7 +436,8 @@ ipt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-	xt_info_rdunlock_bh();
+
+	rcu_read_unlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -894,13 +896,10 @@ get_counters(const struct xt_table_info *t,
 
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
-	 * with data used by 'current' CPU.
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
+	 * with data used by 'current' CPU
+	 * We dont care about preemption here.
 	 */
-	local_bh_disable();
-	curcpu = smp_processor_id();
+	curcpu = raw_smp_processor_id();
 
 	i = 0;
 	IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -913,22 +912,74 @@ get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
-		xt_info_wrlock(cpu);
 		IPT_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
-		xt_info_wrunlock(cpu);
 	}
+
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[cpu],
			  t->size,
			  add_counter_to_entry,
			  counters,
			  &i);
 	local_bh_enable();
 }
 
+
+static inline int
+zero_entry_counter(struct ipt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
				  zero_entry_counter, NULL);
+	}
+}
+
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -937,11 +988,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
 
-	get_counters(private, counters);
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1236,9 +1306,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters, and synchronize with replace */
+	/* Get the old counters. */
 	get_counters(oldinfo, counters);
-
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1308,23 +1377,11 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
 
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
 {
-	unsigned int i, curcpu;
+	unsigned int i;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1380,26 +1437,25 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
 		goto free;
 	}
 
-	local_bh_disable();
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	curcpu = smp_processor_id();
-	loc_cpu_entry = private->entries[curcpu];
-	xt_info_wrlock(curcpu);
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IPT_ENTRY_ITERATE(loc_cpu_entry,
			  private->size,
			  add_counter_to_entry,
			  paddc,
			  &i);
-	xt_info_wrunlock(curcpu);
+	preempt_enable();
 unlock_up_free:
-	local_bh_enable();
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
diff --git a/trunk/net/ipv6/netfilter/ip6_tables.c b/trunk/net/ipv6/netfilter/ip6_tables.c
index 219e165aea10..800ae8542471 100644
--- a/trunk/net/ipv6/netfilter/ip6_tables.c
+++ b/trunk/net/ipv6/netfilter/ip6_tables.c
@@ -365,9 +365,9 @@ ip6t_do_table(struct sk_buff *skb,
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
-	xt_info_rdlock_bh();
-	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	rcu_read_lock_bh();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef CONFIG_NETFILTER_DEBUG
 	((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
 #endif
-	xt_info_rdunlock_bh();
+	rcu_read_unlock_bh();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -926,12 +926,9 @@ get_counters(const struct xt_table_info *t,
 	/* Instead of clearing (by a previous call to memset())
 	 * the counters and using adds, we set the counters
 	 * with data used by 'current' CPU
-	 *
-	 * Bottom half has to be disabled to prevent deadlock
-	 * if new softirq were to run and call ipt_do_table
+	 * We dont care about preemption here.
 	 */
-	local_bh_disable();
-	curcpu = smp_processor_id();
+	curcpu = raw_smp_processor_id();
 
 	i = 0;
 	IP6T_ENTRY_ITERATE(t->entries[curcpu],
@@ -944,22 +941,72 @@ get_counters(const struct xt_table_info *t,
 		if (cpu == curcpu)
 			continue;
 		i = 0;
-		xt_info_wrlock(cpu);
 		IP6T_ENTRY_ITERATE(t->entries[cpu],
				  t->size,
				  add_entry_to_counter,
				  counters,
				  &i);
-		xt_info_wrunlock(cpu);
 	}
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IP6T_ENTRY_ITERATE(t->entries[cpu],
			  t->size,
			  add_counter_to_entry,
			  counters,
			  &i);
 	local_bh_enable();
 }
 
+static inline int
+zero_entry_counter(struct ip6t_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
				  zero_entry_counter, NULL);
+	}
+}
+
 static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
 	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -968,11 +1015,30 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
 
-	get_counters(private, counters);
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1268,9 +1334,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
 	    (newinfo->number <= oldinfo->initial_entries))
 		module_put(t->me);
 
-	/* Get the old counters, and synchronize with replace */
+	/* Get the old counters. */
 	get_counters(oldinfo, counters);
-
 	/* Decrease module usage counts and free resource */
 	loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1340,24 +1405,11 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ip6t_entry *e,
-		     const struct xt_counters addme[],
-		     unsigned int *i)
-{
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int
 do_add_counters(struct net *net, void __user *user, unsigned int len,
 		int compat)
 {
-	unsigned int i, curcpu;
+	unsigned int i;
 	struct xt_counters_info tmp;
 	struct xt_counters *paddc;
 	unsigned int num_counters;
@@ -1413,28 +1465,25 @@ do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-
-	local_bh_disable();
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
-	curcpu = smp_processor_id();
-	xt_info_wrlock(curcpu);
-	loc_cpu_entry = private->entries[curcpu];
+	loc_cpu_entry = private->entries[raw_smp_processor_id()];
 	IP6T_ENTRY_ITERATE(loc_cpu_entry,
			  private->size,
			  add_counter_to_entry,
			  paddc,
			  &i);
-	xt_info_wrunlock(curcpu);
-
+	preempt_enable();
 unlock_up_free:
-	local_bh_enable();
+	mutex_unlock(&t->lock);
 	xt_table_unlock(t);
 	module_put(t->me);
 free:
diff --git a/trunk/net/netfilter/x_tables.c b/trunk/net/netfilter/x_tables.c
index 150e5cf62f85..509a95621f9f 100644
--- a/trunk/net/netfilter/x_tables.c
+++ b/trunk/net/netfilter/x_tables.c
@@ -625,6 +625,20 @@ void xt_free_table_info(struct xt_table_info *info)
 }
 EXPORT_SYMBOL(xt_free_table_info);
 
+void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
+			     struct xt_table_info *newinfo)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *p = oldinfo->entries[cpu];
+		rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
+		newinfo->entries[cpu] = p;
+	}
+
+}
+EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
+
 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
 struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
 				    const char *name)
@@ -662,43 +676,32 @@ void xt_compat_unlock(u_int8_t af)
 EXPORT_SYMBOL_GPL(xt_compat_unlock);
 #endif
 
-DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
-EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
-
-
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
 	      unsigned int num_counters,
 	      struct xt_table_info *newinfo,
 	      int *error)
 {
-	struct xt_table_info *private;
+	struct xt_table_info *oldinfo, *private;
 
 	/* Do the substitution. */
-	local_bh_disable();
+	mutex_lock(&table->lock);
 	private = table->private;
-
 	/* Check inside lock: is the old number correct? */
 	if (num_counters != private->number) {
 		duprintf("num_counters != table->private->number (%u/%u)\n",
 			 num_counters, private->number);
-		local_bh_enable();
+		mutex_unlock(&table->lock);
 		*error = -EAGAIN;
 		return NULL;
 	}
+	oldinfo = private;
+	rcu_assign_pointer(table->private, newinfo);
+	newinfo->initial_entries = oldinfo->initial_entries;
+	mutex_unlock(&table->lock);
 
-	table->private = newinfo;
-	newinfo->initial_entries = private->initial_entries;
-
-	/*
-	 * Even though table entries have now been swapped, other CPU's
-	 * may still be using the old entries. This is okay, because
-	 * resynchronization happens because of the locking done
-	 * during the get_counters() routine.
-	 */
-	local_bh_enable();
-
-	return private;
+	synchronize_net();
+	return oldinfo;
 }
 EXPORT_SYMBOL_GPL(xt_replace_table);
 
@@ -731,6 +734,7 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table,
 
 	/* Simplifies replace_table code. */
 	table->private = bootstrap;
+	mutex_init(&table->lock);
 
 	if (!xt_replace_table(table, 0, newinfo, &ret))
 		goto unlock;
@@ -1143,14 +1147,7 @@ static struct pernet_operations xt_net_ops = {
 
 static int __init xt_init(void)
 {
-	unsigned int i;
-	int rv;
-
-	for_each_possible_cpu(i) {
-		struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
-		spin_lock_init(&lock->lock);
-		lock->readers = 0;
-	}
+	int i, rv;
 
 	xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
 	if (!xt)