Skip to content

Commit

Permalink
[INET]: Collect frag queues management objects together
Browse files Browse the repository at this point in the history
There are some objects that are common in all the places
which are used to keep track of frag queues, they are:

 * hash table
 * LRU list
 * rw lock
 * rnd number for hash function
 * the number of queues
 * the amount of memory occupied by queues
 * secret timer

Move all this stuff into one structure (struct inet_frags)
to make it possible to use them uniformly in the future. As
with the previous patch, this mostly consists of hunks like

-    write_lock(&ipfrag_lock);
+    write_lock(&ip4_frags.lock);

To address the issue with exporting the number of queues and
the amount of memory occupied by queues outside the .c file
they are declared in, I introduce a couple of helpers.

Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Pavel Emelyanov authored and David S. Miller committed Oct 15, 2007
1 parent 5ab11c9 commit 7eb9515
Show file tree
Hide file tree
Showing 10 changed files with 224 additions and 179 deletions.
15 changes: 15 additions & 0 deletions include/net/inet_frag.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,19 @@ struct inet_frag_queue {
#define LAST_IN 1
};

#define INETFRAGS_HASHSZ 64

/*
 * Management state shared by the users of fragment queues: the objects
 * needed to look queues up, age them and account for them, collected
 * into a single structure.
 */
struct inet_frags {
/* LRU list of the queues */
struct list_head lru_list;
/* hash table of the queues, INETFRAGS_HASHSZ chains */
struct hlist_head hash[INETFRAGS_HASHSZ];
/* rw lock taken around hash table and LRU list accesses */
rwlock_t lock;
/* random number mixed into the hash function */
u32 rnd;
/* the number of queues */
int nqueues;
/* the amount of memory occupied by queues */
atomic_t mem;
/* secret timer; not set up by inet_frags_init(), the user arms it */
struct timer_list secret_timer;
};

void inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);

#endif
4 changes: 2 additions & 2 deletions include/net/ip.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,8 +333,8 @@ enum ip_defrag_users
};

int ip_defrag(struct sk_buff *skb, u32 user);
extern int ip_frag_nqueues;
extern atomic_t ip_frag_mem;
int ip_frag_mem(void);
int ip_frag_nqueues(void);

/*
* Functions provided by ip_forward.c
Expand Down
4 changes: 2 additions & 2 deletions include/net/ipv6.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,8 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space,

extern int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb);

extern int ip6_frag_nqueues;
extern atomic_t ip6_frag_mem;
int ip6_frag_nqueues(void);
int ip6_frag_mem(void);

#define IPV6_FRAG_TIMEOUT (60*HZ) /* 60 seconds */

Expand Down
3 changes: 2 additions & 1 deletion net/ipv4/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
sysctl_net_ipv4.o fib_frontend.o fib_semantics.o \
inet_fragment.o

obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
Expand Down
44 changes: 44 additions & 0 deletions net/ipv4/inet_fragment.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* inet fragments management
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Pavel Emelyanov <xemul@openvz.org>
* Started as consolidation of ipv4/ip_fragment.c,
* ipv6/reassembly.c and ipv6 nf conntrack reassembly
*/

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/timer.h>
#include <linux/mm.h>

#include <net/inet_frag.h>

void inet_frags_init(struct inet_frags *f)
{
int i;

for (i = 0; i < INETFRAGS_HASHSZ; i++)
INIT_HLIST_HEAD(&f->hash[i]);

INIT_LIST_HEAD(&f->lru_list);
rwlock_init(&f->lock);

f->rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));

f->nqueues = 0;
atomic_set(&f->mem, 0);

}
EXPORT_SYMBOL(inet_frags_init);

/*
 * inet_frags_fini - counterpart of inet_frags_init()
 * @f: frag queues management object
 *
 * The body is empty at present, so calling it has no effect.
 */
void inet_frags_fini(struct inet_frags *f)
{
}
EXPORT_SYMBOL(inet_frags_fini);
109 changes: 53 additions & 56 deletions net/ipv4/ip_fragment.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,17 @@ struct ipq {
struct inet_peer *peer;
};

/* Hash table. */
static struct inet_frags ip4_frags;

#define IPQ_HASHSZ 64
/*
 * Return the number of queues currently recorded in ip4_frags.  This
 * helper exists so that code outside this file can read the count
 * without access to the static ip4_frags object.  The value is read
 * without taking ip4_frags.lock.
 */
int ip_frag_nqueues(void)
{
return ip4_frags.nqueues;
}

/* Per-bucket lock is easy to add now. */
static struct hlist_head ipq_hash[IPQ_HASHSZ];
static DEFINE_RWLOCK(ipfrag_lock);
static u32 ipfrag_hash_rnd;
static LIST_HEAD(ipq_lru_list);
int ip_frag_nqueues = 0;
/*
 * Return the amount of memory currently accounted to the queues in
 * ip4_frags (an atomic read of ip4_frags.mem).  Like ip_frag_nqueues(),
 * this lets code outside this file read the value of the static object.
 */
int ip_frag_mem(void)
{
return atomic_read(&ip4_frags.mem);
}

static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
struct net_device *dev);
Expand All @@ -105,70 +106,67 @@ static __inline__ void __ipq_unlink(struct ipq *qp)
{
hlist_del(&qp->q.list);
list_del(&qp->q.lru_list);
ip_frag_nqueues--;
ip4_frags.nqueues--;
}

/*
 * Locked wrapper around __ipq_unlink(): takes the frag table write lock,
 * unlinks the queue, and drops the lock again.
 */
static __inline__ void ipq_unlink(struct ipq *ipq)
{
write_lock(&ipfrag_lock);
write_lock(&ip4_frags.lock);
__ipq_unlink(ipq);
write_unlock(&ipfrag_lock);
write_unlock(&ip4_frags.lock);
}

/*
 * Compute the hash bucket index for a fragment queue key.
 *
 * The id (shifted into the upper half) is combined with the protocol and
 * fed to jhash_3words() together with the source and destination
 * addresses, using the table's random value as the initial value.  The
 * result is masked with (table size - 1), which relies on the table size
 * being a power of two.
 */
static unsigned int ipqhashfn(__be16 id, __be32 saddr, __be32 daddr, u8 prot)
{
return jhash_3words((__force u32)id << 16 | prot,
(__force u32)saddr, (__force u32)daddr,
ipfrag_hash_rnd) & (IPQ_HASHSZ - 1);
ip4_frags.rnd) & (INETFRAGS_HASHSZ - 1);
}

static struct timer_list ipfrag_secret_timer;
int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ;

/*
 * Secret timer handler.  Under the frag table write lock it draws a new
 * random hash seed and then walks all buckets, moving every queue whose
 * bucket index changed under the new seed onto its new chain.  Afterwards
 * the timer is re-armed sysctl_ipfrag_secret_interval jiffies past the
 * time sampled on entry.  The argument is not used.
 */
static void ipfrag_secret_rebuild(unsigned long dummy)
{
unsigned long now = jiffies;
int i;

write_lock(&ipfrag_lock);
get_random_bytes(&ipfrag_hash_rnd, sizeof(u32));
for (i = 0; i < IPQ_HASHSZ; i++) {
write_lock(&ip4_frags.lock);
get_random_bytes(&ip4_frags.rnd, sizeof(u32));
for (i = 0; i < INETFRAGS_HASHSZ; i++) {
struct ipq *q;
struct hlist_node *p, *n;

hlist_for_each_entry_safe(q, p, n, &ipq_hash[i], q.list) {
hlist_for_each_entry_safe(q, p, n, &ip4_frags.hash[i], q.list) {
/* The seed was replaced above, so this is the queue's new bucket. */
unsigned int hval = ipqhashfn(q->id, q->saddr,
q->daddr, q->protocol);

if (hval != i) {
hlist_del(&q->q.list);

/* Relink to new hash chain. */
hlist_add_head(&q->q.list, &ipq_hash[hval]);
hlist_add_head(&q->q.list, &ip4_frags.hash[hval]);
}
}
}
write_unlock(&ipfrag_lock);
write_unlock(&ip4_frags.lock);

mod_timer(&ipfrag_secret_timer, now + sysctl_ipfrag_secret_interval);
mod_timer(&ip4_frags.secret_timer, now + sysctl_ipfrag_secret_interval);
}

atomic_t ip_frag_mem = ATOMIC_INIT(0); /* Memory used for fragments */

/* Memory Tracking Functions. */
/*
 * Free one fragment skb and remove its truesize from the fragment memory
 * counter.  When @work is non-NULL the same amount is also subtracted
 * from *work, the caller's running count of bytes still to be released.
 */
static __inline__ void frag_kfree_skb(struct sk_buff *skb, int *work)
{
if (work)
*work -= skb->truesize;
atomic_sub(skb->truesize, &ip_frag_mem);
atomic_sub(skb->truesize, &ip4_frags.mem);
kfree_skb(skb);
}

/*
 * Free a queue descriptor and remove sizeof(struct ipq) from the fragment
 * memory counter.  When @work is non-NULL the same amount is also
 * subtracted from *work, the caller's running count of bytes still to be
 * released.
 */
static __inline__ void frag_free_queue(struct ipq *qp, int *work)
{
if (work)
*work -= sizeof(struct ipq);
atomic_sub(sizeof(struct ipq), &ip_frag_mem);
atomic_sub(sizeof(struct ipq), &ip4_frags.mem);
kfree(qp);
}

Expand All @@ -178,7 +176,7 @@ static __inline__ struct ipq *frag_alloc_queue(void)

if (!qp)
return NULL;
atomic_add(sizeof(struct ipq), &ip_frag_mem);
atomic_add(sizeof(struct ipq), &ip4_frags.mem);
return qp;
}

Expand Down Expand Up @@ -239,20 +237,20 @@ static void ip_evictor(void)
struct list_head *tmp;
int work;

work = atomic_read(&ip_frag_mem) - sysctl_ipfrag_low_thresh;
work = atomic_read(&ip4_frags.mem) - sysctl_ipfrag_low_thresh;
if (work <= 0)
return;

while (work > 0) {
read_lock(&ipfrag_lock);
if (list_empty(&ipq_lru_list)) {
read_unlock(&ipfrag_lock);
read_lock(&ip4_frags.lock);
if (list_empty(&ip4_frags.lru_list)) {
read_unlock(&ip4_frags.lock);
return;
}
tmp = ipq_lru_list.next;
tmp = ip4_frags.lru_list.next;
qp = list_entry(tmp, struct ipq, q.lru_list);
atomic_inc(&qp->q.refcnt);
read_unlock(&ipfrag_lock);
read_unlock(&ip4_frags.lock);

spin_lock(&qp->q.lock);
if (!(qp->q.last_in&COMPLETE))
Expand Down Expand Up @@ -304,22 +302,22 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
#endif
unsigned int hash;

write_lock(&ipfrag_lock);
write_lock(&ip4_frags.lock);
hash = ipqhashfn(qp_in->id, qp_in->saddr, qp_in->daddr,
qp_in->protocol);
#ifdef CONFIG_SMP
/* With SMP race we have to recheck hash table, because
* such entry could be created on other cpu, while we
* promoted read lock to write lock.
*/
hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) {
hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == qp_in->id &&
qp->saddr == qp_in->saddr &&
qp->daddr == qp_in->daddr &&
qp->protocol == qp_in->protocol &&
qp->user == qp_in->user) {
atomic_inc(&qp->q.refcnt);
write_unlock(&ipfrag_lock);
write_unlock(&ip4_frags.lock);
qp_in->q.last_in |= COMPLETE;
ipq_put(qp_in, NULL);
return qp;
Expand All @@ -332,11 +330,11 @@ static struct ipq *ip_frag_intern(struct ipq *qp_in)
atomic_inc(&qp->q.refcnt);

atomic_inc(&qp->q.refcnt);
hlist_add_head(&qp->q.list, &ipq_hash[hash]);
hlist_add_head(&qp->q.list, &ip4_frags.hash[hash]);
INIT_LIST_HEAD(&qp->q.lru_list);
list_add_tail(&qp->q.lru_list, &ipq_lru_list);
ip_frag_nqueues++;
write_unlock(&ipfrag_lock);
list_add_tail(&qp->q.lru_list, &ip4_frags.lru_list);
ip4_frags.nqueues++;
write_unlock(&ip4_frags.lock);
return qp;
}

Expand Down Expand Up @@ -387,20 +385,20 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
struct ipq *qp;
struct hlist_node *n;

read_lock(&ipfrag_lock);
read_lock(&ip4_frags.lock);
hash = ipqhashfn(id, saddr, daddr, protocol);
hlist_for_each_entry(qp, n, &ipq_hash[hash], q.list) {
hlist_for_each_entry(qp, n, &ip4_frags.hash[hash], q.list) {
if (qp->id == id &&
qp->saddr == saddr &&
qp->daddr == daddr &&
qp->protocol == protocol &&
qp->user == user) {
atomic_inc(&qp->q.refcnt);
read_unlock(&ipfrag_lock);
read_unlock(&ip4_frags.lock);
return qp;
}
}
read_unlock(&ipfrag_lock);
read_unlock(&ip4_frags.lock);

return ip_frag_create(iph, user);
}
Expand Down Expand Up @@ -599,16 +597,16 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
}
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
atomic_add(skb->truesize, &ip_frag_mem);
atomic_add(skb->truesize, &ip4_frags.mem);
if (offset == 0)
qp->q.last_in |= FIRST_IN;

if (qp->q.last_in == (FIRST_IN | LAST_IN) && qp->q.meat == qp->q.len)
return ip_frag_reasm(qp, prev, dev);

write_lock(&ipfrag_lock);
list_move_tail(&qp->q.lru_list, &ipq_lru_list);
write_unlock(&ipfrag_lock);
write_lock(&ip4_frags.lock);
list_move_tail(&qp->q.lru_list, &ip4_frags.lru_list);
write_unlock(&ip4_frags.lock);
return -EINPROGRESS;

err:
Expand Down Expand Up @@ -684,12 +682,12 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
atomic_add(clone->truesize, &ip_frag_mem);
atomic_add(clone->truesize, &ip4_frags.mem);
}

skb_shinfo(head)->frag_list = head->next;
skb_push(head, head->data - skb_network_header(head));
atomic_sub(head->truesize, &ip_frag_mem);
atomic_sub(head->truesize, &ip4_frags.mem);

for (fp=head->next; fp; fp = fp->next) {
head->data_len += fp->len;
Expand All @@ -699,7 +697,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
else if (head->ip_summed == CHECKSUM_COMPLETE)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
atomic_sub(fp->truesize, &ip_frag_mem);
atomic_sub(fp->truesize, &ip4_frags.mem);
}

head->next = NULL;
Expand Down Expand Up @@ -735,7 +733,7 @@ int ip_defrag(struct sk_buff *skb, u32 user)
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);

/* Start by cleaning up the memory. */
if (atomic_read(&ip_frag_mem) > sysctl_ipfrag_high_thresh)
if (atomic_read(&ip4_frags.mem) > sysctl_ipfrag_high_thresh)
ip_evictor();

/* Lookup (or create) queue header */
Expand All @@ -758,13 +756,12 @@ int ip_defrag(struct sk_buff *skb, u32 user)

/*
 * Boot-time setup of IPv4 fragment handling: arms the secret timer so that
 * ipfrag_secret_rebuild() runs sysctl_ipfrag_secret_interval jiffies from
 * now, and initialises the ip4_frags management object.
 *
 * NOTE(review): the timer is armed before inet_frags_init() has run, yet
 * its handler takes ip4_frags.lock, which inet_frags_init() initialises.
 * This looks safe only because the first expiry is far in the future
 * (the default interval is 10 * 60 * HZ) - consider calling
 * inet_frags_init() first; confirm.
 */
void __init ipfrag_init(void)
{
ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
(jiffies ^ (jiffies >> 6)));
init_timer(&ip4_frags.secret_timer);
ip4_frags.secret_timer.function = ipfrag_secret_rebuild;
ip4_frags.secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
add_timer(&ip4_frags.secret_timer);

init_timer(&ipfrag_secret_timer);
ipfrag_secret_timer.function = ipfrag_secret_rebuild;
ipfrag_secret_timer.expires = jiffies + sysctl_ipfrag_secret_interval;
add_timer(&ipfrag_secret_timer);
inet_frags_init(&ip4_frags);
}

EXPORT_SYMBOL(ip_defrag);
4 changes: 2 additions & 2 deletions net/ipv4/proc.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot));
seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot));
seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues,
atomic_read(&ip_frag_mem));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
ip_frag_nqueues(), ip_frag_mem());
return 0;
}

Expand Down
Loading

0 comments on commit 7eb9515

Please sign in to comment.