From 7351a22a3ae005422488139365e9a80f560c80b9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 20:33:46 -0800 Subject: [PATCH 01/44] [NETFILTER]: ip{,6}_queue: convert to seq_file interface I plan to kill ->get_info which means killing proc_net_create(). Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/ip_queue.c | 37 ++++++++++++++++++---------------- net/ipv6/netfilter/ip6_queue.c | 37 ++++++++++++++++++---------------- 2 files changed, 40 insertions(+), 34 deletions(-) diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 10a2ce09fd8e..14d64a383db1 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -607,15 +608,11 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; -#ifdef CONFIG_PROC_FS -static int -ipq_get_info(char *buffer, char **start, off_t offset, int length) +static int ip_queue_show(struct seq_file *m, void *v) { - int len; - read_lock_bh(&queue_lock); - len = sprintf(buffer, + seq_printf(m, "Peer PID : %d\n" "Copy mode : %hu\n" "Copy range : %u\n" @@ -632,16 +629,21 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) queue_user_dropped); read_unlock_bh(&queue_lock); + return 0; +} - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - else if (len < 0) - len = 0; - return len; +static int ip_queue_open(struct inode *inode, struct file *file) +{ + return single_open(file, ip_queue_show, NULL); } -#endif /* CONFIG_PROC_FS */ + +static const struct file_operations ip_queue_proc_fops = { + .open = ip_queue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; static struct nf_queue_handler nfqh = { .name = "ip_queue", @@ -661,10 +663,11 @@ static int __init ip_queue_init(void) goto cleanup_netlink_notifier; } - proc = 
proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); - if (proc) + proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); + if (proc) { proc->owner = THIS_MODULE; - else { + proc->proc_fops = &ip_queue_proc_fops; + } else { printk(KERN_ERR "ip_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 6413a30d9f68..e273605eef85 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -596,15 +597,11 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; -#ifdef CONFIG_PROC_FS -static int -ipq_get_info(char *buffer, char **start, off_t offset, int length) +static int ip6_queue_show(struct seq_file *m, void *v) { - int len; - read_lock_bh(&queue_lock); - len = sprintf(buffer, + seq_printf(m, "Peer PID : %d\n" "Copy mode : %hu\n" "Copy range : %u\n" @@ -621,16 +618,21 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) queue_user_dropped); read_unlock_bh(&queue_lock); + return 0; +} - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - else if (len < 0) - len = 0; - return len; +static int ip6_queue_open(struct inode *inode, struct file *file) +{ + return single_open(file, ip6_queue_show, NULL); } -#endif /* CONFIG_PROC_FS */ + +static const struct file_operations ip6_queue_proc_fops = { + .open = ip6_queue_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; static struct nf_queue_handler nfqh = { .name = "ip6_queue", @@ -650,10 +652,11 @@ static int __init ip6_queue_init(void) goto cleanup_netlink_notifier; } - proc = proc_net_create(&init_net, IPQ_PROC_FS_NAME, 0, ipq_get_info); - if (proc) + proc = create_proc_entry(IPQ_PROC_FS_NAME, 0, init_net.proc_net); + if (proc) { proc->owner = THIS_MODULE; - else { + proc->proc_fops = &ip6_queue_proc_fops; 
+ } else { printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); goto cleanup_ipqnl; } From ba5dc2756cc305c055dbb253b8fcdc459f0f8e73 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 5 Nov 2007 20:35:56 -0800 Subject: [PATCH 02/44] [NETFILTER]: Copyright/Email update Transfer all my copyright over to our company. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_connlimit.c | 5 +++-- net/netfilter/xt_time.c | 3 ++- net/netfilter/xt_u32.c | 5 +++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 06cff1d13690..d7becf08a93a 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -4,7 +4,8 @@ * (c) 2000 Gerd Knorr * Nov 2002: Martin Bene : * only ignore TIME_WAIT or gone connections - * Copyright © Jan Engelhardt , 2007 + * (C) CC Computer Consultants GmbH, 2007 + * Contact: * * based on ... * @@ -306,7 +307,7 @@ static void __exit xt_connlimit_exit(void) module_init(xt_connlimit_init); module_exit(xt_connlimit_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("netfilter xt_connlimit match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connlimit"); diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index ef48bbd93573..ff44f86c24ce 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -1,6 +1,7 @@ /* * xt_time - * Copyright © Jan Engelhardt , 2007 + * Copyright © CC Computer Consultants GmbH, 2007 + * Contact: * * based on ipt_time by Fabrice MARIE * This is a module which is used for time matching diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index bec427915b30..af75b8c3f20b 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -2,7 +2,8 @@ * xt_u32 - kernel module to match u32 packet content * * Original author: Don Cohen - * © Jan Engelhardt , 2007 + * (C) CC Computer Consultants 
GmbH, 2007 + * Contact: */ #include @@ -129,7 +130,7 @@ static void __exit xt_u32_exit(void) module_init(xt_u32_init); module_exit(xt_u32_exit); -MODULE_AUTHOR("Jan Engelhardt "); +MODULE_AUTHOR("Jan Engelhardt "); MODULE_DESCRIPTION("netfilter u32 match module"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_u32"); From b98e1747eecc19b872572c5fffedc1868531dac6 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 5 Nov 2007 20:42:16 -0800 Subject: [PATCH 03/44] [NETFILTER]: Sort matches/targets in Kbuild file Sort matches and targets in the Kbuild file. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter/Kbuild | 18 +++++++++--------- include/linux/netfilter_ipv4/Kbuild | 28 ++++++++++++++-------------- include/linux/netfilter_ipv6/Kbuild | 2 +- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild index f2eaea2234ec..b87e83a5e070 100644 --- a/include/linux/netfilter/Kbuild +++ b/include/linux/netfilter/Kbuild @@ -4,25 +4,28 @@ header-y += nfnetlink_conntrack.h header-y += nfnetlink_log.h header-y += nfnetlink_queue.h header-y += xt_CLASSIFY.h +header-y += xt_CONNMARK.h +header-y += xt_CONNSECMARK.h +header-y += xt_DSCP.h +header-y += xt_MARK.h +header-y += xt_NFLOG.h +header-y += xt_NFQUEUE.h +header-y += xt_SECMARK.h +header-y += xt_TCPMSS.h header-y += xt_comment.h header-y += xt_connbytes.h header-y += xt_connmark.h -header-y += xt_CONNMARK.h header-y += xt_conntrack.h header-y += xt_dccp.h header-y += xt_dscp.h -header-y += xt_DSCP.h header-y += xt_esp.h -header-y += xt_helper.h header-y += xt_hashlimit.h +header-y += xt_helper.h header-y += xt_length.h header-y += xt_limit.h header-y += xt_mac.h header-y += xt_mark.h -header-y += xt_MARK.h header-y += xt_multiport.h -header-y += xt_NFQUEUE.h -header-y += xt_NFLOG.h header-y += xt_pkttype.h header-y += xt_policy.h header-y += xt_realm.h @@ -32,9 +35,6 @@ header-y += 
xt_statistic.h header-y += xt_string.h header-y += xt_tcpmss.h header-y += xt_tcpudp.h -header-y += xt_SECMARK.h -header-y += xt_CONNSECMARK.h -header-y += xt_TCPMSS.h unifdef-y += nf_conntrack_common.h unifdef-y += nf_conntrack_ftp.h diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild index 7185792b900f..3a7105bb8f33 100644 --- a/include/linux/netfilter_ipv4/Kbuild +++ b/include/linux/netfilter_ipv4/Kbuild @@ -1,47 +1,47 @@ -header-y += ipt_addrtype.h -header-y += ipt_ah.h header-y += ipt_CLASSIFY.h header-y += ipt_CLUSTERIP.h +header-y += ipt_CONNMARK.h +header-y += ipt_DSCP.h +header-y += ipt_ECN.h +header-y += ipt_LOG.h +header-y += ipt_MARK.h +header-y += ipt_NFQUEUE.h +header-y += ipt_REJECT.h +header-y += ipt_SAME.h +header-y += ipt_TCPMSS.h +header-y += ipt_TOS.h +header-y += ipt_TTL.h +header-y += ipt_ULOG.h +header-y += ipt_addrtype.h +header-y += ipt_ah.h header-y += ipt_comment.h header-y += ipt_connbytes.h header-y += ipt_connmark.h -header-y += ipt_CONNMARK.h header-y += ipt_conntrack.h header-y += ipt_dccp.h header-y += ipt_dscp.h -header-y += ipt_DSCP.h header-y += ipt_ecn.h -header-y += ipt_ECN.h header-y += ipt_esp.h header-y += ipt_hashlimit.h header-y += ipt_helper.h header-y += ipt_iprange.h header-y += ipt_length.h header-y += ipt_limit.h -header-y += ipt_LOG.h header-y += ipt_mac.h header-y += ipt_mark.h -header-y += ipt_MARK.h header-y += ipt_multiport.h -header-y += ipt_NFQUEUE.h header-y += ipt_owner.h header-y += ipt_physdev.h header-y += ipt_pkttype.h header-y += ipt_policy.h header-y += ipt_realm.h header-y += ipt_recent.h -header-y += ipt_REJECT.h -header-y += ipt_SAME.h header-y += ipt_sctp.h header-y += ipt_state.h header-y += ipt_string.h header-y += ipt_tcpmss.h -header-y += ipt_TCPMSS.h header-y += ipt_tos.h -header-y += ipt_TOS.h header-y += ipt_ttl.h -header-y += ipt_TTL.h -header-y += ipt_ULOG.h unifdef-y += ip_queue.h unifdef-y += ip_tables.h diff --git a/include/linux/netfilter_ipv6/Kbuild 
b/include/linux/netfilter_ipv6/Kbuild index 9dd978d149ff..8887a5fcd1d0 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -14,8 +14,8 @@ header-y += ip6t_mark.h header-y += ip6t_multiport.h header-y += ip6t_opts.h header-y += ip6t_owner.h -header-y += ip6t_policy.h header-y += ip6t_physdev.h +header-y += ip6t_policy.h header-y += ip6t_rt.h unifdef-y += ip6_tables.h From 0795c65d9f8de2bf9a62ae1f56e928c6b5ed75ab Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 5 Nov 2007 20:42:54 -0800 Subject: [PATCH 04/44] [NETFILTER]: Clean up Makefile Sort matches and targets in the NF makefiles. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv4/netfilter/Makefile | 20 ++++++++++---------- net/ipv6/netfilter/Makefile | 28 ++++++++++++++++------------ net/netfilter/Makefile | 14 +++++++------- 3 files changed, 33 insertions(+), 29 deletions(-) diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 409d273f6f82..7456833d6ade 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -41,27 +41,27 @@ obj-$(CONFIG_NF_NAT) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches +obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o +obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o -obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o -obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o # targets -obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o +obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o 
+obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o +obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o -obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o -obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o -obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o +obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 4513eab77397..e789ec44d23b 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -4,25 +4,29 @@ # Link order matters here. obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o -obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o -obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o -obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o -obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o -obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o -obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o -obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o -obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o -obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o -obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o -obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o -obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o # objects for l3 independent conntrack nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o + +# matches 
+obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o +obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o +obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o +obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o +obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o +obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o +obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o +obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o + +# targets +obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o +obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o +obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 93c58f973831..ad0e36ebea3d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -40,15 +40,15 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o -obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o +obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o -obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o -obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o @@ -59,22 +59,22 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += 
xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o -obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o +obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o +obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o -obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o -obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o +obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o -obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o From d1332e0ab84479d941de5cf4a69c71dfd385a25e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 5 Nov 2007 20:43:30 -0800 Subject: [PATCH 05/44] [NETFILTER]: remove unneeded rcu_dereference() calls As noticed by Paul McKenney, the rcu_dereference calls in the init path of NAT modules are unneeded, remove them. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- net/ipv4/netfilter/nf_nat_amanda.c | 2 +- net/ipv4/netfilter/nf_nat_ftp.c | 2 +- net/ipv4/netfilter/nf_nat_h323.c | 18 +++++++++--------- net/ipv4/netfilter/nf_nat_irc.c | 2 +- net/ipv4/netfilter/nf_nat_pptp.c | 8 ++++---- net/ipv4/netfilter/nf_nat_sip.c | 4 ++-- net/ipv4/netfilter/nf_nat_tftp.c | 2 +- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c index 35a5aa69cd92..c31b87668250 100644 --- a/net/ipv4/netfilter/nf_nat_amanda.c +++ b/net/ipv4/netfilter/nf_nat_amanda.c @@ -69,7 +69,7 @@ static void __exit nf_nat_amanda_fini(void) static int __init nf_nat_amanda_init(void) { - BUG_ON(rcu_dereference(nf_nat_amanda_hook)); + BUG_ON(nf_nat_amanda_hook != NULL); rcu_assign_pointer(nf_nat_amanda_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c index e1a16d3ea4cb..a1d5d58a58bf 100644 --- a/net/ipv4/netfilter/nf_nat_ftp.c +++ b/net/ipv4/netfilter/nf_nat_ftp.c @@ -147,7 +147,7 @@ static void __exit nf_nat_ftp_fini(void) static int __init nf_nat_ftp_init(void) { - BUG_ON(rcu_dereference(nf_nat_ftp_hook)); + BUG_ON(nf_nat_ftp_hook != NULL); rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a868c8c41328..93e18ef114f2 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -544,15 +544,15 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, /****************************************************************************/ static int __init init(void) { - BUG_ON(rcu_dereference(set_h245_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_h225_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_sig_addr_hook) != NULL); - BUG_ON(rcu_dereference(set_ras_addr_hook) != NULL); - BUG_ON(rcu_dereference(nat_rtp_rtcp_hook) != NULL); - BUG_ON(rcu_dereference(nat_t120_hook) != NULL); - 
BUG_ON(rcu_dereference(nat_h245_hook) != NULL); - BUG_ON(rcu_dereference(nat_callforwarding_hook) != NULL); - BUG_ON(rcu_dereference(nat_q931_hook) != NULL); + BUG_ON(set_h245_addr_hook != NULL); + BUG_ON(set_h225_addr_hook != NULL); + BUG_ON(set_sig_addr_hook != NULL); + BUG_ON(set_ras_addr_hook != NULL); + BUG_ON(nat_rtp_rtcp_hook != NULL); + BUG_ON(nat_t120_hook != NULL); + BUG_ON(nat_h245_hook != NULL); + BUG_ON(nat_callforwarding_hook != NULL); + BUG_ON(nat_q931_hook != NULL); rcu_assign_pointer(set_h245_addr_hook, set_h245_addr); rcu_assign_pointer(set_h225_addr_hook, set_h225_addr); diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c index 766e2c16c6b9..fe6f9cef6c85 100644 --- a/net/ipv4/netfilter/nf_nat_irc.c +++ b/net/ipv4/netfilter/nf_nat_irc.c @@ -74,7 +74,7 @@ static void __exit nf_nat_irc_fini(void) static int __init nf_nat_irc_init(void) { - BUG_ON(rcu_dereference(nf_nat_irc_hook)); + BUG_ON(nf_nat_irc_hook != NULL); rcu_assign_pointer(nf_nat_irc_hook, help); return 0; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index e1385a099079..6817e7995f35 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -281,16 +281,16 @@ static int __init nf_nat_helper_pptp_init(void) { nf_nat_need_gre(); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_outbound)); + BUG_ON(nf_nat_pptp_hook_outbound != NULL); rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_inbound)); + BUG_ON(nf_nat_pptp_hook_inbound != NULL); rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_exp_gre)); + BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); - BUG_ON(rcu_dereference(nf_nat_pptp_hook_expectfn)); + BUG_ON(nf_nat_pptp_hook_expectfn != NULL); rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected); return 0; } diff --git 
a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index ce9edbcc01e3..3ca98971a1e9 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -293,8 +293,8 @@ static void __exit nf_nat_sip_fini(void) static int __init nf_nat_sip_init(void) { - BUG_ON(rcu_dereference(nf_nat_sip_hook)); - BUG_ON(rcu_dereference(nf_nat_sdp_hook)); + BUG_ON(nf_nat_sip_hook != NULL); + BUG_ON(nf_nat_sdp_hook != NULL); rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip); rcu_assign_pointer(nf_nat_sdp_hook, ip_nat_sdp); return 0; diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c index 0ecec701cb44..1360a94766dd 100644 --- a/net/ipv4/netfilter/nf_nat_tftp.c +++ b/net/ipv4/netfilter/nf_nat_tftp.c @@ -43,7 +43,7 @@ static void __exit nf_nat_tftp_fini(void) static int __init nf_nat_tftp_init(void) { - BUG_ON(rcu_dereference(nf_nat_tftp_hook)); + BUG_ON(nf_nat_tftp_hook != NULL); rcu_assign_pointer(nf_nat_tftp_hook, help); return 0; } From 55d84acd366f08e11ff00139f32fe4394fb0016a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 20:44:06 -0800 Subject: [PATCH 06/44] [NETFILTER]: nf_sockopts list head cleanup Code is using knowledge that nf_sockopt_ops::list list_head is first field in structure by using casts. Switch to list_for_each_entry() iterators while I am at it. Signed-off-by: Alexey Dobriyan Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/nf_sockopt.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index aa2831587b82..2dfac3253569 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -23,14 +23,13 @@ static inline int overlap(int min1, int max1, int min2, int max2) /* Functions to register sockopt ranges (exclusive). 
*/ int nf_register_sockopt(struct nf_sockopt_ops *reg) { - struct list_head *i; + struct nf_sockopt_ops *ops; int ret = 0; if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; - list_for_each(i, &nf_sockopts) { - struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == reg->pf && (overlap(ops->set_optmin, ops->set_optmax, reg->set_optmin, reg->set_optmax) @@ -65,7 +64,6 @@ EXPORT_SYMBOL(nf_unregister_sockopt); static int nf_sockopt(struct sock *sk, int pf, int val, char __user *opt, int *len, int get) { - struct list_head *i; struct nf_sockopt_ops *ops; int ret; @@ -75,8 +73,7 @@ static int nf_sockopt(struct sock *sk, int pf, int val, if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; - list_for_each(i, &nf_sockopts) { - ops = (struct nf_sockopt_ops *)i; + list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) goto out_nosup; @@ -124,7 +121,6 @@ EXPORT_SYMBOL(nf_getsockopt); static int compat_nf_sockopt(struct sock *sk, int pf, int val, char __user *opt, int *len, int get) { - struct list_head *i; struct nf_sockopt_ops *ops; int ret; @@ -135,8 +131,7 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return -EINTR; - list_for_each(i, &nf_sockopts) { - ops = (struct nf_sockopt_ops *)i; + list_for_each_entry(ops, &nf_sockopts, list) { if (ops->pf == pf) { if (!try_module_get(ops->owner)) goto out_nosup; From e011ff48abc1b0ee97cde26b7700d2cca689e7c3 Mon Sep 17 00:00:00 2001 From: Bart De Schuymer Date: Mon, 5 Nov 2007 20:59:47 -0800 Subject: [PATCH 07/44] [NETFILTER]: ebt_arp: fix --arp-gratuitous matching dependence on --arp-ip-{src,dst} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix --arp-gratuitous matching dependence on --arp-ip-{src,dst} Signed-off-by: Bart De Schuymer Signed-off-by: Lutz Preßler 
Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_arp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 1a46952a56d9..18141392a9b4 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -34,7 +34,7 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in ah->ar_pro, EBT_ARP_PTYPE)) return EBT_NOMATCH; - if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP)) { + if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { __be32 saddr, daddr, *sap, *dap; if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) From 429f08e950a88cd826b203ea898c2f2d0f7db9de Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 5 Nov 2007 21:03:24 -0800 Subject: [PATCH 08/44] [IPV4]: Consolidate the ip cork destruction in ip_output.c The ip_push_pending_frames and ip_flush_pending_frames do the same things to flush the sock's cork. Move this into a separate function and save ~80 bytes from the .text Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5f7dc2de303..fd99fbd685ea 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1183,6 +1183,17 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, return err; } +static void ip_cork_release(struct inet_sock *inet) +{ + inet->cork.flags &= ~IPCORK_OPT; + kfree(inet->cork.opt); + inet->cork.opt = NULL; + if (inet->cork.rt) { + ip_rt_put(inet->cork.rt); + inet->cork.rt = NULL; + } +} + /* * Combined all pending IP fragments on the socket as one IP datagram * and push them out. 
@@ -1276,13 +1287,7 @@ int ip_push_pending_frames(struct sock *sk) } out: - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + ip_cork_release(inet); return err; error: @@ -1295,19 +1300,12 @@ int ip_push_pending_frames(struct sock *sk) */ void ip_flush_pending_frames(struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) kfree_skb(skb); - inet->cork.flags &= ~IPCORK_OPT; - kfree(inet->cork.opt); - inet->cork.opt = NULL; - if (inet->cork.rt) { - ip_rt_put(inet->cork.rt); - inet->cork.rt = NULL; - } + ip_cork_release(inet_sk(sk)); } From bf138862b162b6eaf3d7336f759f6e6485e481df Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 5 Nov 2007 21:04:31 -0800 Subject: [PATCH 09/44] [IPV6]: Consolidate the ip cork destruction in ip6_output.c The ip6_push_pending_frames and ip6_flush_pending_frames do the same things to flush the sock's cork. Move this into a separate function and save ~100 bytes from the .text Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- net/ipv6/ip6_output.c | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 653fc0a8235b..86e1835ce4e4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1339,6 +1339,19 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, return err; } +static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +{ + inet->cork.flags &= ~IPCORK_OPT; + kfree(np->cork.opt); + np->cork.opt = NULL; + if (np->cork.rt) { + dst_release(&np->cork.rt->u.dst); + np->cork.rt = NULL; + inet->cork.flags &= ~IPCORK_ALLFRAG; + } + memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); +} + int ip6_push_pending_frames(struct sock *sk) { struct sk_buff *skb, *tmp_skb; @@ -1415,15 +1428,7 @@ int ip6_push_pending_frames(struct sock *sk) } out: - inet->cork.flags &= ~IPCORK_OPT; - kfree(np->cork.opt); - np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; - inet->cork.flags &= ~IPCORK_ALLFRAG; - } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + ip6_cork_release(inet, np); return err; error: goto out; @@ -1431,8 +1436,6 @@ int ip6_push_pending_frames(struct sock *sk) void ip6_flush_pending_frames(struct sock *sk) { - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { @@ -1442,14 +1445,5 @@ void ip6_flush_pending_frames(struct sock *sk) kfree_skb(skb); } - inet->cork.flags &= ~IPCORK_OPT; - - kfree(np->cork.opt); - np->cork.opt = NULL; - if (np->cork.rt) { - dst_release(&np->cork.rt->u.dst); - np->cork.rt = NULL; - inet->cork.flags &= ~IPCORK_ALLFRAG; - } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + ip6_cork_release(inet_sk(sk), inet6_sk(sk)); } From 3f192b5c584b8ecddc6069717aaf36d8fa244713 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 21:28:13 
-0800 Subject: [PATCH 10/44] [NET]: Remove /proc/net/stat/*_arp_cache upon module removal neigh_table_init_no_netlink() creates them, but they aren't removed anywhere. Steps to reproduce: modprobe clip rmmod clip cat /proc/net/stat/clip_arp_cache BUG: unable to handle kernel paging request at virtual address f89d7758 printing eip: c05a99da *pdpt = 0000000000004001 *pde = 0000000004408067 *pte = 0000000000000000 Oops: 0000 [#1] PREEMPT SMP Modules linked in: atm af_packet ipv6 binfmt_misc sbs sbshc fan dock battery backlight ac power_supply parport loop rtc_cmos rtc_core rtc_lib serio_raw button k8temp hwmon amd_rng sr_mod cdrom shpchp pci_hotplug ehci_hcd ohci_hcd uhci_hcd usbcore Pid: 2082, comm: cat Not tainted (2.6.24-rc1-b1d08ac064268d0ae2281e98bf5e82627e0f0c56-bloat #4) EIP: 0060:[] EFLAGS: 00210256 CPU: 0 EIP is at neigh_stat_seq_next+0x26/0x3f EAX: 00000001 EBX: f89d7600 ECX: c587bf40 EDX: 00000000 ESI: 00000000 EDI: 00000001 EBP: 00000400 ESP: c587bf1c DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process cat (pid: 2082, ti=c587b000 task=c5984e10 task.ti=c587b000) Stack: c06228cc c5313790 c049e5c0 0804f000 c45a7b00 c53137b0 00000000 00000000 00000082 00000001 00000000 00000000 00000000 fffffffb c58d6780 c049e437 c45a7b00 c04b1f93 c587bfa0 00000400 0804f000 00000400 0804f000 c04b1f2f Call Trace: [] seq_read+0x189/0x281 [] seq_read+0x0/0x281 [] proc_reg_read+0x64/0x77 [] proc_reg_read+0x0/0x77 [] vfs_read+0x80/0xd1 [] sys_read+0x41/0x67 [] sysenter_past_esp+0x6b/0xc1 ======================= Code: e9 ec 8d 05 00 56 8b 11 53 8b 40 70 8b 58 3c eb 29 0f a3 15 80 91 7b c0 19 c0 85 c0 8d 42 01 74 17 89 c6 c1 fe 1f 89 01 89 71 04 <8b> 83 58 01 00 00 f7 d0 8b 04 90 eb 09 89 c2 83 fa 01 7e d2 31 EIP: [] neigh_stat_seq_next+0x26/0x3f SS:ESP 0068:c587bf1c Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 05979e356963..29b8ee4e35d6 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1435,6 +1435,8 @@ int neigh_table_clear(struct neigh_table *tbl) kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; + remove_proc_entry(tbl->id, init_net.proc_net_stat); + free_percpu(tbl->stats); tbl->stats = NULL; From 7a0ff716c2282f4b8d89c65850a4f17399628154 Mon Sep 17 00:00:00 2001 From: Mitsuru Chinen Date: Mon, 5 Nov 2007 21:29:17 -0800 Subject: [PATCH 11/44] [IPv6] SNMP: Restore Udp6InErrors incrementation As the checksum verification is postponed till user calls recv or poll, the incrementation of Udp6InErrors counter should be also postponed. Currently, it is postponed in non-blocking operation case. However it should be postponed in all cases like the IPv4 code. Signed-off-by: Mitsuru Chinen Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv6/udp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index caebad6ee510..8344d8c87219 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -205,12 +205,11 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, return err; csum_copy_err: + UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); skb_kill_datagram(sk, skb, flags); - if (flags & MSG_DONTWAIT) { - UDP6_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + if (flags & MSG_DONTWAIT) return -EAGAIN; - } goto try_again; } From 4e058063f49f53f6d75f707e36c82edee6d2e919 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 5 Nov 2007 21:30:11 -0800 Subject: [PATCH 12/44] [DECNET]: "addr" module param can't be __initdata sysfs keeps references to module parameters via /sys/module/*/parameters, so marking them as __initdata can't work.
Steps to reproduce: modprobe decnet cat /sys/module/decnet/parameters/addr BUG: unable to handle kernel paging request at virtual address f88cd410 printing eip: c043dfd1 *pdpt = 0000000000004001 *pde = 0000000004408067 *pte = 0000000000000000 Oops: 0000 [#1] PREEMPT SMP Modules linked in: decnet sunrpc af_packet ipv6 binfmt_misc dm_mirror dm_multipath dm_mod sbs sbshc fan dock battery backlight ac power_supply parport loop rtc_cmos serio_raw rtc_core rtc_lib button amd_rng sr_mod cdrom shpchp pci_hotplug ehci_hcd ohci_hcd uhci_hcd usbcore Pid: 2099, comm: cat Not tainted (2.6.24-rc1-b1d08ac064268d0ae2281e98bf5e82627e0f0c56-bloat #6) EIP: 0060:[] EFLAGS: 00210286 CPU: 1 EIP is at param_get_int+0x6/0x20 EAX: c5c87000 EBX: 00000000 ECX: 000080d0 EDX: f88cd410 ESI: f8a108f8 EDI: c5c87000 EBP: 00000000 ESP: c5c97f00 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process cat (pid: 2099, ti=c5c97000 task=c641ee10 task.ti=c5c97000) Stack: 00000000 f8a108f8 c5c87000 c043db6b f8a108f1 00000124 c043de1a c043db2f f88cd410 ffffffff c5c87000 f8a16bc8 f8a16bc8 c043dd69 c043dd54 c5dd5078 c043dbc8 c5cc7580 c06ee64c c5d679f8 c04c431f c641f480 c641f484 00001000 Call Trace: [] param_array_get+0x3c/0x62 [] param_array_set+0x0/0xdf [] param_array_get+0x0/0x62 [] param_attr_show+0x15/0x2d [] param_attr_show+0x0/0x2d [] module_attr_show+0x1a/0x1e [] sysfs_read_file+0x7c/0xd9 [] sysfs_read_file+0x0/0xd9 [] vfs_read+0x88/0x134 [] do_page_fault+0x0/0x7d5 [] sys_read+0x41/0x67 [] sysenter_past_esp+0x6b/0xc1 ======================= Code: 00 83 c4 0c c3 83 ec 0c 8b 52 10 8b 12 c7 44 24 04 27 dd 6c c0 89 04 24 89 54 24 08 e8 ea 01 0c 00 83 c4 0c c3 83 ec 0c 8b 52 10 <8b> 12 c7 44 24 04 58 8c 6a c0 89 04 24 89 54 24 08 e8 ca 01 0c EIP: [] param_get_int+0x6/0x20 SS:ESP 0068:c5c97f00 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- net/decnet/dn_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 26130afd8029..66e266fb5908 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1439,7 +1439,7 @@ static const struct file_operations dn_dev_seq_fops = { #endif /* CONFIG_PROC_FS */ -static int __initdata addr[2]; +static int addr[2]; module_param_array(addr, int, NULL, 0444); MODULE_PARM_DESC(addr, "The DECnet address of this machine: area,node"); From 6a9fb9479f2672fa392711735de9e642395c9a14 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 5 Nov 2007 21:32:31 -0800 Subject: [PATCH 13/44] [IPV4]: Clean the ip_sockglue.c from some ugly ifdefs The #ifdef CONFIG_IP_MROUTE is sometimes placed inside the if-s, which looks completely bad. Similar ifdefs inside the functions look a bit better, but they are also not recommended to be used. Provide an ifdef-ed ip_mroute_opt() helper to cleanup the code. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- include/linux/mroute.h | 12 ++++++++++++ net/ipv4/ip_sockglue.c | 39 ++++++++++++--------------------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 7da2cee8e132..35a8277ec1bd 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -128,6 +128,18 @@ struct igmpmsg #ifdef __KERNEL__ #include +#ifdef CONFIG_IP_MROUTE +static inline int ip_mroute_opt(int opt) +{ + return (opt >= MRT_BASE) && (opt <= MRT_BASE + 10); +} +#else +static inline int ip_mroute_opt(int opt) +{ + return 0; +} +#endif + extern int ip_mroute_setsockopt(struct sock *, int, char __user *, int); extern int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); extern int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index f51f20e487c8..82817e554363 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -437,10 +437,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ -#ifdef CONFIG_IP_MROUTE - if (optname >= MRT_BASE && optname <= (MRT_BASE + 10)) + if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk,optname,optval,optlen); -#endif err = 0; lock_sock(sk); @@ -909,11 +907,9 @@ int ip_setsockopt(struct sock *sk, int level, #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > (MRT_BASE + 10)) -#endif - ) { + optname != IP_IPSEC_POLICY && + optname != IP_XFRM_POLICY && + !ip_mroute_opt(optname)) { lock_sock(sk); err = nf_setsockopt(sk, PF_INET, optname, optval, optlen); release_sock(sk); @@ -935,11 +931,9 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_NETFILTER /* we need to exclude all possible 
ENOPROTOOPTs except default case */ if (err == -ENOPROTOOPT && optname != IP_HDRINCL && - optname != IP_IPSEC_POLICY && optname != IP_XFRM_POLICY -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > (MRT_BASE + 10)) -#endif - ) { + optname != IP_IPSEC_POLICY && + optname != IP_XFRM_POLICY && + !ip_mroute_opt(optname)) { lock_sock(sk); err = compat_nf_setsockopt(sk, PF_INET, optname, optval, optlen); @@ -967,11 +961,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, if (level != SOL_IP) return -EOPNOTSUPP; -#ifdef CONFIG_IP_MROUTE - if (optname >= MRT_BASE && optname <= MRT_BASE+10) { + if (ip_mroute_opt(optname)) return ip_mroute_getsockopt(sk,optname,optval,optlen); - } -#endif if (get_user(len,optlen)) return -EFAULT; @@ -1171,11 +1162,8 @@ int ip_getsockopt(struct sock *sk, int level, err = do_ip_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > MRT_BASE+10) -#endif - ) { + if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && + !ip_mroute_opt(optname)) { int len; if (get_user(len,optlen)) @@ -1200,11 +1188,8 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int err = do_ip_getsockopt(sk, level, optname, optval, optlen); #ifdef CONFIG_NETFILTER /* we need to exclude all possible ENOPROTOOPTs except default case */ - if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS -#ifdef CONFIG_IP_MROUTE - && (optname < MRT_BASE || optname > MRT_BASE+10) -#endif - ) { + if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS && + !ip_mroute_opt(optname)) { int len; if (get_user(len, optlen)) From 91781004b9c029ee55b7aa9ef950a373ba865dc6 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Mon, 5 Nov 2007 23:32:37 -0800 Subject: [PATCH 14/44] [PPP]: L2TP: Fix oops in transmit and receive paths Changes made on 18-sep to fix 
skb handling in the pppol2tp driver broke the transmit and receive paths. Users are only running into this now because distros are now using 2.6.23 and I must have messed up when I tested the change. For receive, we now do our own calculation of how much to pull from the skb (variable length L2TP header) rather than using skb_transport_offset(). Also, if the skb isn't a data packet, it must be passed back to UDP with skb->data pointing to the UDP header. For transmit, make sure skb->sk is set up because ip_queue_xmit() needs it. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- drivers/net/pppol2tp.c | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index f8904fd92369..a7556cd2df79 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -488,7 +488,7 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) { struct pppol2tp_session *session = NULL; struct pppol2tp_tunnel *tunnel; - unsigned char *ptr; + unsigned char *ptr, *optr; u16 hdrflags; u16 tunnel_id, session_id; int length; @@ -496,7 +496,7 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) tunnel = pppol2tp_sock_to_tunnel(sock); if (tunnel == NULL) - goto error; + goto no_tunnel; /* UDP always verifies the packet length. */ __skb_pull(skb, sizeof(struct udphdr)); @@ -509,7 +509,7 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) } /* Point to L2TP header */ - ptr = skb->data; + optr = ptr = skb->data; /* Get L2TP header flags */ hdrflags = ntohs(*(__be16*)ptr); @@ -637,12 +637,14 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) /* If offset bit set, skip it. 
*/ if (hdrflags & L2TP_HDRFLAG_O) { offset = ntohs(*(__be16 *)ptr); - skb->transport_header += 2 + offset; - if (!pskb_may_pull(skb, skb_transport_offset(skb) + 2)) - goto discard; + ptr += 2 + offset; } - __skb_pull(skb, skb_transport_offset(skb)); + offset = ptr - optr; + if (!pskb_may_pull(skb, offset)) + goto discard; + + __skb_pull(skb, offset); /* Skip PPP header, if present. In testing, Microsoft L2TP clients * don't send the PPP header (PPP header compression enabled), but @@ -652,6 +654,9 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) * Note that skb->data[] isn't dereferenced from a u16 ptr here since * the field may be unaligned. */ + if (!pskb_may_pull(skb, 2)) + goto discard; + if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) skb_pull(skb, 2); @@ -709,6 +714,10 @@ static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb) return 0; error: + /* Put UDP header back */ + __skb_push(skb, sizeof(struct udphdr)); + +no_tunnel: return 1; } @@ -1050,6 +1059,8 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Get routing info from the tunnel socket */ dst_release(skb->dst); skb->dst = sk_dst_get(sk_tun); + skb_orphan(skb); + skb->sk = sk_tun; /* Queue the packet to IP for output */ len = skb->len; From 286ab3d46058840d68e5d7d52e316c1f7e98c59f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:38:39 -0800 Subject: [PATCH 15/44] [NET]: Define infrastructure to keep 'inuse' changes in an efficent SMP/NUMA way. "struct proto" currently uses an array stats[NR_CPUS] to track change on 'inuse' sockets per protocol. If NR_CPUS is big, this means we use a big memory area for this. Moreover, all this memory area is located on a single node on NUMA machines, increasing memory pressure on the boot node. In this patch, I tried to : - Keep a fast !CONFIG_SMP implementation - Keep a fast CONFIG_SMP implementation for often used protocols (tcp,udp,raw,...) 
- Introduce a NUMA efficient implementation Some helper macros are defined in include/net/sock.h These macros take into account CONFIG_SMP If a "struct proto" is declared without using DEFINE_PROTO_INUSE / REF_PROTO_INUSE macros, it will automatically use a default implementation, using a dynamically allocated percpu zone. This default implementation will be NUMA efficient, but might use 32/64 bytes per possible cpu because of current alloc_percpu() implementation. However it still should be better than previous implementation based on stats[NR_CPUS] field. When a "struct proto" is changed to use the new macros, we use a single static "int" percpu variable, lowering the memory and cpu costs, still preserving NUMA efficiency. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 63 +++++++++++++++++++++++++++++++++++++++++----- net/core/sock.c | 48 ++++++++++++++++++++++++++++++++++- net/ipv4/proc.c | 19 +++----------- net/ipv6/proc.c | 19 +++----------- 4 files changed, 112 insertions(+), 37 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 20de3fa7ae40..5504fb9fa88a 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -560,6 +560,14 @@ struct proto { void (*unhash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); +#ifdef CONFIG_SMP + /* Keeping track of sockets in use */ + void (*inuse_add)(struct proto *prot, int inc); + int (*inuse_getval)(const struct proto *prot); + int *inuse_ptr; +#else + int inuse; +#endif /* Memory pressure */ void (*enter_memory_pressure)(void); atomic_t *memory_allocated; /* Current allocated memory. */ @@ -592,12 +600,38 @@ struct proto { #ifdef SOCK_REFCNT_DEBUG atomic_t socks; #endif - struct { - int inuse; - u8 __pad[SMP_CACHE_BYTES - sizeof(int)]; - } stats[NR_CPUS]; }; +/* + * Special macros to let protos use a fast version of inuse{get|add} + * using a static percpu variable per proto instead of an allocated one, + * saving one dereference. 
+ * This might be changed if/when dynamic percpu vars become fast. + */ +#ifdef CONFIG_SMP +# define DEFINE_PROTO_INUSE(NAME) \ +static DEFINE_PER_CPU(int, NAME##_inuse); \ +static void NAME##_inuse_add(struct proto *prot, int inc) \ +{ \ + __get_cpu_var(NAME##_inuse) += inc; \ +} \ + \ +static int NAME##_inuse_getval(const struct proto *prot)\ +{ \ + int res = 0, cpu; \ + \ + for_each_possible_cpu(cpu) \ + res += per_cpu(NAME##_inuse, cpu); \ + return res; \ +} +# define REF_PROTO_INUSE(NAME) \ + .inuse_add = NAME##_inuse_add, \ + .inuse_getval = NAME##_inuse_getval, +#else +# define DEFINE_PROTO_INUSE(NAME) +# define REF_PROTO_INUSE(NAME) +#endif + extern int proto_register(struct proto *prot, int alloc_slab); extern void proto_unregister(struct proto *prot); @@ -629,12 +663,29 @@ static inline void sk_refcnt_debug_release(const struct sock *sk) /* Called with local bh disabled */ static __inline__ void sock_prot_inc_use(struct proto *prot) { - prot->stats[smp_processor_id()].inuse++; +#ifdef CONFIG_SMP + prot->inuse_add(prot, 1); +#else + prot->inuse++; +#endif } static __inline__ void sock_prot_dec_use(struct proto *prot) { - prot->stats[smp_processor_id()].inuse--; +#ifdef CONFIG_SMP + prot->inuse_add(prot, -1); +#else + prot->inuse--; +#endif +} + +static __inline__ int sock_prot_inuse(struct proto *proto) +{ +#ifdef CONFIG_SMP + return proto->inuse_getval(proto); +#else + return proto->inuse; +#endif } /* With per-bucket locks this operation is not-atomic, so that diff --git a/net/core/sock.c b/net/core/sock.c index 12ad2067a988..e077f263b730 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1801,12 +1801,41 @@ EXPORT_SYMBOL(sk_common_release); static DEFINE_RWLOCK(proto_list_lock); static LIST_HEAD(proto_list); +#ifdef CONFIG_SMP +/* + * Define default functions to keep track of inuse sockets per protocol + * Note that often used protocols use dedicated functions to get a speed increase. 
+ * (see DEFINE_PROTO_INUSE/REF_PROTO_INUSE) + */ +static void inuse_add(struct proto *prot, int inc) +{ + per_cpu_ptr(prot->inuse_ptr, smp_processor_id())[0] += inc; +} + +static int inuse_get(const struct proto *prot) +{ + int res = 0, cpu; + for_each_possible_cpu(cpu) + res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; + return res; +} +#endif + int proto_register(struct proto *prot, int alloc_slab) { char *request_sock_slab_name = NULL; char *timewait_sock_slab_name; int rc = -ENOBUFS; +#ifdef CONFIG_SMP + if (!prot->inuse_getval || !prot->inuse_add) { + prot->inuse_ptr = alloc_percpu(int); + if (prot->inuse_ptr == NULL) + goto out; + prot->inuse_getval = inuse_get; + prot->inuse_add = inuse_add; + } +#endif if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1814,7 +1843,7 @@ int proto_register(struct proto *prot, int alloc_slab) if (prot->slab == NULL) { printk(KERN_CRIT "%s: Can't create sock SLAB cache!\n", prot->name); - goto out; + goto out_free_inuse; } if (prot->rsk_prot != NULL) { @@ -1873,6 +1902,15 @@ int proto_register(struct proto *prot, int alloc_slab) out_free_sock_slab: kmem_cache_destroy(prot->slab); prot->slab = NULL; +out_free_inuse: +#ifdef CONFIG_SMP + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +#endif goto out; } @@ -1884,6 +1922,14 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); write_unlock(&proto_list_lock); +#ifdef CONFIG_SMP + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +#endif if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ffdccc0972e0..ce34b281803f 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,17 +46,6 @@ #include #include -static int fold_prot_inuse(struct 
proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - /* * Report socket allocation statistics [mea@utu.fi] */ @@ -64,12 +53,12 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) { socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %d\n", - fold_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), + sock_prot_inuse(&tcp_prot), atomic_read(&tcp_orphan_count), tcp_death_row.tw_count, atomic_read(&tcp_sockets_allocated), atomic_read(&tcp_memory_allocated)); - seq_printf(seq, "UDP: inuse %d\n", fold_prot_inuse(&udp_prot)); - seq_printf(seq, "UDPLITE: inuse %d\n", fold_prot_inuse(&udplite_prot)); - seq_printf(seq, "RAW: inuse %d\n", fold_prot_inuse(&raw_prot)); + seq_printf(seq, "UDP: inuse %d\n", sock_prot_inuse(&udp_prot)); + seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse(&udplite_prot)); + seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(), ip_frag_mem()); return 0; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index be526ad92543..8631ed7fe8a9 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -32,27 +32,16 @@ static struct proc_dir_entry *proc_net_devsnmp6; -static int fold_prot_inuse(struct proto *proto) -{ - int res = 0; - int cpu; - - for_each_possible_cpu(cpu) - res += proto->stats[cpu].inuse; - - return res; -} - static int sockstat6_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "TCP6: inuse %d\n", - fold_prot_inuse(&tcpv6_prot)); + sock_prot_inuse(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", - fold_prot_inuse(&udpv6_prot)); + sock_prot_inuse(&udpv6_prot)); seq_printf(seq, "UDPLITE6: inuse %d\n", - fold_prot_inuse(&udplitev6_prot)); + sock_prot_inuse(&udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", - fold_prot_inuse(&rawv6_prot)); + sock_prot_inuse(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", ip6_frag_nqueues(), 
ip6_frag_mem()); return 0; From 47a31a6ffcca3b55149bccd5b99763e5eea60ac4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:39:16 -0800 Subject: [PATCH 16/44] [IPV4]: Use the {DEFINE|REF}_PROTO_INUSE infrastructure Trivial patch to make "tcp,udp,udplite,raw" protocols uses the fast "inuse sockets" infrastructure Each protocol use then a static percpu var, instead of a dynamic one. This saves some ram and some cpu cycles Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/raw.c | 3 +++ net/ipv4/tcp_ipv4.c | 3 +++ net/ipv4/udp.c | 3 +++ net/ipv4/udplite.c | 3 +++ 4 files changed, 12 insertions(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 3916faca3afe..66b42f547bf9 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -760,6 +760,8 @@ static int raw_ioctl(struct sock *sk, int cmd, unsigned long arg) } } +DEFINE_PROTO_INUSE(raw) + struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, @@ -781,6 +783,7 @@ struct proto raw_prot = { .compat_setsockopt = compat_raw_setsockopt, .compat_getsockopt = compat_raw_getsockopt, #endif + REF_PROTO_INUSE(raw) }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d438dfb0c8f3..e9127cdced20 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2417,6 +2417,8 @@ void tcp4_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +DEFINE_PROTO_INUSE(tcp) + struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, @@ -2451,6 +2453,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif + REF_PROTO_INUSE(tcp) }; void __init tcp_v4_init(struct net_proto_family *ops) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4bc25b46f33f..03c400ca14c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1430,6 +1430,8 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } +DEFINE_PROTO_INUSE(udp) + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ 
-1452,6 +1454,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + REF_PROTO_INUSE(udp) }; /* ------------------------------------------------------------------------ */ diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 94977205abb4..f5baeb3e8b85 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -44,6 +44,8 @@ static struct net_protocol udplite_protocol = { .no_policy = 1, }; +DEFINE_PROTO_INUSE(udplite) + struct proto udplite_prot = { .name = "UDP-Lite", .owner = THIS_MODULE, @@ -67,6 +69,7 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif + REF_PROTO_INUSE(udplite) }; static struct inet_protosw udplite4_protosw = { From c5a432f1a18b4b2efe691dd6bbb30d86a281f783 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:39:51 -0800 Subject: [PATCH 17/44] [IPV6]: Use the {DEFINE|REF}_PROTO_INUSE infrastructure Trivial patch to make "tcpv6,udpv6,udplitev6,rawv6" protocols uses the fast "inuse sockets" infrastructure Each protocol use then a static percpu var, instead of a dynamic one. This saves some ram and some cpu cycles Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- net/ipv6/raw.c | 3 +++ net/ipv6/tcp_ipv6.c | 3 +++ net/ipv6/udp.c | 3 +++ net/ipv6/udplite.c | 3 +++ 4 files changed, 12 insertions(+) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ca24ef19cd8f..807260d03586 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1144,6 +1144,8 @@ static int rawv6_init_sk(struct sock *sk) return(0); } +DEFINE_PROTO_INUSE(rawv6) + struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, @@ -1166,6 +1168,7 @@ struct proto rawv6_prot = { .compat_setsockopt = compat_rawv6_setsockopt, .compat_getsockopt = compat_rawv6_getsockopt, #endif + REF_PROTO_INUSE(rawv6) }; #ifdef CONFIG_PROC_FS diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 06be2a1f2730..3aad861975a0 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2107,6 +2107,8 @@ void tcp6_proc_exit(void) } #endif +DEFINE_PROTO_INUSE(tcpv6) + struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -2141,6 +2143,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif + REF_PROTO_INUSE(tcpv6) }; static struct inet6_protocol tcpv6_protocol = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 8344d8c87219..ee1cc3f8599f 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -970,6 +970,8 @@ void udp6_proc_exit(void) { /* ------------------------------------------------------------------------ */ +DEFINE_PROTO_INUSE(udpv6) + struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, @@ -991,6 +993,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + REF_PROTO_INUSE(udpv6) }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index 766566f7de47..5a0379f71415 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -40,6 +40,8 @@ static int udplite_v6_get_port(struct sock *sk, unsigned short snum) return udplite_get_port(sk, snum, 
ipv6_rcv_saddr_equal); } +DEFINE_PROTO_INUSE(udplitev6) + struct proto udplitev6_prot = { .name = "UDPLITEv6", .owner = THIS_MODULE, @@ -62,6 +64,7 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif + REF_PROTO_INUSE(udplitev6) }; static struct inet_protosw udplite6_protosw = { From 8295b6d9e623879344ed0ca7565336e4fd698e42 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 5 Nov 2007 23:40:28 -0800 Subject: [PATCH 18/44] [SCTP]: Use the {DEFINE|REF}_PROTO_INUSE infrastructure Trivial patch to make "sctcp,sctpv6" protocols uses the fast "inuse sockets" infrastructure Each protocol use then a static percpu var, instead of a dynamic one. This saves some ram and some cpu cycles Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bd6f42a15a4b..a7ecf3159e53 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6455,6 +6455,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, } +DEFINE_PROTO_INUSE(sctp) + /* This proto struct describes the ULP interface for SCTP. 
*/ struct proto sctp_prot = { .name = "SCTP", @@ -6483,9 +6485,12 @@ struct proto sctp_prot = { .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, .memory_allocated = &sctp_memory_allocated, + REF_PROTO_INUSE(sctp) }; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +DEFINE_PROTO_INUSE(sctpv6) + struct proto sctpv6_prot = { .name = "SCTPv6", .owner = THIS_MODULE, @@ -6513,5 +6518,6 @@ struct proto sctpv6_prot = { .memory_pressure = &sctp_memory_pressure, .enter_memory_pressure = sctp_enter_memory_pressure, .memory_allocated = &sctp_memory_allocated, + REF_PROTO_INUSE(sctpv6) }; #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ From c62cf5cb173a5b8446e513a14448460cad435db2 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 5 Nov 2007 23:42:25 -0800 Subject: [PATCH 19/44] [DCCP]: Use DEFINE_PROTO_INUSE infrastructure. Signed-off-by: David S. Miller --- net/dccp/ipv4.c | 3 +++ net/dccp/ipv6.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 01a6a808bdb7..db17b83e8d3e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -922,6 +922,8 @@ static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; +DEFINE_PROTO_INUSE(dccp_v4) + static struct proto dccp_v4_prot = { .name = "DCCP", .owner = THIS_MODULE, @@ -950,6 +952,7 @@ static struct proto dccp_v4_prot = { .compat_setsockopt = compat_dccp_setsockopt, .compat_getsockopt = compat_dccp_getsockopt, #endif + REF_PROTO_INUSE(dccp_v4) }; static struct net_protocol dccp_v4_protocol = { diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 62428ff137dd..87c98fb86fa8 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -1107,6 +1107,8 @@ static struct timewait_sock_ops dccp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct dccp6_timewait_sock), }; +DEFINE_PROTO_INUSE(dccp_v6) + static struct proto dccp_v6_prot = { .name = "DCCPv6", .owner = THIS_MODULE, 
@@ -1135,6 +1137,7 @@ static struct proto dccp_v6_prot = { .compat_setsockopt = compat_dccp_setsockopt, .compat_getsockopt = compat_dccp_getsockopt, #endif + REF_PROTO_INUSE(dccp_v6) }; static struct inet6_protocol dccp_v6_protocol = { From 4f9f8311a08c0d95c70261264a2b47f2ae99683a Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Tue, 6 Nov 2007 03:08:09 -0800 Subject: [PATCH 20/44] [PKT_SCHED]: Fix OOPS when removing devices from a teql queuing discipline teql_reset() is called from deactivate and qdisc is set to noop already, but subsequent teql_xmit does not know about it and dereferences private data as teql qdisc and thus oopses. not catch it first :) Signed-off-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/sched/sch_teql.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 421281d9dd1d..c0ed06d4a504 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -252,6 +252,9 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * static inline int teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) { + if (dev->qdisc == &noop_qdisc) + return -ENODEV; + if (dev->header_ops == NULL || skb->dst == NULL || skb->dst->neighbour == NULL) From 33120b30cc3b8665204d4fcde7288638b0dd04d5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Nov 2007 05:27:11 -0800 Subject: [PATCH 21/44] [IPV6]: Convert /proc/net/ipv6_route to seq_file interface This removes last proc_net_create() user. Kudos to Benjamin Thery and Stephen Hemminger for comments on previous version. Signed-off-by: Alexey Dobriyan Signed-off-by: David S.
Miller --- net/ipv6/route.c | 91 +++++++++++++++--------------------------------- 1 file changed, 29 insertions(+), 62 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 95f8e4a62f68..973a97abc446 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -38,12 +38,8 @@ #include #include #include - -#ifdef CONFIG_PROC_FS #include #include -#endif - #include #include #include @@ -2288,71 +2284,50 @@ struct rt6_proc_arg static int rt6_info_route(struct rt6_info *rt, void *p_arg) { - struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg; + struct seq_file *m = p_arg; - if (arg->skip < arg->offset / RT6_INFO_LEN) { - arg->skip++; - return 0; - } - - if (arg->len >= arg->length) - return 0; - - arg->len += sprintf(arg->buffer + arg->len, - NIP6_SEQFMT " %02x ", - NIP6(rt->rt6i_dst.addr), - rt->rt6i_dst.plen); + seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr), + rt->rt6i_dst.plen); #ifdef CONFIG_IPV6_SUBTREES - arg->len += sprintf(arg->buffer + arg->len, - NIP6_SEQFMT " %02x ", - NIP6(rt->rt6i_src.addr), - rt->rt6i_src.plen); + seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr), + rt->rt6i_src.plen); #else - arg->len += sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000 00 "); + seq_puts(m, "00000000000000000000000000000000 00 "); #endif if (rt->rt6i_nexthop) { - arg->len += sprintf(arg->buffer + arg->len, - NIP6_SEQFMT, - NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); + seq_printf(m, NIP6_SEQFMT, + NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key))); } else { - arg->len += sprintf(arg->buffer + arg->len, - "00000000000000000000000000000000"); + seq_puts(m, "00000000000000000000000000000000"); } - arg->len += sprintf(arg->buffer + arg->len, - " %08x %08x %08x %08x %8s\n", - rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), - rt->u.dst.__use, rt->rt6i_flags, - rt->rt6i_dev ? 
rt->rt6i_dev->name : ""); + seq_printf(m, " %08x %08x %08x %08x %8s\n", + rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt), + rt->u.dst.__use, rt->rt6i_flags, + rt->rt6i_dev ? rt->rt6i_dev->name : ""); return 0; } -static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) +static int ipv6_route_show(struct seq_file *m, void *v) { - struct rt6_proc_arg arg = { - .buffer = buffer, - .offset = offset, - .length = length, - }; - - fib6_clean_all(rt6_info_route, 0, &arg); - - *start = buffer; - if (offset) - *start += offset % RT6_INFO_LEN; - - arg.len -= offset % RT6_INFO_LEN; - - if (arg.len > length) - arg.len = length; - if (arg.len < 0) - arg.len = 0; + fib6_clean_all(rt6_info_route, 0, m); + return 0; +} - return arg.len; +static int ipv6_route_open(struct inode *inode, struct file *file) +{ + return single_open(file, ipv6_route_show, NULL); } +static const struct file_operations ipv6_route_proc_fops = { + .owner = THIS_MODULE, + .open = ipv6_route_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int rt6_stats_seq_show(struct seq_file *seq, void *v) { seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", @@ -2489,22 +2464,14 @@ ctl_table ipv6_route_table[] = { void __init ip6_route_init(void) { -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *p; -#endif ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep; fib6_init(); -#ifdef CONFIG_PROC_FS - p = proc_net_create(&init_net, "ipv6_route", 0, rt6_proc_info); - if (p) - p->owner = THIS_MODULE; - + proc_net_fops_create(&init_net, "ipv6_route", 0, &ipv6_route_proc_fops); proc_net_fops_create(&init_net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); -#endif #ifdef CONFIG_XFRM xfrm6_init(); #endif From 44656ba1286d82b5a5f8817eb2e4ea744143c3ca Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Wed, 7 Nov 2007 04:10:52 -0800 Subject: [PATCH 22/44] [NET]: Kill proc_net_create() There are no more users. Signed-off-by: David S. Miller --- fs/proc/proc_net.c | 7 ------- include/linux/proc_fs.h | 3 --- 2 files changed, 10 deletions(-) diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 749def054a34..153554cf5575 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -26,13 +26,6 @@ #include "internal.h" -struct proc_dir_entry *proc_net_create(struct net *net, - const char *name, mode_t mode, get_info_t *get_info) -{ - return create_proc_info_entry(name,mode, net->proc_net, get_info); -} -EXPORT_SYMBOL_GPL(proc_net_create); - struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops) { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 1ff461672060..1273c6ec535c 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -196,8 +196,6 @@ static inline struct proc_dir_entry *create_proc_info_entry(const char *name, return res; } -extern struct proc_dir_entry *proc_net_create(struct net *net, - const char *name, mode_t mode, get_info_t *get_info); extern struct proc_dir_entry *proc_net_fops_create(struct net *net, const char *name, mode_t mode, const struct file_operations *fops); extern void proc_net_remove(struct net *net, const char *name); @@ -208,7 +206,6 @@ extern void proc_net_remove(struct net *net, const char *name); #define proc_bus NULL #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; }) -#define proc_net_create(net, name, mode, info) ({ (void)(mode), NULL; }) static inline void proc_net_remove(struct net *net, const char *name) {} static inline void proc_flush_task(struct task_struct *task) From 6a9a025086ac70f0f285365cbaf1df8643266b72 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Tue, 6 Nov 2007 20:35:55 -0800 Subject: [PATCH 23/44] [VETH]: Clarify "virtual ethernet device" to "virtual ethernet pair 
device". It'd also be nice to mention "containers" somewhere in the help text (I'm assuming that's what it's for?). Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- drivers/net/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5f800a6dd978..cb581ebbe3c5 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -136,10 +136,11 @@ config TUN If you don't know what to use this for, you don't need it. config VETH - tristate "Virtual ethernet device" + tristate "Virtual ethernet pair device" ---help--- - The device is an ethernet tunnel. Devices are created in pairs. When - one end receives the packet it appears on its pair and vice versa. + This device is a local ethernet tunnel. Devices are created in pairs. + When one end receives the packet it appears on its pair and vice + versa. config NET_SB1000 tristate "General Instruments Surfboard 1000" From c3e9a353d8fc64a82ab11a07e21902e25e1e96d1 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 6 Nov 2007 23:34:04 -0800 Subject: [PATCH 24/44] [IPV4]: Compact some ifdefs in the fib code. There are places that check for CONFIG_IP_MULTIPLE_TABLES twice in the same file, but the internals of these #ifdefs can be merged. As a side effect - remove one ifdef from inside a function. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. 
Miller --- include/net/ip_fib.h | 15 ++++++--------- net/ipv4/fib_frontend.c | 15 ++++++++------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8cadc77c7df4..ed514bfb61ba 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -185,6 +185,12 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result } #else /* CONFIG_IP_MULTIPLE_TABLES */ +extern void __init fib4_rules_init(void); + +#ifdef CONFIG_NET_CLS_ROUTE +extern u32 fib_rules_tclass(struct fib_result *res); +#endif + #define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) #define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) @@ -214,15 +220,6 @@ extern __be32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ extern struct fib_table *fib_hash_init(u32 id); -#ifdef CONFIG_IP_MULTIPLE_TABLES -extern void __init fib4_rules_init(void); - -#ifdef CONFIG_NET_CLS_ROUTE -extern u32 fib_rules_tclass(struct fib_result *res); -#endif - -#endif - static inline void fib_combine_itag(u32 *itag, struct fib_result *res) { #ifdef CONFIG_NET_CLS_ROUTE diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 60123905dbbf..732d8f088b13 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -59,6 +59,13 @@ struct fib_table *ip_fib_main_table; #define FIB_TABLE_HASHSZ 1 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +static void __init fib4_rules_init(void) +{ + ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); + ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); +} #else #define FIB_TABLE_HASHSZ 256 @@ -905,14 +912,8 @@ void __init ip_fib_init(void) for (i = 0; i < FIB_TABLE_HASHSZ; i++) INIT_HLIST_HEAD(&fib_table_hash[i]); -#ifndef CONFIG_IP_MULTIPLE_TABLES - ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - 
hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); - ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); -#else + fib4_rules_init(); -#endif register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); From df61c952622f51facac21dd8dfa4d8a24dcb9657 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Nov 2007 23:48:57 -0800 Subject: [PATCH 25/44] [DLM] lowcomms: Do not muck with sysctl_rmem_max. Use SO_RCVBUFFORCE instead. Signed-off-by: David S. Miller --- fs/dlm/lowcomms.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 58bf3f5cdbe2..e9923ca9c2d9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1062,7 +1062,7 @@ static int sctp_listen_for_all(void) subscribe.sctp_shutdown_event = 1; subscribe.sctp_partial_delivery_event = 1; - result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUF, + result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE, (char *)&bufsize, sizeof(bufsize)); if (result) log_print("Error increasing buffer space on socket %d", result); @@ -1454,10 +1454,6 @@ int dlm_lowcomms_start(void) if (!con_cache) goto out; - /* Set some sysctl minima */ - if (sysctl_rmem_max < NEEDED_RMEM) - sysctl_rmem_max = NEEDED_RMEM; - /* Start listening */ if (dlm_config.ci_protocol == 0) error = tcp_listen_for_all(); From df1e6e54842a47675a2f69a089ecb8ad409f167f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Nov 2007 23:49:37 -0800 Subject: [PATCH 26/44] [RRUNNER]: Do not muck with sysctl_{r,w}mem_max Drivers have no business changing these values. Signed-off-by: David S. 
Miller --- drivers/net/rrunner.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/rrunner.c b/drivers/net/rrunner.c index b822859c8de3..73a7e6529ee0 100644 --- a/drivers/net/rrunner.c +++ b/drivers/net/rrunner.c @@ -78,12 +78,6 @@ static char version[] __devinitdata = "rrunner.c: v0.50 11/11/2002 Jes Sorensen * stack will need to know about I/O vectors or something similar. */ -/* - * sysctl_[wr]mem_max are checked at init time to see if they are at - * least 256KB and increased to 256KB if they are not. This is done to - * avoid ending up with socket buffers smaller than the MTU size, - */ - static int __devinit rr_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -561,18 +555,6 @@ static int __devinit rr_init(struct net_device *dev) sram_size = rr_read_eeprom_word(rrpriv, (void *)8); printk(" SRAM size 0x%06x\n", sram_size); - if (sysctl_rmem_max < 262144){ - printk(" Receive socket buffer limit too low (%i), " - "setting to 262144\n", sysctl_rmem_max); - sysctl_rmem_max = 262144; - } - - if (sysctl_wmem_max < 262144){ - printk(" Transmit socket buffer limit too low (%i), " - "setting to 262144\n", sysctl_wmem_max); - sysctl_wmem_max = 262144; - } - return 0; } From 4aa92cd9acd18ae9c94e87a30f664e77f699dc78 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 7 Nov 2007 00:10:31 -0800 Subject: [PATCH 27/44] [NET]: Let USB_USBNET always select MII. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All this USB_USBNET_MII trickery is simply not worth it considering how little code it saves. As a side effect, this also fixes the following compile error reported by Toralf Förster: <-- snip --> ... 
LD .tmp_vmlinux1 drivers/built-in.o: In function `usbnet_set_settings': (.text+0xf1876): undefined reference to `mii_ethtool_sset' drivers/built-in.o: In function `usbnet_get_settings': (.text+0xf1836): undefined reference to `mii_ethtool_gset' drivers/built-in.o: In function `usbnet_get_link': (.text+0xf18d6): undefined reference to `mii_link_ok' drivers/built-in.o: In function `usbnet_nway_reset': (.text+0xf18f6): undefined reference to `mii_nway_restart' make: *** [.tmp_vmlinux1] Error 1 <-- snip --> Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- drivers/net/usb/Kconfig | 9 +-------- drivers/net/usb/usbnet.c | 7 ------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/usb/Kconfig b/drivers/net/usb/Kconfig index 5a96d74e4ce8..a12c9c41b217 100644 --- a/drivers/net/usb/Kconfig +++ b/drivers/net/usb/Kconfig @@ -93,13 +93,9 @@ config USB_RTL8150 To compile this driver as a module, choose M here: the module will be called rtl8150. -config USB_USBNET_MII - tristate - default n - config USB_USBNET tristate "Multi-purpose USB Networking Framework" - select MII if USB_USBNET_MII != n + select MII ---help--- This driver supports several kinds of network links over USB, with "minidrivers" built around a common network driver core @@ -135,7 +131,6 @@ config USB_NET_AX8817X tristate "ASIX AX88xxx Based USB 2.0 Ethernet Adapters" depends on USB_USBNET && NET_ETHERNET select CRC32 - select USB_USBNET_MII default y help This option adds support for ASIX AX88xxx based USB 2.0 @@ -190,7 +185,6 @@ config USB_NET_DM9601 tristate "Davicom DM9601 based USB 1.1 10/100 ethernet devices" depends on USB_USBNET select CRC32 - select USB_USBNET_MII help This option adds support for Davicom DM9601 based USB 1.1 10/100 Ethernet adapters. 
@@ -225,7 +219,6 @@ config USB_NET_PLUSB config USB_NET_MCS7830 tristate "MosChip MCS7830 based Ethernet adapters" depends on USB_USBNET - select USB_USBNET_MII help Choose this option if you're using a 10/100 Ethernet USB2 adapter based on the MosChip 7830 controller. This includes diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index acd5f1c0e63a..8ed1fc5cbc70 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -683,9 +683,6 @@ static int usbnet_open (struct net_device *net) * they'll probably want to use this base set. */ -#if defined(CONFIG_MII) || defined(CONFIG_MII_MODULE) -#define HAVE_MII - int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd) { struct usbnet *dev = netdev_priv(net); @@ -744,8 +741,6 @@ int usbnet_nway_reset(struct net_device *net) } EXPORT_SYMBOL_GPL(usbnet_nway_reset); -#endif /* HAVE_MII */ - void usbnet_get_drvinfo (struct net_device *net, struct ethtool_drvinfo *info) { struct usbnet *dev = netdev_priv(net); @@ -776,12 +771,10 @@ EXPORT_SYMBOL_GPL(usbnet_set_msglevel); /* drivers may override default ethtool_ops in their bind() routine */ static struct ethtool_ops usbnet_ethtool_ops = { -#ifdef HAVE_MII .get_settings = usbnet_get_settings, .set_settings = usbnet_set_settings, .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, -#endif .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, From 40208d71e0c6b5f912b185e637272b6481fcef3f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 7 Nov 2007 00:49:04 -0800 Subject: [PATCH 28/44] [NET]: Removing duplicate #includes Removing duplicate #includes for net/ Signed-off-by: Jiri Olsa Signed-off-by: David S.
Miller --- net/core/dst.c | 1 - net/ieee80211/ieee80211_crypt_tkip.c | 1 - net/ieee80211/ieee80211_crypt_wep.c | 1 - 3 files changed, 3 deletions(-) diff --git a/net/core/dst.c b/net/core/dst.c index 16958e64e577..03daead3592a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -18,7 +18,6 @@ #include #include -#include #include /* diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 4cce3534e408..58b22619ab15 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -25,7 +25,6 @@ #include #include -#include #include MODULE_AUTHOR("Jouni Malinen"); diff --git a/net/ieee80211/ieee80211_crypt_wep.c b/net/ieee80211/ieee80211_crypt_wep.c index 866fc04c44f9..3fa30c40779f 100644 --- a/net/ieee80211/ieee80211_crypt_wep.c +++ b/net/ieee80211/ieee80211_crypt_wep.c @@ -22,7 +22,6 @@ #include #include -#include #include MODULE_AUTHOR("Jouni Malinen"); From 543821c6f5dea5221426eaf1eac98b100249c7ac Mon Sep 17 00:00:00 2001 From: Radu Rendec Date: Wed, 7 Nov 2007 01:20:12 -0800 Subject: [PATCH 29/44] [PKT_SCHED] CLS_U32: Fix endianness problem with u32 classifier hash masks. While trying to implement u32 hashes in my shaping machine I ran into a possible bug in the u32 hash/bucket computing algorithm (net/sched/cls_u32.c). The problem occurs only with hash masks that extend over the octet boundary, on little endian machines (where htonl() actually does something). Let's say that I would like to use 0x3fc0 as the hash mask. This means 8 contiguous "1" bits starting at b6. With such a mask, the expected (and logical) behavior is to hash any address in, for instance, 192.168.0.0/26 in bucket 0, then any address in 192.168.0.64/26 in bucket 1, then 192.168.0.128/26 in bucket 2 and so on. 
This is exactly what would happen on a big endian machine, but on little endian machines, what would actually happen with the current implementation is 0x3fc0 being reversed (into 0xc03f0000) by htonl() in the userspace tool and then applied to 192.168.x.x in the u32 classifier. When shifting right by 16 bits (rank of first "1" bit in the reversed mask) and applying the divisor mask (0xff for divisor 256), what would actually remain is 0x3f applied on the "168" octet of the address. One could say that this can be easily worked around by taking endianness into account in userspace and supplying an appropriate mask (0xfc03) that would be turned into contiguous "1" bits when reversed (0x03fc0000). But the actual problem is the network address (inside the packet) not being converted to host order, but used as a host-order value when computing the bucket. Let's say the network address is written as n31 n30 ... n0, with n0 being the least significant bit. When used directly (without any conversion) on a little endian machine, it becomes n7 ... n0 n8 ... n15 etc in the machine's registers. Thus bits n7 and n8 would no longer be adjacent and 192.168.64.0/26 and 192.168.128.0/26 would no longer be consecutive. The fix is to apply ntohl() on the hmask before computing fshift, and in u32_hash_fold() convert the packet data to host order before shifting down by fshift. With helpful feedback from Jamal Hadi Salim and Jarek Poplawski. Signed-off-by: David S.
Miller --- net/sched/cls_u32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 9e98c6e567dd..53171029439f 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -91,7 +91,7 @@ static struct tc_u_common *u32_list; static __inline__ unsigned u32_hash_fold(u32 key, struct tc_u32_sel *sel, u8 fshift) { - unsigned h = (key & sel->hmask)>>fshift; + unsigned h = ntohl(key & sel->hmask)>>fshift; return h; } @@ -615,7 +615,7 @@ static int u32_change(struct tcf_proto *tp, unsigned long base, u32 handle, n->handle = handle; { u8 i = 0; - u32 mask = s->hmask; + u32 mask = ntohl(s->hmask); if (mask) { while (!(mask & 1)) { i++; From 0fc00e2440b717e19bab1ae0015f03936bdf7967 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 7 Nov 2007 01:24:56 -0800 Subject: [PATCH 30/44] [TTY]: Fix network driver interactions with TCGET/SET calls. Dave Miller noted various cases where line disciplines for things like ppp go poking around in termios themselves in ways that broke with the new termios code. Rather than have them all learning about termios internals provide proper methods for this - tty_mode_ioctl() This handles all the terminal mode handling for speed/carrier etc and none of the methods are ldisc dependant so they can be called by any user - tty_perform_flush() This extracts the flush functionality and enables pppd the ppp layer to share it cleanly. The existing n_tty_ioctl code is refactored in this patch to provide the new functions and to call them itself appropriately. This patch has no (intended) behaviour changes and simply prepares for the other fixes. Signed-off-by: Alan Cox Signed-off-by: David S. 
Miller --- drivers/char/tty_ioctl.c | 170 ++++++++++++++++++++++++--------------- include/linux/tty.h | 4 +- 2 files changed, 107 insertions(+), 67 deletions(-) diff --git a/drivers/char/tty_ioctl.c b/drivers/char/tty_ioctl.c index 7a003504c265..1bdd2bf4f37d 100644 --- a/drivers/char/tty_ioctl.c +++ b/drivers/char/tty_ioctl.c @@ -730,13 +730,23 @@ static int send_prio_char(struct tty_struct *tty, char ch) return 0; } -int n_tty_ioctl(struct tty_struct * tty, struct file * file, - unsigned int cmd, unsigned long arg) +/** + * tty_mode_ioctl - mode related ioctls + * @tty: tty for the ioctl + * @file: file pointer for the tty + * @cmd: command + * @arg: ioctl argument + * + * Perform non line discipline specific mode control ioctls. This + * is designed to be called by line disciplines to ensure they provide + * consistent mode setting. + */ + +int tty_mode_ioctl(struct tty_struct * tty, struct file *file, + unsigned int cmd, unsigned long arg) { struct tty_struct * real_tty; void __user *p = (void __user *)arg; - int retval; - struct tty_ldisc *ld; if (tty->driver->type == TTY_DRIVER_TYPE_PTY && tty->driver->subtype == PTY_TYPE_MASTER) @@ -799,6 +809,93 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file, return set_termios(real_tty, p, TERMIOS_WAIT | TERMIOS_TERMIO); case TCSETA: return set_termios(real_tty, p, TERMIOS_TERMIO); +#ifndef TCGETS2 + case TIOCGLCKTRMIOS: + if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked)) + return -EFAULT; + return 0; + + case TIOCSLCKTRMIOS: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (user_termios_to_kernel_termios(real_tty->termios_locked, (struct termios __user *) arg)) + return -EFAULT; + return 0; +#else + case TIOCGLCKTRMIOS: + if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked)) + return -EFAULT; + return 0; + + case TIOCSLCKTRMIOS: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if 
(user_termios_to_kernel_termios_1(real_tty->termios_locked, (struct termios __user *) arg)) + return -EFAULT; + return 0; +#endif + case TIOCGSOFTCAR: + return put_user(C_CLOCAL(tty) ? 1 : 0, (int __user *)arg); + case TIOCSSOFTCAR: + if (get_user(arg, (unsigned int __user *) arg)) + return -EFAULT; + mutex_lock(&tty->termios_mutex); + tty->termios->c_cflag = + ((tty->termios->c_cflag & ~CLOCAL) | + (arg ? CLOCAL : 0)); + mutex_unlock(&tty->termios_mutex); + return 0; + default: + return -ENOIOCTLCMD; + } +} + +EXPORT_SYMBOL_GPL(tty_mode_ioctl); + +int tty_perform_flush(struct tty_struct *tty, unsigned long arg) +{ + struct tty_ldisc *ld; + int retval = tty_check_change(tty); + if (retval) + return retval; + + ld = tty_ldisc_ref(tty); + switch (arg) { + case TCIFLUSH: + if (ld && ld->flush_buffer) + ld->flush_buffer(tty); + break; + case TCIOFLUSH: + if (ld && ld->flush_buffer) + ld->flush_buffer(tty); + /* fall through */ + case TCOFLUSH: + if (tty->driver->flush_buffer) + tty->driver->flush_buffer(tty); + break; + default: + tty_ldisc_deref(ld); + return -EINVAL; + } + tty_ldisc_deref(ld); + return 0; +} + +EXPORT_SYMBOL_GPL(tty_perform_flush); + +int n_tty_ioctl(struct tty_struct * tty, struct file * file, + unsigned int cmd, unsigned long arg) +{ + struct tty_struct * real_tty; + int retval; + + if (tty->driver->type == TTY_DRIVER_TYPE_PTY && + tty->driver->subtype == PTY_TYPE_MASTER) + real_tty = tty->link; + else + real_tty = tty; + + switch (cmd) { case TCXONC: retval = tty_check_change(tty); if (retval) @@ -829,30 +926,7 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file, } return 0; case TCFLSH: - retval = tty_check_change(tty); - if (retval) - return retval; - - ld = tty_ldisc_ref(tty); - switch (arg) { - case TCIFLUSH: - if (ld && ld->flush_buffer) - ld->flush_buffer(tty); - break; - case TCIOFLUSH: - if (ld && ld->flush_buffer) - ld->flush_buffer(tty); - /* fall through */ - case TCOFLUSH: - if (tty->driver->flush_buffer) - 
tty->driver->flush_buffer(tty); - break; - default: - tty_ldisc_deref(ld); - return -EINVAL; - } - tty_ldisc_deref(ld); - return 0; + return tty_perform_flush(tty, arg); case TIOCOUTQ: return put_user(tty->driver->chars_in_buffer ? tty->driver->chars_in_buffer(tty) : 0, @@ -862,32 +936,6 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file, if (L_ICANON(tty)) retval = inq_canon(tty); return put_user(retval, (unsigned int __user *) arg); -#ifndef TCGETS2 - case TIOCGLCKTRMIOS: - if (kernel_termios_to_user_termios((struct termios __user *)arg, real_tty->termios_locked)) - return -EFAULT; - return 0; - - case TIOCSLCKTRMIOS: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (user_termios_to_kernel_termios(real_tty->termios_locked, (struct termios __user *) arg)) - return -EFAULT; - return 0; -#else - case TIOCGLCKTRMIOS: - if (kernel_termios_to_user_termios_1((struct termios __user *)arg, real_tty->termios_locked)) - return -EFAULT; - return 0; - - case TIOCSLCKTRMIOS: - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - if (user_termios_to_kernel_termios_1(real_tty->termios_locked, (struct termios __user *) arg)) - return -EFAULT; - return 0; -#endif - case TIOCPKT: { int pktmode; @@ -906,19 +954,9 @@ int n_tty_ioctl(struct tty_struct * tty, struct file * file, tty->packet = 0; return 0; } - case TIOCGSOFTCAR: - return put_user(C_CLOCAL(tty) ? 1 : 0, (int __user *)arg); - case TIOCSSOFTCAR: - if (get_user(arg, (unsigned int __user *) arg)) - return -EFAULT; - mutex_lock(&tty->termios_mutex); - tty->termios->c_cflag = - ((tty->termios->c_cflag & ~CLOCAL) | - (arg ? 
CLOCAL : 0)); - mutex_unlock(&tty->termios_mutex); - return 0; default: - return -ENOIOCTLCMD; + /* Try the mode commands */ + return tty_mode_ioctl(tty, file, cmd, arg); } } diff --git a/include/linux/tty.h b/include/linux/tty.h index 56164d7ba0ad..c555f5442bd7 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -332,7 +332,9 @@ extern void tty_ldisc_flush(struct tty_struct *tty); extern int tty_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg); - +extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, + unsigned int cmd, unsigned long arg); +extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern dev_t tty_devnum(struct tty_struct *tty); extern void proc_clear_tty(struct task_struct *p); extern struct tty_struct *get_current_tty(void); From d0127539ea9b5fcfe1a1d7d4d57f12384da5190c Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Wed, 7 Nov 2007 01:27:34 -0800 Subject: [PATCH 31/44] [TTY]: Use tty_mode_ioctl() in network drivers. We consciously make a change here - we permit mode and speed setting to be done in things like SLIP mode. There isn't actually a technical reason to disallow this. It's usually a silly thing to do but we can do it and someone might wish to do so. Signed-off-by: Alan Cox Signed-off-by: David S.
Miller --- drivers/net/hamradio/6pack.c | 26 ++++++++++---------------- drivers/net/irda/irtty-sir.c | 7 +------ drivers/net/ppp_async.c | 10 +++------- drivers/net/ppp_synctty.c | 10 +++------- drivers/net/slip.c | 8 +------- drivers/net/wan/x25_asy.c | 6 +----- drivers/net/wireless/strip.c | 10 +--------- 7 files changed, 20 insertions(+), 57 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index e0119f6a3319..580cb4ab2af1 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -762,26 +762,20 @@ static int sixpack_ioctl(struct tty_struct *tty, struct file *file, if (copy_from_user(&addr, (void __user *) arg, AX25_ADDR_LEN)) { - err = -EFAULT; - break; - } + err = -EFAULT; + break; + } - netif_tx_lock_bh(dev); - memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN); - netif_tx_unlock_bh(dev); + netif_tx_lock_bh(dev); + memcpy(dev->dev_addr, &addr, AX25_ADDR_LEN); + netif_tx_unlock_bh(dev); - err = 0; - break; - } - - /* Allow stty to read, but not set, the serial port */ - case TCGETS: - case TCGETA: - err = n_tty_ioctl(tty, (struct file *) file, cmd, arg); - break; + err = 0; + break; + } default: - err = -ENOIOCTLCMD; + err = tty_mode_ioctl(tty, file, cmd, arg); } sp_put(sp); diff --git a/drivers/net/irda/irtty-sir.c b/drivers/net/irda/irtty-sir.c index 2c6f7be36e8a..fc753d7f674e 100644 --- a/drivers/net/irda/irtty-sir.c +++ b/drivers/net/irda/irtty-sir.c @@ -434,11 +434,6 @@ static int irtty_ioctl(struct tty_struct *tty, struct file *file, unsigned int c IRDA_ASSERT(dev != NULL, return -1;); switch (cmd) { - case TCGETS: - case TCGETA: - err = n_tty_ioctl(tty, file, cmd, arg); - break; - case IRTTY_IOCTDONGLE: /* this call blocks for completion */ err = sirdev_set_dongle(dev, (IRDA_DONGLE) arg); @@ -454,7 +449,7 @@ static int irtty_ioctl(struct tty_struct *tty, struct file *file, unsigned int c err = -EFAULT; break; default: - err = -ENOIOCTLCMD; + err = tty_mode_ioctl(tty, file, cmd, arg); break; } return 
err; diff --git a/drivers/net/ppp_async.c b/drivers/net/ppp_async.c index 27f5b904f48e..8d278c87ba48 100644 --- a/drivers/net/ppp_async.c +++ b/drivers/net/ppp_async.c @@ -309,16 +309,11 @@ ppp_asynctty_ioctl(struct tty_struct *tty, struct file *file, err = 0; break; - case TCGETS: - case TCGETA: - err = n_tty_ioctl(tty, file, cmd, arg); - break; - case TCFLSH: /* flush our buffers and the serial port's buffer */ if (arg == TCIOFLUSH || arg == TCOFLUSH) ppp_async_flush_output(ap); - err = n_tty_ioctl(tty, file, cmd, arg); + err = tty_perform_flush(tty, arg); break; case FIONREAD: @@ -329,7 +324,8 @@ ppp_asynctty_ioctl(struct tty_struct *tty, struct file *file, break; default: - err = -ENOIOCTLCMD; + /* Try the various mode ioctls */ + err = tty_mode_ioctl(tty, file, cmd, arg); } ap_put(ap); diff --git a/drivers/net/ppp_synctty.c b/drivers/net/ppp_synctty.c index ce64032a465a..00e2fb48b4ae 100644 --- a/drivers/net/ppp_synctty.c +++ b/drivers/net/ppp_synctty.c @@ -349,16 +349,11 @@ ppp_synctty_ioctl(struct tty_struct *tty, struct file *file, err = 0; break; - case TCGETS: - case TCGETA: - err = n_tty_ioctl(tty, file, cmd, arg); - break; - case TCFLSH: /* flush our buffers and the serial port's buffer */ if (arg == TCIOFLUSH || arg == TCOFLUSH) ppp_sync_flush_output(ap); - err = n_tty_ioctl(tty, file, cmd, arg); + err = tty_perform_flush(tty, arg); break; case FIONREAD: @@ -369,7 +364,8 @@ ppp_synctty_ioctl(struct tty_struct *tty, struct file *file, break; default: - err = -ENOIOCTLCMD; + err = tty_mode_ioctl(tty, file, cmd, arg); + break; } sp_put(ap); diff --git a/drivers/net/slip.c b/drivers/net/slip.c index 335b7cc80eba..251a3ce376ac 100644 --- a/drivers/net/slip.c +++ b/drivers/net/slip.c @@ -1218,14 +1218,8 @@ static int slip_ioctl(struct tty_struct *tty, struct file *file, unsigned int cm return 0; /* VSV changes end */ #endif - - /* Allow stty to read, but not set, the serial port */ - case TCGETS: - case TCGETA: - return n_tty_ioctl(tty, file, cmd, arg); - 
default: - return -ENOIOCTLCMD; + return tty_mode_ioctl(tty, file, cmd, arg); } } diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c index c48b1cc63fd5..1e89d4de1bb7 100644 --- a/drivers/net/wan/x25_asy.c +++ b/drivers/net/wan/x25_asy.c @@ -719,12 +719,8 @@ static int x25_asy_ioctl(struct tty_struct *tty, struct file *file, return 0; case SIOCSIFHWADDR: return -EINVAL; - /* Allow stty to read, but not set, the serial port */ - case TCGETS: - case TCGETA: - return n_tty_ioctl(tty, file, cmd, arg); default: - return -ENOIOCTLCMD; + return tty_mode_ioctl(tty, file, cmd, arg); } } diff --git a/drivers/net/wireless/strip.c b/drivers/net/wireless/strip.c index 4bd14b331862..88efe1bae58f 100644 --- a/drivers/net/wireless/strip.c +++ b/drivers/net/wireless/strip.c @@ -2735,16 +2735,8 @@ static int strip_ioctl(struct tty_struct *tty, struct file *file, return -EFAULT; return set_mac_address(strip_info, &addr); } - /* - * Allow stty to read, but not set, the serial port - */ - - case TCGETS: - case TCGETA: - return n_tty_ioctl(tty, file, cmd, arg); - break; default: - return -ENOIOCTLCMD; + return tty_mode_ioctl(tty, file, cmd, arg); break; } return 0; From 45a19b0a725a04f3255d9d3da1fca30bb97f1481 Mon Sep 17 00:00:00 2001 From: Johann Felix Soden Date: Wed, 7 Nov 2007 01:30:30 -0800 Subject: [PATCH 32/44] [NETNS]: Fix compiler error in net_namespace.c Because net_free is called by copy_net_ns before its declaration, the compiler gives an error. This patch puts net_free before copy_net_ns to fix this. 
The compiler error: net/core/net_namespace.c: In function 'copy_net_ns': net/core/net_namespace.c:97: error: implicit declaration of function 'net_free' net/core/net_namespace.c: At top level: net/core/net_namespace.c:104: warning: conflicting types for 'net_free' net/core/net_namespace.c:104: error: static declaration of 'net_free' follows non-static declaration net/core/net_namespace.c:97: error: previous implicit declaration of 'net_free' was here The error was introduced by the '[NET]: Hide the dead code in the net_namespace.c' patch (6a1a3b9f686bb04820a232cc1657ef2c45670709). Signed-off-by: Johann Felix Soden Signed-off-by: David S. Miller --- net/core/net_namespace.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e9f0964ce70b..3f6d37deac45 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -64,6 +64,20 @@ static struct net *net_alloc(void) return kmem_cache_zalloc(net_cachep, GFP_KERNEL); } +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *new_net = NULL; @@ -100,20 +114,6 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) return new_net; } -static void net_free(struct net *net) -{ - if (!net) - return; - - if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! 
Usage: %d\n", - atomic_read(&net->use_count)); - return; - } - - kmem_cache_free(net_cachep, net); -} - static void cleanup_net(struct work_struct *work) { struct pernet_operations *ops; From fffe470a803e7f7b74c016291e542a0162761209 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 7 Nov 2007 01:31:32 -0800 Subject: [PATCH 33/44] [VLAN]: Fix SET_VLAN_INGRESS_PRIORITY_CMD ioctl Based on report and patch by Doug Kehn : vconfig returns the following error when attempting to execute the set_ingress_map command: vconfig: socket or ioctl error for set_ingress_map: Operation not permitted In vlan.c, vlan_ioctl_handler for SET_VLAN_INGRESS_PRIORITY_CMD sets err = -EPERM and calls vlan_dev_set_ingress_priority. vlan_dev_set_ingress_priority is a void function so err remains at -EPERM and results in the vconfig error (even though the ingress map was set). Fix by setting err = 0 after the vlan_dev_set_ingress_priority call. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 3fe4fc86055f..1037748c14db 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -747,6 +747,7 @@ static int vlan_ioctl_handler(struct net *net, void __user *arg) vlan_dev_set_ingress_priority(dev, args.u.skb_priority, args.vlan_qos); + err = 0; break; case SET_VLAN_EGRESS_PRIORITY_CMD: From 4999f3621f4da622e77931b3d33ada6c7083c705 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 7 Nov 2007 02:21:47 -0800 Subject: [PATCH 34/44] [IPSEC]: Fix crypto_alloc_comp error checking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function crypto_alloc_comp returns an errno instead of NULL to indicate error. So it needs to be tested with IS_ERR. This is based on a patch by Vicenç Beltran Querol. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- net/ipv4/ipcomp.c | 3 ++- net/ipv6/ipcomp6.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index ca1b5fdb8d31..2c44a94c2135 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -344,7 +345,7 @@ static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) for_each_possible_cpu(cpu) { struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, CRYPTO_ALG_ASYNC); - if (!tfm) + if (IS_ERR(tfm)) goto error; *per_cpu_ptr(tfms, cpu) = tfm; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 85eb4798d8d2..0cd4056f9127 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -358,7 +359,7 @@ static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) for_each_possible_cpu(cpu) { struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, CRYPTO_ALG_ASYNC); - if (!tfm) + if (IS_ERR(tfm)) goto error; *per_cpu_ptr(tfms, cpu) = tfm; } From b733c007edad6f3e05109951bacc6f87dd807917 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 7 Nov 2007 02:23:38 -0800 Subject: [PATCH 35/44] [NET]: Clean proto_(un)register from in-code ifdefs The struct proto has the per-cpu "inuse" counter, which is handled with special care. All the handling code is hidden under #ifdef CONFIG_SMP, which introduces some code duplication and makes the code look worse than it could. Clean this up. Signed-off-by: Pavel Emelyanov Signed-off-by: David S.
Miller --- net/core/sock.c | 67 ++++++++++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 28 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index e077f263b730..8fc2f84209e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1819,23 +1819,48 @@ static int inuse_get(const struct proto *prot) res += per_cpu_ptr(prot->inuse_ptr, cpu)[0]; return res; } -#endif -int proto_register(struct proto *prot, int alloc_slab) +static int inuse_init(struct proto *prot) { - char *request_sock_slab_name = NULL; - char *timewait_sock_slab_name; - int rc = -ENOBUFS; - -#ifdef CONFIG_SMP if (!prot->inuse_getval || !prot->inuse_add) { prot->inuse_ptr = alloc_percpu(int); if (prot->inuse_ptr == NULL) - goto out; + return -ENOBUFS; + prot->inuse_getval = inuse_get; prot->inuse_add = inuse_add; } + return 0; +} + +static void inuse_fini(struct proto *prot) +{ + if (prot->inuse_ptr != NULL) { + free_percpu(prot->inuse_ptr); + prot->inuse_ptr = NULL; + prot->inuse_getval = NULL; + prot->inuse_add = NULL; + } +} +#else +static inline int inuse_init(struct proto *prot) +{ + return 0; +} + +static inline void inuse_fini(struct proto *prot) +{ +} #endif + +int proto_register(struct proto *prot, int alloc_slab) +{ + char *request_sock_slab_name = NULL; + char *timewait_sock_slab_name; + + if (inuse_init(prot)) + goto out; + if (alloc_slab) { prot->slab = kmem_cache_create(prot->name, prot->obj_size, 0, SLAB_HWCACHE_ALIGN, NULL); @@ -1887,9 +1912,8 @@ int proto_register(struct proto *prot, int alloc_slab) write_lock(&proto_list_lock); list_add(&prot->node, &proto_list); write_unlock(&proto_list_lock); - rc = 0; -out: - return rc; + return 0; + out_free_timewait_sock_slab_name: kfree(timewait_sock_slab_name); out_free_request_sock_slab: @@ -1903,15 +1927,9 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_destroy(prot->slab); prot->slab = NULL; out_free_inuse: -#ifdef CONFIG_SMP - if (prot->inuse_ptr != NULL) { - 
free_percpu(prot->inuse_ptr); - prot->inuse_ptr = NULL; - prot->inuse_getval = NULL; - prot->inuse_add = NULL; - } -#endif - goto out; + inuse_fini(prot); +out: + return -ENOBUFS; } EXPORT_SYMBOL(proto_register); @@ -1922,14 +1940,7 @@ void proto_unregister(struct proto *prot) list_del(&prot->node); write_unlock(&proto_list_lock); -#ifdef CONFIG_SMP - if (prot->inuse_ptr != NULL) { - free_percpu(prot->inuse_ptr); - prot->inuse_ptr = NULL; - prot->inuse_getval = NULL; - prot->inuse_add = NULL; - } -#endif + inuse_fini(prot); if (prot->slab != NULL) { kmem_cache_destroy(prot->slab); prot->slab = NULL; From 240e546445709dd9a883a0629b55961f8efe7f27 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 7 Nov 2007 02:26:15 -0800 Subject: [PATCH 36/44] [NET]: Remove Documentation/networking/Configurable After more than 11 years this file no longer contains much useful information. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 -- Documentation/networking/Configurable | 34 --------------------------- 2 files changed, 36 deletions(-) delete mode 100644 Documentation/networking/Configurable diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index f5a5e6d3d541..53adb0512193 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -4,8 +4,6 @@ - information on the 3Com EtherLink Plus (3c505) driver.
6pack.txt - info on the 6pack protocol, an alternative to KISS for AX.25 -Configurable - - info on some of the configurable network parameters DLINK.txt - info on the D-Link DE-600/DE-620 parallel port pocket adapters PLIP.txt diff --git a/Documentation/networking/Configurable b/Documentation/networking/Configurable deleted file mode 100644 index 69c0dd466ead..000000000000 --- a/Documentation/networking/Configurable +++ /dev/null @@ -1,34 +0,0 @@ - -There are a few network parameters that can be tuned to better match -the kernel to your system hardware and intended usage. The defaults -are usually a good choice for 99% of the people 99% of the time, but -you should be aware they do exist and can be changed. - -The current list of parameters can be found in the files: - - linux/net/TUNABLE - Documentation/networking/ip-sysctl.txt - -Some of these are accessible via the sysctl interface, and many more are -scheduled to be added in this way. For example, some parameters related -to Address Resolution Protocol (ARP) are very easily viewed and altered. - - # cat /proc/sys/net/ipv4/arp_timeout - 6000 - # echo 7000 > /proc/sys/net/ipv4/arp_timeout - # cat /proc/sys/net/ipv4/arp_timeout - 7000 - -Others are already accessible via the related user space programs. -For example, MAX_WINDOW has a default of 32 k which is a good choice for -modern hardware, but if you have a slow (8 bit) Ethernet card and/or a slow -machine, then this will be far too big for the card to keep up with fast -machines transmitting on the same net, resulting in overruns and receive errors. -A value of about 4 k would be more appropriate, which can be set via: - - # route add -net 192.168.3.0 window 4096 - -The remainder of these can only be presently changed by altering a #define -in the related header file. This means an edit and recompile cycle. 
- - Paul Gortmaker 06/96 From 915590cf642d82e4c9eddf8051ce2eb159ef9af4 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 7 Nov 2007 02:28:52 -0800 Subject: [PATCH 37/44] [NET]: Remove comx driver docs. The drivers were already removed 3.5 years ago. Signed-off-by: Adrian Bunk Acked-by: Alan Cox Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 - Documentation/networking/comx.txt | 248 ----------------- Documentation/networking/slicecom.hun | 371 -------------------------- Documentation/networking/slicecom.txt | 369 ------------------------- 4 files changed, 990 deletions(-) delete mode 100644 Documentation/networking/comx.txt delete mode 100644 Documentation/networking/slicecom.hun delete mode 100644 Documentation/networking/slicecom.txt diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 53adb0512193..a9f4accd8989 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -24,8 +24,6 @@ baycom.txt - info on the driver for Baycom style amateur radio modems bridge.txt - where to get user space programs for ethernet bridging with Linux. -comx.txt - - info on drivers for COMX line of synchronous serial adapters. cops.txt - info on the COPS LocalTalk Linux driver cs89x0.txt diff --git a/Documentation/networking/comx.txt b/Documentation/networking/comx.txt deleted file mode 100644 index d1526eba2645..000000000000 --- a/Documentation/networking/comx.txt +++ /dev/null @@ -1,248 +0,0 @@ - - COMX drivers for the 2.2 kernel - -Originally written by: Tivadar Szemethy, -Currently maintained by: Gergely Madarasz - -Last change: 21/06/1999. - -INTRODUCTION - -This document describes the software drivers and their use for the -COMX line of synchronous serial adapters for Linux version 2.2.0 and -above. -The cards are produced and sold by ITC-Pro Ltd. Budapest, Hungary -For further info contact -or http://www.itc.hu (mostly in Hungarian).
-The firmware files and software are available from ftp://ftp.itc.hu - -Currently, the drivers support the following cards and protocols: - -COMX (2x64 kbps intelligent board) -CMX (1x256 + 1x128 kbps intelligent board) -HiCOMX (2x2Mbps intelligent board) -LoCOMX (1x512 kbps passive board) -MixCOM (1x512 or 2x512kbps passive board with a hardware watchdog an - optional BRI interface and optional flashROM (1-32M)) -SliceCOM (1x2Mbps channelized E1 board) -PciCOM (X21) - -At the moment of writing this document, the (Cisco)-HDLC, LAPB, SyncPPP and -Frame Relay (DTE, rfc1294 IP encapsulation with partially implemented Q933a -LMI) protocols are available as link-level protocol. -X.25 support is being worked on. - -USAGE - -Load the comx.o module and the hardware-specific and protocol-specific -modules you'll need into the running kernel using the insmod utility. -This creates the /proc/comx directory. -See the example scripts in the 'etc' directory. - -/proc INTERFACE INTRO - -The COMX driver set has a new type of user interface based on the /proc -filesystem which eliminates the need for external user-land software doing -IOCTL calls. -Each network interface or device (i.e. those ones you configure with 'ifconfig' -and 'route' etc.) has a corresponding directory under /proc/comx. You can -dynamically create a new interface by saying 'mkdir /proc/comx/comx0' (or you -can name it whatever you want up to 8 characters long, comx[n] is just a -convention). -Generally the files contained in these directories are text files, which can -be viewed by 'cat filename' and you can write a string to such a file by -saying 'echo _string_ >filename'. This is very similar to the sysctl interface. -Don't use a text editor to edit these files, always use 'echo' (or 'cat' -where appropriate). -When you've created the comx[n] directory, two files are created automagically -in it: 'boardtype' and 'protocol'. 
You have to fill in these files correctly -for your board and protocol you intend to use (see the board and protocol -descriptions in this file below or the example scripts in the 'etc' directory). -After filling in these files, other files will appear in the directory for -setting the various hardware- and protocol-related informations (for example -irq and io addresses, keepalive values etc.) These files are set to default -values upon creation, so you don't necessarily have to change all of them. - -When you're ready with filling in the files in the comx[n] directory, you can -configure the corresponding network interface with the standard network -configuration utilities. If you're unable to bring the interfaces up, look up -the various kernel log files on your system, and consult the messages for -a probable reason. - -EXAMPLE - -To create the interface 'comx0' which is the first channel of a COMX card: - -insmod comx -# insmod comx-hw-comx ; insmod comx-proto-ppp (these are usually -autoloaded if you use the kernel module loader) - -mkdir /proc/comx/comx0 -echo comx >/proc/comx/comx0/boardtype -echo 0x360 >/proc/comx/comx0/io <- jumper-selectable I/O port -echo 0x0a >/proc/comx/comx0/irq <- jumper-selectable IRQ line -echo 0xd000 >/proc/comx/comx0/memaddr <- software-configurable memory - address. COMX uses 64 KB, and this - can be: 0xa000, 0xb000, 0xc000, - 0xd000, 0xe000. Avoid conflicts - with other hardware. 
-cat /proc/comx/comx0/firmware <- the firmware for the card -echo HDLC >/proc/comx/comx0/protocol <- the data-link protocol -echo 10 >/proc/comx/comx0/keepalive <- the keepalive for the protocol -ifconfig comx0 1.2.3.4 pointopoint 5.6.7.8 netmask 255.255.255.255 <- - finally configure it with ifconfig -Check its status: -cat /proc/comx/comx0/status - -If you want to use the second channel of this board: - -mkdir /proc/comx/comx1 -echo comx >/proc/comx/comx1/boardtype -echo 0x360 >/proc/comx/comx1/io -echo 10 >/proc/comx/comx1/irq -echo 0xd000 >/proc/comx/comx1/memaddr -echo 1 >/proc/comx/comx1/channel <- channels are numbered - as 0 (default) and 1 - -Now, check if the driver recognized that you're going to use the other -channel of the same adapter: - -cat /proc/comx/comx0/twin -comx1 -cat /proc/comx/comx1/twin -comx0 - -You don't have to load the firmware twice, if you use both channels of -an adapter, just write it into the channel 0's /proc firmware file. - -Default values: io 0x360 for COMX, 0x320 (HICOMX), irq 10, memaddr 0xd0000 - -THE LOCOMX HARDWARE DRIVER - -The LoCOMX driver doesn't require firmware, and it doesn't use memory either, -but it uses DMA channels 1 and 3. You can set the clock rate (if enabled by -jumpers on the board) by writing the kbps value into the file named 'clock'. -Set it to 'external' (it is the default) if you have external clock source. - -(Note: currently the LoCOMX driver does not support the internal clock) - -THE COMX, CMX AND HICOMX DRIVERS - -On the HICOMX, COMX and CMX, you have to load the firmware (it is different for -the three cards!). All these adapters can share the same memory -address (we usually use 0xd0000). On the CMX you can set the internal -clock rate (if enabled by jumpers on the small adapter boards) by writing -the kbps value into the 'clock' file. You have to do this before initializing -the card. If you use both HICOMX and CMX/COMX cards, initialize the HICOMX -first. 
The I/O address of the HICOMX board is not configurable by any -method available to the user: it is hardwired to 0x320, and if you have to -change it, consult ITC-Pro Ltd. - -THE MIXCOM DRIVER - -The MixCOM board doesn't require firmware, the driver communicates with -it through I/O ports. You can have three of these cards in one machine. - -THE SLICECOM DRIVER - -The SliceCOM board doesn't require firmware. You can have 4 of these cards -in one machine. The driver doesn't (yet) support shared interrupts, so -you will need a separate IRQ line for every board. -Read Documentation/networking/slicecom.txt for help on configuring -this adapter. - -THE HDLC/PPP LINE PROTOCOL DRIVER - -The HDLC/SyncPPP line protocol driver uses the kernel's built-in syncppp -driver (syncppp.o). You don't have to manually select syncppp.o when building -the kernel, the dependencies compile it in automatically. - - - - -EXAMPLE -(setting up hw parameters, see above) - -# using HDLC: -echo hdlc >/proc/comx/comx0/protocol -echo 10 >/proc/comx/comx0/keepalive <- not necessary, 10 is the default -ifconfig comx0 1.2.3.4 pointopoint 5.6.7.8 netmask 255.255.255.255 - -(setting up hw parameters, see above) - -# using PPP: -echo ppp >/proc/comx/comx0/protocol -ifconfig comx0 up -ifconfig comx0 1.2.3.4 pointopoint 5.6.7.8 netmask 255.255.255.255 - - -THE LAPB LINE PROTOCOL DRIVER - -For this, you'll need to configure LAPB support (See 'LAPB Data Link Driver' in -'Network options' section) into your kernel (thanks to Jonathan Naylor for his -excellent implementation). -comx-proto-lapb.o provides the following files in the appropriate directory -(the default values in parens): t1 (5), t2 (1), n2 (20), mode (DTE, STD) and -window (7). Agree with the administrator of your peer router on these -settings (most people use defaults, but you have to know if you are DTE or -DCE). 
- -EXAMPLE - -(setting up hw parameters, see above) -echo lapb >/proc/comx/comx0/protocol -echo dce >/proc/comx/comx0/mode <- DCE interface in this example -ifconfig comx0 1.2.3.4 pointopoint 5.6.7.8 netmask 255.255.255.255 - - -THE FRAME RELAY PROTOCOL DRIVER - -You DON'T need any other frame relay related modules from the kernel to use -COMX-Frame Relay. This protocol is a bit more complicated than the others, -because it allows to use 'subinterfaces' or DLCIs within one physical device. -First you have to create the 'master' device (the actual physical interface) -as you would do for other protocols. Specify 'frad' as protocol type. -Now you can bring this interface up by saying 'ifconfig comx0 up' (or whatever -you've named the interface). Do not assign any IP address to this interface -and do not set any routes through it. -Then, set up your DLCIs the following way: create a comx interface for each -DLCI you intend to use (with mkdir), and write 'dlci' to the 'boardtype' file, -and 'ietf-ip' to the 'protocol' file. Currently, the only supported -encapsulation type is this (also called as RFC1294/1490 IP encapsulation). -Write the DLCI number to the 'dlci' file, and write the name of the physical -COMX device to the file called 'master'. -Now you can assign an IP address to this interface and set routes using it. -See the example file for further info and example config script. -Notes: this driver implements a DTE interface with partially implemented -Q933a LMI. -You can find an extensively commented example in the 'etc' directory. - -FURTHER /proc FILES - -boardtype: -Type of the hardware. Valid values are: - 'comx', 'hicomx', 'locomx', 'cmx', 'slicecom'. - -protocol: -Data-link protocol on this channel. Can be: HDLC, LAPB, PPP, FRAD - -status: -You can read the channel's actual status from the 'status' file, for example -'cat /proc/comx/comx3/status'. - -lineup_delay: -Interpreted in seconds (default is 1). 
Used to avoid line jitter: the system -will consider the line status 'UP' only if it is up for at least this number -of seconds. - -debug: -You can set various debug options through this file. Valid options are: -'comx_events', 'comx_tx', 'comx_rx', 'hw_events', 'hw_tx', 'hw_rx'. -You can enable a debug options by writing its name prepended by a '+' into -the debug file, for example 'echo +comx_rx >comx0/debug'. -Disabling an option happens similarly, use the '-' prefix -(e.g. 'echo -hw_rx >debug'). -Debug results can be read from the debug file, for example: -tail -f /proc/comx/comx2/debug - - diff --git a/Documentation/networking/slicecom.hun b/Documentation/networking/slicecom.hun deleted file mode 100644 index bed2f045e550..000000000000 --- a/Documentation/networking/slicecom.hun +++ /dev/null @@ -1,371 +0,0 @@ - -SliceCOM adapter felhasznaloi dokumentacioja - 0.51 verziohoz - -Bartók István -Utolso modositas: Wed Aug 29 17:26:58 CEST 2001 - ------------------------------------------------------------------ - -Hasznalata: - -Forditas: - -Code maturity level options - [*] Prompt for development and/or incomplete code/drivers - -Network device support - Wan interfaces - MultiGate (COMX) synchronous - Support for MUNICH based boards: SliceCOM, PCICOM (NEW) - Support for HDLC and syncPPP... 
- - -A modulok betoltese: - -modprobe comx - -modprobe comx-proto-ppp # a Cisco-HDLC es a SyncPPP protokollt is - # ez a modul adja - -modprobe comx-hw-munich # a modul betoltodeskor azonnal jelent a - # syslogba a detektalt kartyakrol - - -Konfiguralas: - -# Ezen az interfeszen Cisco-HDLC vonali protokoll fog futni -# Az interfeszhez rendelt idoszeletek: 1,2 (128 kbit/sec-es vonal) -# (a G.703 keretben az elso adatot vivo idoszelet az 1-es) -# -mkdir /proc/comx/comx0.1/ -echo slicecom >/proc/comx/comx0.1/boardtype -echo hdlc >/proc/comx/comx0.1/protocol -echo 1 2 >/proc/comx/comx0.1/timeslots - - -# Ezen az interfeszen SyncPPP vonali protokoll fog futni -# Az interfeszhez rendelt idoszelet: 3 (64 kbit/sec-es vonal) -# -mkdir /proc/comx/comx0.2/ -echo slicecom >/proc/comx/comx0.2/boardtype -echo ppp >/proc/comx/comx0.2/protocol -echo 3 >/proc/comx/comx0.2/timeslots - -... - -ifconfig comx0.1 up -ifconfig comx0.2 up - ------------------------------------------------------------------ - -A COMX driverek default 20 csomagnyi transmit queue-t rendelnek a halozati -interfeszekhez. WAN halozatokban ennel hosszabbat is szokas hasznalni -(20 es 100 kozott), hogy a vonal kihasznaltsaga nagy terheles eseten jobb -legyen (bar ezzel megno a varhato kesleltetes a csomagok sorban allasa miatt): - -# ifconfig comx0 txqueuelen 50 - -Ezt a beallitasi lehetoseget csak az ujabb disztribuciok ifconfig parancsa -tamogatja (amik mar a 2.2 kernelekhez keszultek, mint a RedHat 6.1 vagy a -Debian 2.2). - -A 2.1-es Debian disztribuciohoz a http://www.debian.org/~rcw/2.2/netbase/ -cimrol toltheto le ujabb netbase csomag, ami mar ilyet tamogato ifconfig -parancsot tartalmaz. 
Bovebben a 2.2 kernel hasznalatarol Debian 2.1 alatt: -http://www.debian.org/releases/stable/running-kernel-2.2 - ------------------------------------------------------------------ - -A kartya LED-jeinek jelentese: - -piros - eg, ha Remote Alarm-ot kuld a tuloldal -zold - eg, ha a vett jelben megtalalja a keretszinkront - -Reszletesebben: - -piros: zold: jelentes: - -- - nincs keretszinkron (nincs jel, vagy rossz a jel) -- eg "minden rendben" -eg eg a vetel OK, de a tuloldal Remote Alarm-ot kuld -eg - ez nincs ertelmezve, egyelore funkcio nelkul - ------------------------------------------------------------------ - -Reszletesebb leiras a hardver beallitasi lehetosegeirol: - -Az altalanos,- es a protokoll-retegek beallitasi lehetosegeirol a 'comx.txt' -fajlban leirtak SliceCOM kartyanal is ervenyesek, itt csak a hardver-specifikus -beallitasi lehetosegek vannak osszefoglalva: - -Konfiguralasi interfesz a /proc/comx/ alatt: - -Minden timeslot-csoportnak kulon comx* interfeszt kell letrehozni mkdir-rel: -comx0, comx1, .. stb. Itt beallithato, hogy az adott interfesz hanyadik kartya -melyik timeslotja(i)bol alljon ossze. A Cisco-fele serial3:1 elnevezesek -(serial3:1 = a 3. kartyaban az 1-es idoszelet-csoport) Linuxon aliasing-ot -jelentenenek, ezert mi nem tudunk ilyen elnevezest hasznalni. - -Tobb kartya eseten a comx0.1, comx0.2, ... vagy slice0.1, slice0.2 nevek -hasznalhatoak. - -Tobb SliceCOM kartya is lehet egy gepben, de sajat interrupt kell mindegyiknek, -nem tud meg megosztott interruptot kezelni. - -Az egesz kartyat erinto beallitasok: - -Az ioport es irq beallitas nincs: amit a PCI BIOS kioszt a rendszernek, -azt hasznalja a driver. 
- - -comx0/boardnum - hanyadik SliceCOM kartya a gepben (a 'termeszetes' PCI - sorrendben ertve: ahogyan a /proc/pci-ban vagy az 'lspci' - kimeneteben megjelenik, altalaban az alaplapi PCI meghajto - aramkorokhoz kozelebb eso kartyak a kisebb sorszamuak) - - Default: 0 (0-tol kezdodik a szamolas) - - -Bar a kovetkezoket csak egy-egy interfeszen allitjuk at, megis az egesz kartya -mukodeset egyszerre allitjak. A megkotes hogy csak UP-ban levo interfeszen -hasznalhatoak, azert van, mert kulonben nem vart eredmenyekre vezetne egy ilyen -paranccsorozat: - - echo 0 >boardnum - echo internal >clock_source - echo 1 >boardnum - -- Ez a 0-s board clock_source-at allitana at. - -Ezek a beallitasok megmaradnak az osszes interfesz torlesekor, de torlodnek -a driver modul ki/betoltesekor. - - -comx0/clock_source - A Tx orajelforrasa, a Cisco-val hasonlatosra keszult. - Hasznalata: - - papaya:# echo line >/proc/comx/comx0/clock_source - papaya:# echo internal >/proc/comx/comx0/clock_source - - line - A Tx orajelet a vett adatfolyambol dekodolja, igyekszik - igazodni hozza. Ha nem lat orajelet az inputon, akkor - atall a sajat orajelgeneratorara. - internal - A Tx orajelet a sajat orajelgeneratora szolgaltatja. - - Default: line - - Normal osszeallitas eseten a tavkozlesi szolgaltato eszkoze - (pl. HDSL modem) adja az orajelet, ezert ez a default. - - -comx0/framing - A CRC4 ki/be kapcsolasa - - A CRC4: 16 PCM keretet (A PCM keret az, amibe a 32 darab 64 - kilobites csatorna van bemultiplexalva. Nem osszetevesztendo a HDLC - kerettel.) 2x8 -as csoportokra osztanak, es azokhoz 4-4 bites CRC-t - szamolnak. Elsosorban a vonal minosegenek a monitorozasara szolgal. - - papaya:~# echo crc4 >/proc/comx/comx0/framing - papaya:~# echo no-crc4 >/proc/comx/comx0/framing - - Default a 'crc4', a MATAV vonalak altalaban igy futnak. De ha nem - egyforma is a beallitas a vonal ket vegen, attol a forgalom altalaban - at tud menni. 
- - -comx0/linecode - A vonali kodolas beallitasa - - papaya:~# echo hdb3 >/proc/comx/comx0/linecode - papaya:~# echo ami >/proc/comx/comx0/linecode - - Default a 'hdb3', a MATAV vonalak igy futnak. - - (az AMI kodolas igen ritka E1-es vonalaknal). Ha ez a beallitas nem - egyezik a vonal ket vegen, akkor elofordulhat hogy a keretszinkron - osszejon, de CRC4-hibak es a vonalakon atvitt adatokban is hibak - keletkeznek (amit a HDLC/SyncPPP szinten CRC-hibaval jelez) - - -comx0/reg - a kartya aramkoreinek, a MUNICH (reg) es a FALC (lbireg) -comx0/lbireg regisztereinek kozvetlen elerese. Hasznalata: - - echo >reg 0x04 0x0 - a 4-es regiszterbe 0-t ir - echo >reg 0x104 - printk()-val kiirja a 4-es regiszter - tartalmat a syslogba. - - WARNING: ezek csak a fejleszteshez keszultek, sok galibat - lehet veluk okozni! - - -comx0/loopback - A kartya G.703 jelenek a visszahurkolasara is van lehetoseg: - - papaya:# echo none >/proc/comx/comx0/loopback - papaya:# echo local >/proc/comx/comx0/loopback - papaya:# echo remote >/proc/comx/comx0/loopback - - none - nincs visszahurkolas, normal mukodes - local - a kartya a sajat maga altal adott jelet kapja vissza - remote - a kartya a kivulrol vett jelet adja kifele - - Default: none - ------------------------------------------------------------------ - -Az interfeszhez (Cisco terminologiaban 'channel-group') kapcsolodo beallitasok: - -comx0/timeslots - mely timeslotok (idoszeletek) tartoznak az adott interfeszhez. - - papaya:~# cat /proc/comx/comx0/timeslots - 1 3 4 5 6 - papaya:~# - - Egy timeslot megkeresese (hanyas interfeszbe tartozik nalunk): - - papaya:~# grep ' 4' /proc/comx/comx*/timeslots - /proc/comx/comx0/timeslots:1 3 4 5 6 - papaya:~# - - Beallitasa: - papaya:~# echo '1 5 2 6 7 8' >/proc/comx/comx0/timeslots - - A timeslotok sorrendje nem szamit, '1 3 2' ugyanaz mint az '1 2 3'. 
- - Beallitashoz az adott interfesznek DOWN-ban kell lennie - (ifconfig comx0 down), de ugyanannak a kartyanak a tobbi interfesze - uzemelhet kozben. - - Beallitaskor leellenorzi, hogy az uj timeslotok nem utkoznek-e egy - masik interfesz timeslotjaival. Ha utkoznek, akkor nem allitja at. - - Mindig 10-es szamrendszerben tortenik a timeslotok ertelmezese, nehogy - a 08, 09 alaku felirast rosszul ertelmezze. - ------------------------------------------------------------------ - -Az interfeszek es a kartya allapotanak lekerdezese: - -- A ' '-szel kezdodo sorok az eredeti kimenetet, a //-rel kezdodo sorok a -magyarazatot jelzik. - - papaya:~$ cat /proc/comx/comx1/status - Interface administrative status is UP, modem status is UP, protocol is UP - Modem status changes: 0, Transmitter status is IDLE, tbusy: 0 - Interface load (input): 978376 / 947808 / 951024 bits/s (5s/5m/15m) - (output): 978376 / 947848 / 951024 bits/s (5s/5m/15m) - Debug flags: none - RX errors: len: 22, overrun: 1, crc: 0, aborts: 0 - buffer overrun: 0, pbuffer overrun: 0 - TX errors: underrun: 0 - Line keepalive (value: 10) status UP [0] - -// Itt kezdodik a hardver-specifikus resz: - Controller status: - No alarms - -// Alarm: hibajelzes: -// -// No alarms - minden rendben -// -// LOS - Loss Of Signal - nem erzekel jelet a bemeneten. -// AIS - Alarm Indication Signal - csak egymas utani 1-esek jonnek -// a bemeneten, a tuloldal igy is jelezheti hogy meghibasodott vagy -// nincs inicializalva. -// AUXP - Auxiliary Pattern Indication - 01010101.. sorozat jon a bemeneten. -// LFA - Loss of Frame Alignment - nincs keretszinkron -// RRA - Receive Remote Alarm - a tuloldal el, de hibat jelez. 
-// LMFA - Loss of CRC4 Multiframe Alignment - nincs CRC4-multikeret-szinkron -// NMF - No Multiframe alignment Found after 400 msec - ilyen alarm a no-crc4 -// es crc4 keretezesek eseten nincs, lasd lentebb -// -// Egyeb lehetseges hibajelzesek: -// -// Transmit Line Short - a kartya ugy erzi hogy az adasi kimenete rovidre -// van zarva, ezert kikapcsolta az adast. (nem feltetlenul veszi eszre -// a kulso rovidzarat) - -// A veteli oldal csomagjainak lancolt listai, debug celokra: - - Rx ring: - rafutott: 0 - lastcheck: 50845731, jiffies: 51314281 - base: 017b1858 - rx_desc_ptr: 0 - rx_desc_ptr: 017b1858 - hw_curr_ptr: 017b1858 - 06040000 017b1868 017b1898 c016ff00 - 06040000 017b1878 017b1e9c c016ff00 - 46040000 017b1888 017b24a0 c016ff00 - 06040000 017b1858 017b2aa4 c016ff00 - -// A kartyat hasznalo tobbi interfesz: a 0-s channel-group a comx1 interfesz, -// es az 1,2,...,16 timeslotok tartoznak hozza: - - Interfaces using this board: (channel-group, interface, timeslots) - 0 comx1: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - 1 comx2: 17 - 2 comx3: 18 - 3 comx4: 19 - 4 comx5: 20 - 5 comx6: 21 - 6 comx7: 22 - 7 comx8: 23 - 8 comx9: 24 - 9 comx10: 25 - 10 comx11: 26 - 11 comx12: 27 - 12 comx13: 28 - 13 comx14: 29 - 14 comx15: 30 - 15 comx16: 31 - -// Hany esemenyt kezelt le a driver egy-egy hardver-interrupt kiszolgalasanal: - - Interrupt work histogram: - hist[ 0]: 0 hist[ 1]: 2 hist[ 2]: 18574 hist[ 3]: 79 - hist[ 4]: 14 hist[ 5]: 1 hist[ 6]: 0 hist[ 7]: 1 - hist[ 8]: 0 hist[ 9]: 7 - -// Hany kikuldendo csomag volt mar a Tx-ringben amikor ujabb lett irva bele: - - Tx ring histogram: - hist[ 0]: 2329 hist[ 1]: 0 hist[ 2]: 0 hist[ 3]: 0 - -// Az E1-interfesz hiba-szamlaloi, az rfc2495-nek megfeleloen: -// (kb. 
a Cisco routerek "show controllers e1" formatumaban: http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/rbook/rinterfc.htm#xtocid25669126) - -Data in current interval (91 seconds elapsed): - 9516 Line Code Violations, 65 Path Code Violations, 2 E-Bit Errors - 0 Slip Secs, 2 Fr Loss Secs, 2 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 11 Unavail Secs -Data in Interval 1 (15 minutes): - 0 Line Code Violations, 0 Path Code Violations, 0 E-Bit Errors - 0 Slip Secs, 0 Fr Loss Secs, 0 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 0 Unavail Secs -Data in last 4 intervals (1 hour): - 0 Line Code Violations, 0 Path Code Violations, 0 E-Bit Errors - 0 Slip Secs, 0 Fr Loss Secs, 0 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 0 Unavail Secs -Data in last 96 intervals (24 hours): - 0 Line Code Violations, 0 Path Code Violations, 0 E-Bit Errors - 0 Slip Secs, 0 Fr Loss Secs, 0 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 0 Unavail Secs - ------------------------------------------------------------------ - -Nehany kulonlegesebb beallitasi lehetoseg (idovel beepulhetnek majd a driverbe): -Ezekkel sok galibat lehet okozni, nagyon ovatosan kell oket hasznalni! - - modified CRC-4, for improved interworking of CRC-4 and non-CRC-4 - devices: (lasd page 107 es g706 Annex B) - lbireg[ 0x1b ] |= 0x08 - lbireg[ 0x1c ] |= 0xc0 - - ilyenkor ertelmezett az NMF - 'No Multiframe alignment Found after - 400 msec' alarm. 
- - FALC - a vonali meghajto IC - local loop - a sajat adasomat halljam vissza - remote loop - a kivulrol jovo adast adom vissza - - Egy hibakeresesre hasznalhato dolog: - - 1-es timeslot local loop a FALC-ban: echo >lbireg 0x1d 0x21 - - local loop kikapcsolasa: echo >lbireg 0x1d 0x00 diff --git a/Documentation/networking/slicecom.txt b/Documentation/networking/slicecom.txt deleted file mode 100644 index c82c0cf981b4..000000000000 --- a/Documentation/networking/slicecom.txt +++ /dev/null @@ -1,369 +0,0 @@ - -SliceCOM adapter user's documentation - for the 0.51 driver version - -Written by Bartók István - -English translation: Lakatos György -Mon Dec 11 15:28:42 CET 2000 - -Last modified: Wed Aug 29 17:25:37 CEST 2001 - ------------------------------------------------------------------ - -Usage: - -Compiling the kernel: - -Code maturity level options - [*] Prompt for development and/or incomplete code/drivers - -Network device support - Wan interfaces - MultiGate (COMX) synchronous - Support for MUNICH based boards: SliceCOM, PCICOM (NEW) - Support for HDLC and syncPPP... - - -Loading the modules: - -modprobe comx - -modprobe comx-proto-ppp # module for Cisco-HDLC and SyncPPP protocols - -modprobe comx-hw-munich # the module logs information by the kernel - # about the detected boards - - -Configuring the board: - -# This interface will use the Cisco-HDLC line protocol, -# the timeslices assigned are 1,2 (128 KiBit line speed) -# (the first data timeslice in the G.703 frame is no. 1) -# -mkdir /proc/comx/comx0.1/ -echo slicecom >/proc/comx/comx0.1/boardtype -echo hdlc >/proc/comx/comx0.1/protocol -echo 1 2 >/proc/comx/comx0.1/timeslots - - -# This interface uses SyncPPP line protocol, the assigned -# is no. 3 (64 KiBit line speed) -# -mkdir /proc/comx/comx0.2/ -echo slicecom >/proc/comx/comx0.2/boardtype -echo ppp >/proc/comx/comx0.2/protocol -echo 3 >/proc/comx/comx0.2/timeslots - -... 
- -ifconfig comx0.1 up -ifconfig comx0.2 up - ------------------------------------------------------------------ - -The COMX interfaces use a 10 packet transmit queue by default, however WAN -networks sometimes use bigger values (20 to 100), to utilize the line better -by large traffic (though the line delay increases because of more packets -join the queue). - -# ifconfig comx0 txqueuelen 50 - -This option is only supported by the ifconfig command of the later -distributions, which came with 2.2 kernels, such as RedHat 6.1 or Debian 2.2. - -You can download a newer netbase packet from -http://www.debian.org/~rcw/2.2/netbase/ for Debian 2.1, which has a new -ifconfig. You can get further information about using 2.2 kernel with -Debian 2.1 from http://www.debian.org/releases/stable/running-kernel-2.2 - ------------------------------------------------------------------ - -The SliceCom LEDs: - -red - on, if the interface is unconfigured, or it gets Remote Alarm-s -green - on, if the board finds frame-sync in the received signal - -A bit more detailed: - -red: green: meaning: - -- - no frame-sync, no signal received, or signal SNAFU. -- on "Everything is OK" -on on Reception is ok, but the remote end sends Remote Alarm -on - The interface is unconfigured - ------------------------------------------------------------------ - -A more detailed description of the hardware setting options: - -The general and the protocol layer options described in the 'comx.txt' file -apply to the SliceCom as well, I only summarize the SliceCom hardware specific -settings below. - -The '/proc/comx' configuring interface: - -An interface directory should be created for every timeslot group with -'mkdir', e,g: 'comx0', 'comx1' etc. The timeslots can be assigned here to the -specific interface. The Cisco-like naming convention (serial3:1 - first -timeslot group of the 3rd. board) can't be used here, because these mean IP -aliasing in Linux. 
- -You can give any meaningful name to keep the configuration clear; -e.g: 'comx0.1', 'comx0.2', 'comx1.1', comx1.2', if you have two boards -with two interfaces each. - -Settings, which apply to the board: - -Neither 'io' nor 'irq' settings required, the driver uses the resources -given by the PCI BIOS. - -comx0/boardnum - board number of the SliceCom in the PC (using the 'natural' - PCI order) as listed in '/proc/pci' or the output of the - 'lspci' command, generally the slots nearer to the motherboard - PCI driver chips have the lower numbers. - - Default: 0 (the counting starts with 0) - -Though the options below are to be set on a single interface, they apply to the -whole board. The restriction, to use them on 'UP' interfaces, is because the -command sequence below could lead to unpredictable results. - - # echo 0 >boardnum - # echo internal >clock_source - # echo 1 >boardnum - -The sequence would set the clock source of board 0. - -These settings will persist after all the interfaces are cleared, but are -cleared when the driver module is unloaded and loaded again. - -comx0/clock_source - source of the transmit clock - Usage: - - # echo line >/proc/comx/comx0/clock_source - # echo internal >/proc/comx/comx0/clock_source - - line - The Tx clock is being decoded if the input data stream, - if no clock seen on the input, then the board will use it's - own clock generator. - - internal - The Tx clock is supplied by the builtin clock generator. - - Default: line - - Normally, the telecommunication company's end device (the HDSL - modem) provides the Tx clock, that's why 'line' is the default. - -comx0/framing - Switching CRC4 off/on - - CRC4: 16 PCM frames (The 32 64Kibit channels are multiplexed into a - PCM frame, nothing to do with HDLC frames) are divided into 2x8 - groups, each group has a 4 bit CRC. - - # echo crc4 >/proc/comx/comx0/framing - # echo no-crc4 >/proc/comx/comx0/framing - - Default is 'crc4', the Hungarian MATAV lines behave like this. 
- The traffic generally passes if this setting on both ends don't match. - -comx0/linecode - Setting the line coding - - # echo hdb3 >/proc/comx/comx0/linecode - # echo ami >/proc/comx/comx0/linecode - - Default a 'hdb3', MATAV lines use this. - - (AMI coding is rarely used with E1 lines). Frame sync may occur, if - this setting doesn't match the other end's, but CRC4 and data errors - will come, which will result in CRC errors on HDLC/SyncPPP level. - -comx0/reg - direct access to the board's MUNICH (reg) and FALC (lbireg) -comx0/lbireg circuit's registers - - # echo >reg 0x04 0x0 - write 0 to register 4 - # echo >reg 0x104 - write the contents of register 4 with - printk() to syslog - -WARNING! These are only for development purposes, messing with this will - result much trouble! - -comx0/loopback - Places a loop to the board's G.703 signals - - # echo none >/proc/comx/comx0/loopback - # echo local >/proc/comx/comx0/loopback - # echo remote >/proc/comx/comx0/loopback - - none - normal operation, no loop - local - the board receives it's own output - remote - the board sends the received data to the remote side - - Default: none - ------------------------------------------------------------------ - -Interface (channel group in Cisco terms) settings: - -comx0/timeslots - which timeslots belong to the given interface - - Setting: - - # echo '1 5 2 6 7 8' >/proc/comx/comx0/timeslots - - # cat /proc/comx/comx0/timeslots - 1 2 5 6 7 8 - # - - Finding a timeslot: - - # grep ' 4' /proc/comx/comx*/timeslots - /proc/comx/comx0/timeslots:1 3 4 5 6 - # - - The timeslots can be in any order, '1 2 3' is the same as '1 3 2'. - - The interface has to be DOWN during the setting ('ifconfig comx0 - down'), but the other interfaces could operate normally. - - The driver checks if the assigned timeslots are vacant, if not, then - the setting won't be applied. - - The timeslot values are treated as decimal numbers, not to misunderstand - values of 08, 09 form. 
- ------------------------------------------------------------------ - -Checking the interface and board status: - -- Lines beginning with ' ' (space) belong to the original output, the lines -which begin with '//' are the comments. - - papaya:~$ cat /proc/comx/comx1/status - Interface administrative status is UP, modem status is UP, protocol is UP - Modem status changes: 0, Transmitter status is IDLE, tbusy: 0 - Interface load (input): 978376 / 947808 / 951024 bits/s (5s/5m/15m) - (output): 978376 / 947848 / 951024 bits/s (5s/5m/15m) - Debug flags: none - RX errors: len: 22, overrun: 1, crc: 0, aborts: 0 - buffer overrun: 0, pbuffer overrun: 0 - TX errors: underrun: 0 - Line keepalive (value: 10) status UP [0] - -// The hardware specific part starts here: - Controller status: - No alarms - -// Alarm: -// -// No alarms - Everything OK -// -// LOS - Loss Of Signal - No signal sensed on the input -// AIS - Alarm Indication Signal - The remote side sends '11111111'-s, -// it tells, that there's an error condition, or it's not -// initialised. -// AUXP - Auxiliary Pattern Indication - 01010101.. received. -// LFA - Loss of Frame Alignment - no frame sync received. -// RRA - Receive Remote Alarm - the remote end's OK, but signals error cond. -// LMFA - Loss of CRC4 Multiframe Alignment - no CRC4 multiframe sync. -// NMF - No Multiframe alignment Found after 400 msec - no such alarm using -// no-crc4 or crc4 framing, see below. -// -// Other possible error messages: -// -// Transmit Line Short - the board felt, that it's output is short-circuited, -// so it switched the transmission off. (The board can't definitely tell, -// that it's output is short-circuited.) 
- -// Chained list of the received packets, for debug purposes: - - Rx ring: - rafutott: 0 - lastcheck: 50845731, jiffies: 51314281 - base: 017b1858 - rx_desc_ptr: 0 - rx_desc_ptr: 017b1858 - hw_curr_ptr: 017b1858 - 06040000 017b1868 017b1898 c016ff00 - 06040000 017b1878 017b1e9c c016ff00 - 46040000 017b1888 017b24a0 c016ff00 - 06040000 017b1858 017b2aa4 c016ff00 - -// All the interfaces using the board: comx1, using the 1,2,...16 timeslots, -// comx2, using timeslot 17, etc. - - Interfaces using this board: (channel-group, interface, timeslots) - 0 comx1: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - 1 comx2: 17 - 2 comx3: 18 - 3 comx4: 19 - 4 comx5: 20 - 5 comx6: 21 - 6 comx7: 22 - 7 comx8: 23 - 8 comx9: 24 - 9 comx10: 25 - 10 comx11: 26 - 11 comx12: 27 - 12 comx13: 28 - 13 comx14: 29 - 14 comx15: 30 - 15 comx16: 31 - -// The number of events handled by the driver during an interrupt cycle: - - Interrupt work histogram: - hist[ 0]: 0 hist[ 1]: 2 hist[ 2]: 18574 hist[ 3]: 79 - hist[ 4]: 14 hist[ 5]: 1 hist[ 6]: 0 hist[ 7]: 1 - hist[ 8]: 0 hist[ 9]: 7 - -// The number of packets to send in the Tx ring, when a new one arrived: - - Tx ring histogram: - hist[ 0]: 2329 hist[ 1]: 0 hist[ 2]: 0 hist[ 3]: 0 - -// The error counters of the E1 interface, according to the RFC2495, -// (similar to the Cisco "show controllers e1" command's output: -// http://www.cisco.com/univercd/cc/td/doc/product/software/ios11/rbook/rinterfc.htm#xtocid25669126) - -Data in current interval (91 seconds elapsed): - 9516 Line Code Violations, 65 Path Code Violations, 2 E-Bit Errors - 0 Slip Secs, 2 Fr Loss Secs, 2 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 11 Unavail Secs -Data in Interval 1 (15 minutes): - 0 Line Code Violations, 0 Path Code Violations, 0 E-Bit Errors - 0 Slip Secs, 0 Fr Loss Secs, 0 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 0 Unavail Secs -Data in last 4 intervals (1 hour): - 0 Line Code 
Violations, 0 Path Code Violations, 0 E-Bit Errors - 0 Slip Secs, 0 Fr Loss Secs, 0 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 0 Unavail Secs -Data in last 96 intervals (24 hours): - 0 Line Code Violations, 0 Path Code Violations, 0 E-Bit Errors - 0 Slip Secs, 0 Fr Loss Secs, 0 Line Err Secs, 0 Degraded Mins - 0 Errored Secs, 0 Bursty Err Secs, 0 Severely Err Secs, 0 Unavail Secs - ------------------------------------------------------------------ - -Some unique options, (may get into the driver later): -Treat them very carefully, these can cause much trouble! - - modified CRC-4, for improved interworking of CRC-4 and non-CRC-4 - devices: (see page 107 and g706 Annex B) - lbireg[ 0x1b ] |= 0x08 - lbireg[ 0x1c ] |= 0xc0 - - - The NMF - 'No Multiframe alignment Found after 400 msec' alarm - comes into account. - - FALC - the line driver chip. - local loop - I hear my transmission back. - remote loop - I echo the remote transmission back. - - Something useful for finding errors: - - - local loop for timeslot 1 in the FALC chip: - - # echo >lbireg 0x1d 0x21 - - - Switching the loop off: - - # echo >lbireg 0x1d 0x00 From 17a83c75ada2cc482981d13ad17fcd5fdc956e4e Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 7 Nov 2007 02:29:33 -0800 Subject: [PATCH 38/44] [NET]: Remove Documentation/networking/ncsa-telnet Newsflash: There once was a version of NCSA telnet that had some bug. Spotted by Pekka Pietikainen. Signed-off-by: Adrian Bunk Signed-off-by: David S. 
Miller --- Documentation/networking/00-INDEX | 2 -- Documentation/networking/ncsa-telnet | 16 ---------------- 2 files changed, 18 deletions(-) delete mode 100644 Documentation/networking/ncsa-telnet diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index a9f4accd8989..9c64042fb88b 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -74,8 +74,6 @@ ltpc.txt - the Apple or Farallon LocalTalk PC card driver multicast.txt - Behaviour of cards under Multicast -ncsa-telnet - - notes on how NCSA telnet (DOS) breaks with MTU discovery enabled. netdevices.txt - info on network device driver functions exported to the kernel. olympic.txt diff --git a/Documentation/networking/ncsa-telnet b/Documentation/networking/ncsa-telnet deleted file mode 100644 index d77d28b09093..000000000000 --- a/Documentation/networking/ncsa-telnet +++ /dev/null @@ -1,16 +0,0 @@ -NCSA telnet doesn't work with path MTU discovery enabled. This is due to a -bug in NCSA that also stops it working with other modern networking code -such as Solaris. - -The following information is courtesy of -Marek - -There is a fixed version somewhere on ftp.upe.ac.za (sorry, I don't -remember the exact pathname, and this site is very slow from here). -It may or may not be faster for you to get it from -ftp://ftp.ists.pwr.wroc.pl/pub/msdos/telnet/ncsa_upe/tel23074.zip -(source is in v230704s.zip). I have tested it with 1.3.79 (with -path mtu discovery enabled - ncsa 2.3.08 didn't work) and it seems -to work. I don't know if anyone is working on this code - this -version is over a year old. Too bad - it's faster and often more -stable than these windoze telnets, and runs on almost anything... 
From e8b2cadde643780395b45e8974df911cde47744b Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 7 Nov 2007 02:30:03 -0800 Subject: [PATCH 39/44] [NET]: Remove Documentation/networking/routing.txt This file is so outdated that I can't see any value in keeping it. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 -- Documentation/networking/routing.txt | 46 ---------------------------- 2 files changed, 48 deletions(-) delete mode 100644 Documentation/networking/routing.txt diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 9c64042fb88b..f3b02378ecd6 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -84,8 +84,6 @@ pt.txt - the Gracilis Packetwin AX.25 device driver ray_cs.txt - Raylink Wireless LAN card driver info. -routing.txt - - the new routing mechanism shaper.txt - info on the module that can shape/limit transmitted traffic. sk98lin.txt diff --git a/Documentation/networking/routing.txt b/Documentation/networking/routing.txt deleted file mode 100644 index a26838b930f2..000000000000 --- a/Documentation/networking/routing.txt +++ /dev/null @@ -1,46 +0,0 @@ -The directory ftp.inr.ac.ru:/ip-routing contains: - -- iproute.c - "professional" routing table maintenance utility. - -- rdisc.tar.gz - rdisc daemon, ported from Sun. - STRONGLY RECOMMENDED FOR ALL HOSTS. - -- routing.tgz - original Mike McLagan's route by source patch. - Currently it is obsolete. - -- gated.dif-ss.gz - gated-R3_6Alpha_2 fixes. - Look at README.gated - -- mrouted-3.8.dif.gz - mrouted-3.8 fixes. - -- rtmon.c - trivial debugging utility: reads and stores netlink. - - -NEWS for user. - -- Policy based routing. Routing decisions are made on the basis - not only of destination address, but also source address, - TOS and incoming interface. -- Complete set of IP level control messages. - Now Linux is the only OS in the world complying to RFC requirements. 
- Great win 8) -- New interface addressing paradigm. - Assignment of address ranges to interface, - multiple prefixes etc. etc. - Do not bother, it is compatible with the old one. Moreover: -- You don't need to do "route add aaa.bbb.ccc... eth0" anymore, - it is done automatically. -- "Abstract" UNIX sockets and security enhancements. - This is necessary to use TIRPC and TLI emulation library. - -NEWS for hacker. - -- New destination cache. Flexible, robust and just beautiful. -- Network stack is reordered, simplified, optimized, a lot of bugs fixed. - (well, and new bugs were introduced, but I haven't seen them yet 8)) - It is difficult to describe all the changes, look into source. - -If you see this file, then this patch works 8) - -Alexey Kuznetsov. -kuznet@ms2.inr.ac.ru From c183783e28969e92f3df23f8b7e18d5c3e5bc8dd Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Wed, 7 Nov 2007 02:30:43 -0800 Subject: [PATCH 40/44] [NET]: Remove Documentation/networking/pt.txt There's no point in keeping documentation for a driver that was removed many years ago. Signed-off-by: Adrian Bunk Acked-by: Alan Cox Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 -- Documentation/networking/pt.txt | 58 ------------------------------- 2 files changed, 60 deletions(-) delete mode 100644 Documentation/networking/pt.txt diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index f3b02378ecd6..563e442f2d42 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -80,8 +80,6 @@ olympic.txt - IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info. policy-routing.txt - IP policy-based routing -pt.txt - - the Gracilis Packetwin AX.25 device driver ray_cs.txt - Raylink Wireless LAN card driver info.
shaper.txt diff --git a/Documentation/networking/pt.txt b/Documentation/networking/pt.txt deleted file mode 100644 index 72e888c1d988..000000000000 --- a/Documentation/networking/pt.txt +++ /dev/null @@ -1,58 +0,0 @@ -This is the README for the Gracilis Packetwin device driver, version 0.5 -ALPHA for Linux 1.3.43. - -These files will allow you to talk to the PackeTwin (now know as PT) and -connect through it just like a pair of TNCs. To do this you will also -require the AX.25 code in the kernel enabled. - -There are four files in this archive; this readme, a patch file, a .c file -and finally a .h file. The two program files need to be put into the -drivers/net directory in the Linux source tree, for me this is the -directory /usr/src/linux/drivers/net. The patch file needs to be patched in -at the top of the Linux source tree (/usr/src/linux in my case). - -You will most probably have to edit the pt.c file to suit your own setup, -this should just involve changing some of the defines at the top of the file. -Please note that if you run an external modem you must specify a speed of 0. - -The program is currently setup to run a 4800 baud external modem on port A -and a Kantronics DE-9600 daughter board on port B so if you have this (or -something similar) then you're right. - -To compile in the driver, put the files in the correct place and patch in -the diff. You will have to re-configure the kernel again before you -recompile it. - -The driver is not real good at the moment for finding the card. You can -'help' it by changing the order of the potential addresses in the structure -found in the pt_init() function so the address of where the card is is put -first. - -After compiling, you have to get them going, they are pretty well like any -other net device and just need ifconfig to get them going. -As an example, here is my /etc/rc.net --------------------------- - -# -# Configure the PackeTwin, port A. 
-/sbin/ifconfig pt0a 44.136.8.87 hw ax25 vk2xlz mtu 512 -/sbin/ifconfig pt0a 44.136.8.87 broadcast 44.136.8.255 netmask 255.255.255.0 -/sbin/route add -net 44.136.8.0 netmask 255.255.255.0 dev pt0a -/sbin/route add -net 44.0.0.0 netmask 255.0.0.0 gw 44.136.8.68 dev pt0a -/sbin/route add -net 138.25.16.0 netmask 255.255.240.0 dev pt0a -/sbin/route add -host 44.136.8.255 dev pt0a -# -# Configure the PackeTwin, port B. -/sbin/ifconfig pt0b 44.136.8.87 hw ax25 vk2xlz-1 mtu 512 -/sbin/ifconfig pt0b 44.136.8.87 broadcast 44.255.255.255 netmask 255.0.0.0 -/sbin/route add -host 44.136.8.216 dev pt0b -/sbin/route add -host 44.136.8.95 dev pt0b -/sbin/route add -host 44.255.255.255 dev pt0b - -This version of the driver comes under the GNU GPL. If you have one of my -previous (non-GPL) versions of the driver, please update to this one. - -I hope that this all works well for you. I would be pleased to hear how -many people use the driver and if it does its job. - - - Craig vk2xlz From 1e356f9cdfa885c78791d5d6e5d2baef22f01853 Mon Sep 17 00:00:00 2001 From: "Rumen G. Bogdanovski" Date: Wed, 7 Nov 2007 02:35:54 -0800 Subject: [PATCH 41/44] [IPVS]: Bind connections on stanby if the destination exists This patch fixes the problem with node overload on director fail-over. Given the scenario: 2 nodes each accepting 3 connections at a time and 2 directors, director failover occurs when the nodes are fully loaded (6 connections to the cluster). In this case the new director will assign another 6 connections to the cluster, if the same real servers exist there. The problem turned out to be in not binding the inherited connections to the real servers (destinations) on the backup director.
Therefore: "ipvsadm -l" reports 0 connections: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 0 -> node484.local:5999 Route 1000 0 0 while "ipvs -lnc" is right root@test2:~# ipvsadm -lnc IPVS connection entries pro expire state source virtual destination TCP 14:56 ESTABLISHED 192.168.0.10:39164 192.168.0.222:5999 192.168.0.51:5999 TCP 14:59 ESTABLISHED 192.168.0.10:39165 192.168.0.222:5999 192.168.0.52:5999 So the patch I am sending fixes the problem by binding the received connections to the appropriate service on the backup director, if it exists, else the connection will be handled the old way. So if the master and the backup directors are synchronized in terms of real services there will be no problem with server over-committing since new connections will not be created on the nonexistent real services on the backup. However if the service is created later on the backup, the binding will be performed when the next connection update is received. With this patch the inherited connections will show as inactive on the backup: root@test2:~# ipvsadm -l IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP test2.local:5999 wlc -> node473.local:5999 Route 1000 0 1 -> node484.local:5999 Route 1000 0 1 rumen@test2:~$ cat /proc/net/ip_vs IP Virtual Server version 1.2.1 (size=4096) Prot LocalAddress:Port Scheduler Flags -> RemoteAddress:Port Forward Weight ActiveConn InActConn TCP C0A800DE:176F wlc -> C0A80033:176F Route 1000 0 1 -> C0A80032:176F Route 1000 0 1 Regards, Rumen Bogdanovski Acked-by: Julian Anastasov Signed-off-by: Rumen G. 
Bogdanovski Signed-off-by: Simon Horman --- include/net/ip_vs.h | 4 ++++ net/ipv4/ipvs/ip_vs_conn.c | 19 +++++++++++++++++++ net/ipv4/ipvs/ip_vs_ctl.c | 26 ++++++++++++++++++++++++++ net/ipv4/ipvs/ip_vs_sync.c | 24 ++++++++++++++++++++---- 4 files changed, 69 insertions(+), 4 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 41870564df8e..1fd1ee896f39 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -901,6 +901,10 @@ extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); extern void ip_vs_control_cleanup(void); +extern struct ip_vs_dest * +ip_vs_find_dest(__be32 daddr, __be16 dport, + __be32 vaddr, __be16 vport, __u16 protocol); +extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp); /* diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 4b702f708d30..b7eeae622d9b 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -425,6 +425,25 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) } +/* + * Check if there is a destination for the connection, if so + * bind the connection to the destination. + */ +struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) +{ + struct ip_vs_dest *dest; + + if ((cp) && (!cp->dest)) { + dest = ip_vs_find_dest(cp->daddr, cp->dport, + cp->vaddr, cp->vport, cp->protocol); + ip_vs_bind_dest(cp, dest); + return dest; + } else + return NULL; +} +EXPORT_SYMBOL(ip_vs_try_bind_dest); + + /* * Unbind a connection entry with its VS destination * Called by the ip_vs_conn_expire function. 
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 7345fc252a23..3c4d22a468ec 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -579,6 +579,32 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) return NULL; } +/* + * Find destination by {daddr,dport,vaddr,protocol} + * Cretaed to be used in ip_vs_process_message() in + * the backup synchronization daemon. It finds the + * destination to be bound to the received connection + * on the backup. + * + * ip_vs_lookup_real_service() looked promissing, but + * seems not working as expected. + */ +struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, + __be32 vaddr, __be16 vport, __u16 protocol) +{ + struct ip_vs_dest *dest; + struct ip_vs_service *svc; + + svc = ip_vs_service_get(0, protocol, vaddr, vport); + if (!svc) + return NULL; + dest = ip_vs_lookup_dest(svc, daddr, dport); + if (dest) + atomic_inc(&dest->refcnt); + ip_vs_service_put(svc); + return dest; +} +EXPORT_SYMBOL(ip_vs_find_dest); /* * Lookup dest by {svc,addr,port} in the destination trash. diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 0d4d9721cbd4..b1694d67abb9 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -284,6 +284,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) struct ip_vs_sync_conn_options *opt; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; + struct ip_vs_dest *dest; char *p; int i; @@ -317,20 +318,35 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) s->caddr, s->cport, s->vaddr, s->vport); if (!cp) { + /* + * Find the appropriate destination for the connection. + * If it is not found the connection will remain unbound + * but still handled. 
+ */ + dest = ip_vs_find_dest(s->daddr, s->dport, + s->vaddr, s->vport, + s->protocol); cp = ip_vs_conn_new(s->protocol, s->caddr, s->cport, s->vaddr, s->vport, s->daddr, s->dport, - flags, NULL); + flags, dest); + if (dest) + atomic_dec(&dest->refcnt); if (!cp) { IP_VS_ERR("ip_vs_conn_new failed\n"); return; } cp->state = ntohs(s->state); } else if (!cp->dest) { - /* it is an entry created by the synchronization */ - cp->state = ntohs(s->state); - cp->flags = flags | IP_VS_CONN_F_HASHED; + dest = ip_vs_try_bind_dest(cp); + if (!dest) { + /* it is an unbound entry created by + * synchronization */ + cp->state = ntohs(s->state); + cp->flags = flags | IP_VS_CONN_F_HASHED; + } else + atomic_dec(&dest->refcnt); } /* Note that we don't touch its state and flags if it is a normal entry. */ From efac52762b1e3fe3035d29e82d8ee1aebc45e4a7 Mon Sep 17 00:00:00 2001 From: "Rumen G. Bogdanovski" Date: Wed, 7 Nov 2007 02:36:55 -0800 Subject: [PATCH 42/44] [IPVS]: Synchronize closing of Connections This patch makes the master daemon sync the connection when it is about to close. This makes the connections on the backup close or time out according to their state. Previously, the sync was performed only if the connection was in ESTABLISHED state, which always made the connections time out after the hard-coded 3 minutes. However, Andy Gospodarek's patch ([IPVS]: use proper timeout instead of fixed value) effectively did nothing more than increase this to 15 minutes (Established state timeout). So this patch makes use of the proper timeout, since it syncs the connections on status changes to FIN_WAIT (2min timeout) and CLOSE (10sec timeout). However, if the backup misses CLOSE, hopefully it did not miss FIN_WAIT. Otherwise we will just have to wait for the ESTABLISHED state timeout, as is the case without this patch. This way the number of hanging connections on the backup is kept to a minimum, and very few of them will be left to time out with a long timeout.
This is important if we want to make use of the fix for the real server overcommit on master/backup fail-over. Signed-off-by: Rumen G. Bogdanovski Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 4 ++++ net/ipv4/ipvs/ip_vs_core.c | 20 ++++++++++++++------ net/ipv4/ipvs/ip_vs_sync.c | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 1fd1ee896f39..67ea2c0c0ab7 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -520,6 +520,10 @@ struct ip_vs_conn { spinlock_t lock; /* lock for state transition */ volatile __u16 flags; /* status flags */ volatile __u16 state; /* state info */ + volatile __u16 old_state; /* old state, to be used for + * state transition triggered + * synchronization + */ /* Control members */ struct ip_vs_conn *control; /* Master control connection */ diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index c6ed7654e839..20c884a57721 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -979,15 +979,23 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, ret = NF_ACCEPT; } - /* increase its packet counter and check if it is needed - to be synchronized */ + /* Increase its packet counter and check if it is needed + * to be synchronized + * + * Sync connection if it is about to close to + * encourage the standby servers to update the connections timeout + */ atomic_inc(&cp->in_pkts); if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && - (cp->protocol != IPPROTO_TCP || - cp->state == IP_VS_TCP_S_ESTABLISHED) && - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] - == sysctl_ip_vs_sync_threshold[0])) + (((cp->protocol != IPPROTO_TCP || + cp->state == IP_VS_TCP_S_ESTABLISHED) && + (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] + == sysctl_ip_vs_sync_threshold[0])) || + ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) && + ((cp->state == IP_VS_TCP_S_FIN_WAIT) || + (cp->state
== IP_VS_TCP_S_CLOSE))))) ip_vs_sync_conn(cp); + cp->old_state = cp->state; ip_vs_conn_put(cp); return ret; diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index b1694d67abb9..bd930efc18da 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -343,7 +343,6 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) if (!dest) { /* it is an unbound entry created by * synchronization */ - cp->state = ntohs(s->state); cp->flags = flags | IP_VS_CONN_F_HASHED; } else atomic_dec(&dest->refcnt); @@ -358,6 +357,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) p += SIMPLE_CONN_SIZE; atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); + cp->state = ntohs(s->state); pp = ip_vs_proto_get(s->protocol); cp->timeout = pp->timeout_table[cp->state]; ip_vs_conn_put(cp); From 230140cffa7feae90ad50bf259db1fa07674f3a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 7 Nov 2007 02:40:20 -0800 Subject: [PATCH 43/44] [INET]: Remove per bucket rwlock in tcp/dccp ehash table. As done two years ago on IP route cache table (commit 22c047ccbc68fa8f3fa57f0e8f906479a062c426), we can avoid using one lock per hash bucket for the huge TCP/DCCP hash tables. On a typical x86_64 platform, this saves about 2MB or 4MB of ram, for little performance difference. (we hit a different cache line for the rwlock, but then the bucket cache line has a better sharing factor among cpus, since we dirty it less often). For netstat or ss commands that want a full scan of hash table, we perform fewer memory accesses. Using a 'small' table of hashed rwlocks should be more than enough to provide correct SMP concurrency between different buckets, without using too much memory. Sizing of this table depends on num_possible_cpus() and various CONFIG settings. This patch provides some locking abstraction that may ease future work using a different model for TCP/DCCP table.
Signed-off-by: Eric Dumazet Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 71 ++++++++++++++++++++++++++++++++--- net/dccp/proto.c | 9 ++++- net/ipv4/inet_diag.c | 9 +++-- net/ipv4/inet_hashtables.c | 7 ++-- net/ipv4/inet_timewait_sock.c | 13 ++++--- net/ipv4/tcp.c | 4 +- net/ipv4/tcp_ipv4.c | 11 +++--- net/ipv6/inet6_hashtables.c | 19 +++++----- 8 files changed, 106 insertions(+), 37 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 4427dcd1e53a..8461cda37490 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -37,7 +37,6 @@ * I'll experiment with dynamic table growth later. */ struct inet_ehash_bucket { - rwlock_t lock; struct hlist_head chain; struct hlist_head twchain; }; @@ -100,6 +99,9 @@ struct inet_hashinfo { * TIME_WAIT sockets use a separate chain (twchain). */ struct inet_ehash_bucket *ehash; + rwlock_t *ehash_locks; + unsigned int ehash_size; + unsigned int ehash_locks_mask; /* Ok, let's try this, I give up, we do need a local binding * TCP hash as well as the others for fast bind/connect. @@ -107,7 +109,7 @@ struct inet_hashinfo { struct inet_bind_hashbucket *bhash; unsigned int bhash_size; - unsigned int ehash_size; + /* Note : 4 bytes padding on 64 bit arches */ /* All sockets in TCP_LISTEN state will be in here. This is the only * table where wildcard'd TCP sockets can exist. 
Hash function here @@ -134,6 +136,62 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket( return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)]; } +static inline rwlock_t *inet_ehash_lockp( + struct inet_hashinfo *hashinfo, + unsigned int hash) +{ + return &hashinfo->ehash_locks[hash & hashinfo->ehash_locks_mask]; +} + +static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) +{ + unsigned int i, size = 256; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + if (nr_pcpus >= 4) + size = 512; + if (nr_pcpus >= 8) + size = 1024; + if (nr_pcpus >= 16) + size = 2048; + if (nr_pcpus >= 32) + size = 4096; + if (sizeof(rwlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(rwlock_t) > PAGE_SIZE) + hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t)); + else +#endif + hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t), + GFP_KERNEL); + if (!hashinfo->ehash_locks) + return ENOMEM; + for (i = 0; i < size; i++) + rwlock_init(&hashinfo->ehash_locks[i]); + } + hashinfo->ehash_locks_mask = size - 1; + return 0; +} + +static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) +{ + if (hashinfo->ehash_locks) { +#ifdef CONFIG_NUMA + unsigned int size = (hashinfo->ehash_locks_mask + 1) * + sizeof(rwlock_t); + if (size > PAGE_SIZE) + vfree(hashinfo->ehash_locks); + else +#else + kfree(hashinfo->ehash_locks); +#endif + hashinfo->ehash_locks = NULL; + } +} + extern struct inet_bind_bucket * inet_bind_bucket_create(struct kmem_cache *cachep, struct inet_bind_hashbucket *head, @@ -222,7 +280,7 @@ static inline void __inet_hash(struct inet_hashinfo *hashinfo, sk->sk_hash = inet_sk_ehashfn(sk); head = inet_ehash_bucket(hashinfo, sk->sk_hash); list = &head->chain; - lock = &head->lock; + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock(lock); } __sk_add_node(sk, list); @@ -253,7 +311,7 @@ static inline void inet_unhash(struct inet_hashinfo *hashinfo, 
struct sock *sk) inet_listen_wlock(hashinfo); lock = &hashinfo->lhash_lock; } else { - lock = &inet_ehash_bucket(hashinfo, sk->sk_hash)->lock; + lock = inet_ehash_lockp(hashinfo, sk->sk_hash); write_lock_bh(lock); } @@ -354,9 +412,10 @@ static inline struct sock * */ unsigned int hash = inet_ehashfn(daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); prefetch(head->chain.first); - read_lock(&head->lock); + read_lock(lock); sk_for_each(sk, node, &head->chain) { if (INET_MATCH(sk, hash, acookie, saddr, daddr, ports, dif)) goto hit; /* You sunk my battleship! */ @@ -369,7 +428,7 @@ static inline struct sock * } sk = NULL; out: - read_unlock(&head->lock); + read_unlock(lock); return sk; hit: sock_hold(sk); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index d84973928033..7a3bea9c28c1 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1072,11 +1072,13 @@ static int __init dccp_init(void) } for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - rwlock_init(&dccp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); } + if (inet_ehash_locks_alloc(&dccp_hashinfo)) + goto out_free_dccp_ehash; + bhash_order = ehash_order; do { @@ -1091,7 +1093,7 @@ static int __init dccp_init(void) if (!dccp_hashinfo.bhash) { DCCP_CRIT("Failed to allocate DCCP bind hash table"); - goto out_free_dccp_ehash; + goto out_free_dccp_locks; } for (i = 0; i < dccp_hashinfo.bhash_size; i++) { @@ -1121,6 +1123,8 @@ static int __init dccp_init(void) out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); dccp_hashinfo.bhash = NULL; +out_free_dccp_locks: + inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); dccp_hashinfo.ehash = NULL; @@ -1139,6 +1143,7 @@ static void __exit dccp_fini(void) free_pages((unsigned long)dccp_hashinfo.ehash, 
get_order(dccp_hashinfo.ehash_size * sizeof(struct inet_ehash_bucket))); + inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index dc429b6b0ba6..b0170732b5e9 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -747,13 +747,14 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) for (i = s_i; i < hashinfo->ehash_size; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; + rwlock_t *lock = inet_ehash_lockp(hashinfo, i); struct sock *sk; struct hlist_node *node; if (i > s_i) s_num = 0; - read_lock_bh(&head->lock); + read_lock_bh(lock); num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); @@ -769,7 +770,7 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) r->id.idiag_dport) goto next_normal; if (inet_csk_diag_dump(sk, skb, cb) < 0) { - read_unlock_bh(&head->lock); + read_unlock_bh(lock); goto done; } next_normal: @@ -791,14 +792,14 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) r->id.idiag_dport) goto next_dying; if (inet_twsk_diag_dump(tw, skb, cb) < 0) { - read_unlock_bh(&head->lock); + read_unlock_bh(lock); goto done; } next_dying: ++num; } } - read_unlock_bh(&head->lock); + read_unlock_bh(lock); } done: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 16eecc7046a3..67704da04fc4 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -204,12 +204,13 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock 
*tw; prefetch(head->chain.first); - write_lock(&head->lock); + write_lock(lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &head->twchain) { @@ -239,7 +240,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, BUG_TRAP(sk_unhashed(sk)); __sk_add_node(sk, &head->chain); sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); + write_unlock(lock); if (twp) { *twp = tw; @@ -255,7 +256,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, return 0; not_unique: - write_unlock(&head->lock); + write_unlock(lock); return -EADDRNOTAVAIL; } diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 4e189e28f306..a60b99e0ebdc 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, struct inet_bind_hashbucket *bhead; struct inet_bind_bucket *tb; /* Unlink from established hashes. */ - struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, tw->tw_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash); - write_lock(&ehead->lock); + write_lock(lock); if (hlist_unhashed(&tw->tw_node)) { - write_unlock(&ehead->lock); + write_unlock(lock); return; } __hlist_del(&tw->tw_node); sk_node_init(&tw->tw_node); - write_unlock(&ehead->lock); + write_unlock(lock); /* Disassociate with bind bucket. */ bhead = &hashinfo->bhash[inet_bhashfn(tw->tw_num, hashinfo->bhash_size)]; @@ -59,6 +59,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, const struct inet_sock *inet = inet_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash); struct inet_bind_hashbucket *bhead; /* Step 1: Put TW into bind hash. Original socket stays there too. 
Note, that any socket with inet->num != 0 MUST be bound in @@ -71,7 +72,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_bind_node(tw, &tw->tw_tb->owners); spin_unlock(&bhead->lock); - write_lock(&ehead->lock); + write_lock(lock); /* Step 2: Remove SK from established hash. */ if (__sk_del_node_init(sk)) @@ -81,7 +82,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, inet_twsk_add_node(tw, &ehead->twchain); atomic_inc(&tw->tw_refcnt); - write_unlock(&ehead->lock); + write_unlock(lock); } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c64072bb504b..8e65182f7af1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2456,11 +2456,11 @@ void __init tcp_init(void) thash_entries ? 0 : 512 * 1024); tcp_hashinfo.ehash_size = 1 << tcp_hashinfo.ehash_size; for (i = 0; i < tcp_hashinfo.ehash_size; i++) { - rwlock_init(&tcp_hashinfo.ehash[i].lock); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].chain); INIT_HLIST_HEAD(&tcp_hashinfo.ehash[i].twchain); } - + if (inet_ehash_locks_alloc(&tcp_hashinfo)) + panic("TCP: failed to alloc ehash_locks"); tcp_hashinfo.bhash = alloc_large_system_hash("TCP bind", sizeof(struct inet_bind_hashbucket), diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9127cdced20..e566f3c67677 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2049,8 +2049,9 @@ static void *established_get_first(struct seq_file *seq) struct sock *sk; struct hlist_node *node; struct inet_timewait_sock *tw; + rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); - read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family) { continue; @@ -2067,7 +2068,7 @@ static void *established_get_first(struct seq_file *seq) rc = tw; goto out; } - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(lock); st->state = 
TCP_SEQ_STATE_ESTABLISHED; } out: @@ -2094,11 +2095,11 @@ static void *established_get_next(struct seq_file *seq, void *cur) cur = tw; goto out; } - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else { cur = NULL; @@ -2206,7 +2207,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v) case TCP_SEQ_STATE_TIME_WAIT: case TCP_SEQ_STATE_ESTABLISHED: if (v) - read_unlock_bh(&tcp_hashinfo.ehash[st->bucket].lock); + read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); break; } } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d6f1026f1943..adc73adadfae 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -37,9 +37,8 @@ void __inet6_hash(struct inet_hashinfo *hashinfo, } else { unsigned int hash; sk->sk_hash = hash = inet6_sk_ehashfn(sk); - hash &= (hashinfo->ehash_size - 1); - list = &hashinfo->ehash[hash].chain; - lock = &hashinfo->ehash[hash].lock; + list = &inet_ehash_bucket(hashinfo, hash)->chain; + lock = inet_ehash_lockp(hashinfo, hash); write_lock(lock); } @@ -70,9 +69,10 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, */ unsigned int hash = inet6_ehashfn(daddr, hnum, saddr, sport); struct inet_ehash_bucket *head = inet_ehash_bucket(hashinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hashinfo, hash); prefetch(head->chain.first); - read_lock(&head->lock); + read_lock(lock); sk_for_each(sk, node, &head->chain) { /* For IPV6 do the cheaper port and family tests first. 
*/ if (INET6_MATCH(sk, hash, saddr, daddr, ports, dif)) @@ -92,12 +92,12 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, goto hit; } } - read_unlock(&head->lock); + read_unlock(lock); return NULL; hit: sock_hold(sk); - read_unlock(&head->lock); + read_unlock(lock); return sk; } EXPORT_SYMBOL(__inet6_lookup_established); @@ -175,12 +175,13 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const unsigned int hash = inet6_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + rwlock_t *lock = inet_ehash_lockp(hinfo, hash); struct sock *sk2; const struct hlist_node *node; struct inet_timewait_sock *tw; prefetch(head->chain.first); - write_lock(&head->lock); + write_lock(lock); /* Check TIME-WAIT sockets first. */ sk_for_each(sk2, node, &head->twchain) { @@ -216,7 +217,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, __sk_add_node(sk, &head->chain); sk->sk_hash = hash; sock_prot_inc_use(sk->sk_prot); - write_unlock(&head->lock); + write_unlock(lock); if (twp != NULL) { *twp = tw; @@ -231,7 +232,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, return 0; not_unique: - write_unlock(&head->lock); + write_unlock(lock); return -EADDRNOTAVAIL; } From c3d8d1e30cace31fed6186a4b8c6b1401836d89c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 7 Nov 2007 02:42:09 -0800 Subject: [PATCH 44/44] [NETLINK]: Fix unicast timeouts Commit ed6dcf4a in the history.git tree broke netlink_unicast timeouts by moving the schedule_timeout() call to a new function that doesn't propagate the remaining timeout back to the caller. This means on each retry we start with the full timeout again. ipc/mqueue.c seems to actually want to wait indefinitely so this behaviour is retained. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 2 +- ipc/mqueue.c | 6 ++++-- net/netlink/af_netlink.c | 10 +++++----- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 7c1f3b1d2ee5..d5bfaba595c7 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -192,7 +192,7 @@ extern int netlink_unregister_notifier(struct notifier_block *nb); /* finegrained unicast helpers: */ struct sock *netlink_getsockbyfilp(struct file *filp); int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, - long timeo, struct sock *ssk); + long *timeo, struct sock *ssk); void netlink_detachskb(struct sock *sk, struct sk_buff *skb); int netlink_sendskb(struct sock *sk, struct sk_buff *skb); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index bfa274ba9ed4..1e04cd464af9 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -1010,6 +1010,8 @@ asmlinkage long sys_mq_notify(mqd_t mqdes, return -EINVAL; } if (notification.sigev_notify == SIGEV_THREAD) { + long timeo; + /* create the notify skb */ nc = alloc_skb(NOTIFY_COOKIE_LEN, GFP_KERNEL); ret = -ENOMEM; @@ -1038,8 +1040,8 @@ asmlinkage long sys_mq_notify(mqd_t mqdes, goto out; } - ret = netlink_attachskb(sock, nc, 0, - MAX_SCHEDULE_TIMEOUT, NULL); + timeo = MAX_SCHEDULE_TIMEOUT; + ret = netlink_attachskb(sock, nc, 0, &timeo, NULL); if (ret == 1) goto retry; if (ret) { diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 260171255576..415c97236f63 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -752,7 +752,7 @@ struct sock *netlink_getsockbyfilp(struct file *filp) * 1: repeat lookup - reference dropped while waiting for socket memory. 
*/ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, - long timeo, struct sock *ssk) + long *timeo, struct sock *ssk) { struct netlink_sock *nlk; @@ -761,7 +761,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) { DECLARE_WAITQUEUE(wait, current); - if (!timeo) { + if (!*timeo) { if (!ssk || netlink_is_kernel(ssk)) netlink_overrun(sk); sock_put(sk); @@ -775,7 +775,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || test_bit(0, &nlk->state)) && !sock_flag(sk, SOCK_DEAD)) - timeo = schedule_timeout(timeo); + *timeo = schedule_timeout(*timeo); __set_current_state(TASK_RUNNING); remove_wait_queue(&nlk->wait, &wait); @@ -783,7 +783,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, if (signal_pending(current)) { kfree_skb(skb); - return sock_intr_errno(timeo); + return sock_intr_errno(*timeo); } return 1; } @@ -877,7 +877,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, if (netlink_is_kernel(sk)) return netlink_unicast_kernel(sk, skb); - err = netlink_attachskb(sk, skb, nonblock, timeo, ssk); + err = netlink_attachskb(sk, skb, nonblock, &timeo, ssk); if (err == 1) goto retry; if (err)