From c53160ec101791d912e6af320fd90448a1dbd275 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 18 Sep 2005 21:11:08 +0100 Subject: [PATCH] --- yaml --- r: 9070 b: refs/heads/master c: 7f8c0fd78dccaf30e60cb4303bd7a21c7d9e6da3 h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/CREDITS | 26 +- trunk/MAINTAINERS | 13 +- trunk/drivers/net/bonding/bond_main.c | 8 - trunk/drivers/net/tg3.c | 108 +++-- trunk/include/asm-arm/futex.h | 2 +- trunk/include/linux/dccp.h | 59 +-- trunk/include/linux/pci_ids.h | 1 - trunk/include/linux/tfrc.h | 35 -- trunk/net/Kconfig | 3 +- trunk/net/dccp/Makefile | 2 - trunk/net/dccp/ackvec.c | 419 ------------------ trunk/net/dccp/ackvec.h | 133 ------ trunk/net/dccp/ccid.h | 31 -- trunk/net/dccp/ccids/ccid3.c | 56 --- trunk/net/dccp/ccids/ccid3.h | 23 +- trunk/net/dccp/dccp.h | 91 +++- trunk/net/dccp/input.c | 89 +++- trunk/net/dccp/ipv4.c | 101 +++-- trunk/net/dccp/minisocks.c | 19 +- trunk/net/dccp/options.c | 443 ++++++++++++++++++- trunk/net/dccp/output.c | 26 +- trunk/net/dccp/proto.c | 94 +--- trunk/net/ipv4/netfilter/Kconfig | 14 +- trunk/net/ipv4/netfilter/ip_conntrack_core.c | 5 +- trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c | 223 +++++----- trunk/net/ipv6/udp.c | 10 +- trunk/net/socket.c | 3 +- 28 files changed, 882 insertions(+), 1157 deletions(-) delete mode 100644 trunk/include/linux/tfrc.h delete mode 100644 trunk/net/dccp/ackvec.c delete mode 100644 trunk/net/dccp/ackvec.h diff --git a/[refs] b/[refs] index 549081aecb9e..c34ccecfcf48 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 628f87f3d585bd0c2b0e39df039585d7a5831cc9 +refs/heads/master: 7f8c0fd78dccaf30e60cb4303bd7a21c7d9e6da3 diff --git a/trunk/CREDITS b/trunk/CREDITS index a347520bef2d..f553f8cfaa62 100644 --- a/trunk/CREDITS +++ b/trunk/CREDITS @@ -2211,15 +2211,6 @@ D: OV511 driver S: (address available on request) S: USA -N: Ian McDonald -E: iam4@cs.waikato.ac.nz -E: imcdnzl@gmail.com -W: http://wand.net.nz/~iam4 -W: http://imcdnzl.blogspot.com -D: DCCP, CCID3 -S: Hamilton -S: New Zealand - N: Patrick McHardy E: kaber@trash.net P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80 @@ -2255,12 +2246,19 @@ S: D-90453 Nuernberg S: Germany N: Arnaldo Carvalho de Melo -E: acme@mandriva.com -E: acme@ghostprotocols.net -W: http://oops.ghostprotocols.net:81/blog/ +E: acme@conectiva.com.br +E: acme@kernel.org +E: acme@gnu.org +W: http://bazar2.conectiva.com.br/~acme +W: http://advogato.org/person/acme P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01 -D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks -S: Mandriva +D: wanrouter hacking +D: misc Makefile, Config.in, drivers and network stacks fixes +D: IPX & LLC network stacks maintainer +D: Cyclom 2X synchronous card driver +D: wl3501 PCMCIA wireless card driver +D: i18n for minicom, net-tools, util-linux, fetchmail, etc +S: Conectiva S.A. S: R. Tocantins, 89 - Cristo Rei S: 80050-430 - Curitiba - Paraná S: Brazil diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index dc8f3babcabd..d1e0eb46d201 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -686,13 +686,6 @@ P: Guennadi Liakhovetski M: g.liakhovetski@gmx.de S: Maintained -DCCP PROTOCOL -P: Arnaldo Carvalho de Melo -M: acme@mandriva.com -L: dccp@vger.kernel.org -W: http://www.wlug.org.nz/DCCP -S: Maintained - DECnet NETWORK LAYER P: Patrick Caulfield M: patrick@tykepenguin.com @@ -2278,6 +2271,12 @@ M: R.E.Wolff@BitWizard.nl L: linux-kernel@vger.kernel.org ? S: Supported +SPX NETWORK LAYER +P: Jay Schulist +M: jschlst@samba.org +L: netdev@vger.kernel.org +S: Supported + SRM (Alpha) environment access P: Jan-Benedict Glaw M: jbglaw@lug-owl.de diff --git a/trunk/drivers/net/bonding/bond_main.c b/trunk/drivers/net/bonding/bond_main.c index 90449a0f2a6c..f8dedb623dc0 100644 --- a/trunk/drivers/net/bonding/bond_main.c +++ b/trunk/drivers/net/bonding/bond_main.c @@ -5039,14 +5039,6 @@ static int __init bonding_init(void) return 0; out_err: - /* - * rtnl_unlock() will run netdev_run_todo(), putting the - * thus-far-registered bonding devices into a state which - * unregigister_netdevice() will accept - */ - rtnl_unlock(); - rtnl_lock(); - /* free and unregister all bonds that were successfully added */ bond_free_all(); diff --git a/trunk/drivers/net/tg3.c b/trunk/drivers/net/tg3.c index 81f4aedf534c..7599f52e15b3 100644 --- a/trunk/drivers/net/tg3.c +++ b/trunk/drivers/net/tg3.c @@ -67,8 +67,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.40" -#define DRV_MODULE_RELDATE "September 15, 2005" +#define DRV_MODULE_VERSION "3.39" +#define DRV_MODULE_RELDATE "September 5, 2005" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -3442,47 +3442,31 @@ static void tg3_tx_timeout(struct net_device *dev) schedule_work(&tp->reset_task); } -/* Test for DMA buffers crossing any 4GB boundaries: 4G, 8G, etc */ -static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) -{ - u32 base = (u32) mapping & 0xffffffff; - - return ((base > 0xffffdcc0) && - (base + len + 8 < base)); -} - static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32); static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, - u32 last_plus_one, u32 *start, - u32 base_flags, u32 mss) + u32 guilty_entry, int guilty_len, + u32 last_plus_one, u32 *start, u32 mss) { struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC); - dma_addr_t new_addr = 0; + dma_addr_t new_addr; u32 entry = *start; - int i, ret = 0; + int i; if (!new_skb) { - ret = -1; - } else { - /* New SKB is guaranteed to be linear. */ - entry = *start; - new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len, - PCI_DMA_TODEVICE); - /* Make sure new skb does not cross any 4G boundaries. - * Drop the packet if it does. - */ - if (tg3_4g_overflow_test(new_addr, new_skb->len)) { - ret = -1; - dev_kfree_skb(new_skb); - new_skb = NULL; - } else { - tg3_set_txd(tp, entry, new_addr, new_skb->len, - base_flags, 1 | (mss << 1)); - *start = NEXT_TX(entry); - } + dev_kfree_skb(skb); + return -1; } + /* New SKB is guaranteed to be linear. */ + entry = *start; + new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len, + PCI_DMA_TODEVICE); + tg3_set_txd(tp, entry, new_addr, new_skb->len, + (skb->ip_summed == CHECKSUM_HW) ? + TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1)); + *start = NEXT_TX(entry); + /* Now clean up the sw ring entries. */ i = 0; while (entry != last_plus_one) { @@ -3507,7 +3491,7 @@ static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb, dev_kfree_skb(skb); - return ret; + return 0; } static void tg3_set_txd(struct tg3 *tp, int entry, @@ -3533,10 +3517,19 @@ static void tg3_set_txd(struct tg3 *tp, int entry, txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT; } +static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) +{ + u32 base = (u32) mapping & 0xffffffff; + + return ((base > 0xffffdcc0) && + (base + len + 8 < base)); +} + static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); dma_addr_t mapping; + unsigned int i; u32 len, entry, base_flags, mss; int would_hit_hwbug; @@ -3631,7 +3624,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) would_hit_hwbug = 0; if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = 1; + would_hit_hwbug = entry + 1; tg3_set_txd(tp, entry, mapping, len, base_flags, (skb_shinfo(skb)->nr_frags == 0) | (mss << 1)); @@ -3655,8 +3648,12 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) tp->tx_buffers[entry].skb = NULL; pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping); - if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = 1; + if (tg3_4g_overflow_test(mapping, len)) { + /* Only one should match. */ + if (would_hit_hwbug) + BUG(); + would_hit_hwbug = entry + 1; + } if (tp->tg3_flags2 & TG3_FLG2_HW_TSO) tg3_set_txd(tp, entry, mapping, len, @@ -3672,15 +3669,34 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (would_hit_hwbug) { u32 last_plus_one = entry; u32 start; + unsigned int len = 0; + + would_hit_hwbug -= 1; + entry = entry - 1 - skb_shinfo(skb)->nr_frags; + entry &= (TG3_TX_RING_SIZE - 1); + start = entry; + i = 0; + while (entry != last_plus_one) { + if (i == 0) + len = skb_headlen(skb); + else + len = skb_shinfo(skb)->frags[i-1].size; - start = entry - 1 - skb_shinfo(skb)->nr_frags; - start &= (TG3_TX_RING_SIZE - 1); + if (entry == would_hit_hwbug) + break; + + i++; + entry = NEXT_TX(entry); + + } /* If the workaround fails due to memory/mapping * failure, silently drop this packet. */ - if (tigon3_4gb_hwbug_workaround(tp, skb, last_plus_one, - &start, base_flags, mss)) + if (tigon3_4gb_hwbug_workaround(tp, skb, + entry, len, + last_plus_one, + &start, mss)) goto out_unlock; entry = start; @@ -9255,8 +9271,6 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) static struct pci_device_id write_reorder_chipsets[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_FE_GATE_700C) }, - { PCI_DEVICE(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_AMD_K8_NB) }, { }, }; u32 misc_ctrl_reg; @@ -9271,7 +9285,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->tg3_flags2 |= TG3_FLG2_SUN_570X; #endif - /* If we have an AMD 762 or K8 chipset, write + /* If we have an AMD 762 chipset, write * reordering to the mailbox registers done by the host * controller can cause major troubles. We read back from * every mailbox register write to force the writes to be @@ -9518,7 +9532,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tp->write32_rx_mbox = tg3_write_indirect_mbox; iounmap(tp->regs); - tp->regs = NULL; + tp->regs = 0; pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd); pci_cmd &= ~PCI_COMMAND_MEMORY; @@ -10666,7 +10680,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, err_out_iounmap: if (tp->regs) { iounmap(tp->regs); - tp->regs = NULL; + tp->regs = 0; } err_out_free_dev: @@ -10691,7 +10705,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev) unregister_netdev(dev); if (tp->regs) { iounmap(tp->regs); - tp->regs = NULL; + tp->regs = 0; } free_netdev(dev); pci_release_regions(pdev); diff --git a/trunk/include/asm-arm/futex.h b/trunk/include/asm-arm/futex.h index 2cac5ecd9d00..9feff4ce1424 100644 --- a/trunk/include/asm-arm/futex.h +++ b/trunk/include/asm-arm/futex.h @@ -14,7 +14,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) int cmp = (encoded_op >> 24) & 15; int oparg = (encoded_op << 8) >> 20; int cmparg = (encoded_op << 20) >> 20; - int oldval = 0, ret, tem; + int oldval = 0, ret; if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) oparg = 1 << oparg; diff --git a/trunk/include/linux/dccp.h b/trunk/include/linux/dccp.h index 71fab4311e92..8bf4bacb5051 100644 --- a/trunk/include/linux/dccp.h +++ b/trunk/include/linux/dccp.h @@ -4,6 +4,16 @@ #include #include +/* Structure describing an Internet (DCCP) socket address. */ +struct sockaddr_dccp { + __u16 sdccp_family; /* Address family */ + __u16 sdccp_port; /* Port number */ + __u32 sdccp_addr; /* Internet address */ + __u32 sdccp_service; /* Service */ + /* Pad to size of `struct sockaddr': 16 bytes . */ + __u32 sdccp_pad; +}; + /** * struct dccp_hdr - generic part of DCCP packet header * @@ -178,11 +188,6 @@ enum { /* DCCP socket options */ #define DCCP_SOCKOPT_PACKET_SIZE 1 -#define DCCP_SOCKOPT_SERVICE 2 -#define DCCP_SOCKOPT_CCID_RX_INFO 128 -#define DCCP_SOCKOPT_CCID_TX_INFO 192 - -#define DCCP_SERVICE_LIST_MAX_LEN 32 #ifdef __KERNEL__ @@ -332,8 +337,7 @@ static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) */ struct dccp_options { __u64 dccpo_sequence_window; - __u8 dccpo_rx_ccid; - __u8 dccpo_tx_ccid; + __u8 dccpo_ccid; __u8 dccpo_send_ack_vector; __u8 dccpo_send_ndp_count; }; @@ -356,8 +360,14 @@ static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) extern struct inet_timewait_death_row dccp_death_row; +/* Read about the ECN nonce to see why it is 253 */ +#define DCCP_MAX_ACK_VECTOR_LEN 253 + struct dccp_options_received { - u32 dccpor_ndp; /* only 24 bits */ + u32 dccpor_ndp:24, + dccpor_ack_vector_len:8; + u32 dccpor_ack_vector_idx:10; + /* 22 bits hole, try to pack */ u32 dccpor_timestamp; u32 dccpor_timestamp_echo; u32 dccpor_elapsed_time; @@ -372,27 +382,6 @@ enum dccp_role { DCCP_ROLE_SERVER, }; -struct dccp_service_list { - __u32 dccpsl_nr; - __u32 dccpsl_list[0]; -}; - -#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) - -static inline int dccp_list_has_service(const struct dccp_service_list *sl, - const u32 service) -{ - if (likely(sl != NULL)) { - u32 i = sl->dccpsl_nr; - while (i--) - if (sl->dccpsl_list[i] == service) - return 1; - } - return 0; -} - -struct dccp_ackvec; - /** * struct dccp_sock - DCCP socket state * @@ -413,7 +402,7 @@ struct dccp_ackvec; * @dccps_packet_size - Set thru setsockopt * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet - * @dccps_hc_rx_ackvec - rx half connection ack vector + * @dccps_hc_rx_ackpkts - receiver half connection acked packets */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -428,8 +417,7 @@ struct dccp_sock { __u64 dccps_gss; __u64 dccps_gsr; __u64 dccps_gar; - __u32 dccps_service; - struct dccp_service_list *dccps_service_list; + unsigned long dccps_service; struct timeval dccps_timestamp_time; __u32 dccps_timestamp_echo; __u32 dccps_packet_size; @@ -438,7 +426,7 @@ struct dccp_sock { __u32 dccps_pmtu_cookie; __u32 dccps_mss_cache; struct dccp_options dccps_options; - struct dccp_ackvec *dccps_hc_rx_ackvec; + struct dccp_ackpkts *dccps_hc_rx_ackpkts; void *dccps_hc_rx_ccid_private; void *dccps_hc_tx_ccid_private; struct ccid *dccps_hc_rx_ccid; @@ -455,11 +443,6 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk) return (struct dccp_sock *)sk; } -static inline int dccp_service_not_initialized(const struct sock *sk) -{ - return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE; -} - static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { diff --git a/trunk/include/linux/pci_ids.h b/trunk/include/linux/pci_ids.h index c49d28eca561..486d1c1676bd 100644 --- a/trunk/include/linux/pci_ids.h +++ b/trunk/include/linux/pci_ids.h @@ -491,7 +491,6 @@ #define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060 #define PCI_VENDOR_ID_AMD 0x1022 -#define PCI_DEVICE_ID_AMD_K8_NB 0x1100 #define PCI_DEVICE_ID_AMD_LANCE 0x2000 #define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001 #define PCI_DEVICE_ID_AMD_SCSI 0x2020 diff --git a/trunk/include/linux/tfrc.h b/trunk/include/linux/tfrc.h deleted file mode 100644 index 7dab7831c3cb..000000000000 --- a/trunk/include/linux/tfrc.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef _LINUX_TFRC_H_ -#define _LINUX_TFRC_H_ -/* - * include/linux/tfrc.h - * - * Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand. - * Copyright (c) 2005 Ian McDonald - * Copyright (c) 2005 Arnaldo Carvalho de Melo - * Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#include - -struct tfrc_rx_info { - __u32 tfrcrx_x_recv; - __u32 tfrcrx_rtt; - __u32 tfrcrx_p; -}; - -struct tfrc_tx_info { - __u32 tfrctx_x; - __u32 tfrctx_x_recv; - __u32 tfrctx_x_calc; - __u32 tfrctx_rtt; - __u32 tfrctx_p; - __u32 tfrctx_rto; - __u32 tfrctx_ipi; -}; - -#endif /* _LINUX_TFRC_H_ */ diff --git a/trunk/net/Kconfig b/trunk/net/Kconfig index 60f6f321bd76..2bdd5623fdd5 100644 --- a/trunk/net/Kconfig +++ b/trunk/net/Kconfig @@ -140,7 +140,6 @@ config BRIDGE_NETFILTER If unsure, say N. -source "net/netfilter/Kconfig" source "net/ipv4/netfilter/Kconfig" source "net/ipv6/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig" @@ -207,6 +206,8 @@ config NET_PKTGEN To compile this code as a module, choose M here: the module will be called pktgen. +source "net/netfilter/Kconfig" + endmenu endmenu diff --git a/trunk/net/dccp/Makefile b/trunk/net/dccp/Makefile index 344a8da153fc..fb97bb042455 100644 --- a/trunk/net/dccp/Makefile +++ b/trunk/net/dccp/Makefile @@ -3,8 +3,6 @@ obj-$(CONFIG_IP_DCCP) += dccp.o dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o \ timer.o -dccp-$(CONFIG_IP_DCCP_ACKVEC) += ackvec.o - obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o dccp_diag-y := diag.o diff --git a/trunk/net/dccp/ackvec.c b/trunk/net/dccp/ackvec.c deleted file mode 100644 index 6530283eafca..000000000000 --- a/trunk/net/dccp/ackvec.c +++ /dev/null @@ -1,419 +0,0 @@ -/* - * net/dccp/ackvec.c - * - * An implementation of the DCCP protocol - * Copyright (c) 2005 Arnaldo Carvalho de Melo - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; version 2 of the License; - */ - -#include "ackvec.h" -#include "dccp.h" - -#include -#include - -#include - -int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec; - int len = av->dccpav_vec_len + 2; - struct timeval now; - u32 elapsed_time; - unsigned char *to, *from; - - dccp_timestamp(sk, &now); - elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10; - - if (elapsed_time != 0) - dccp_insert_option_elapsed_time(sk, skb, elapsed_time); - - if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) - return -1; - - /* - * XXX: now we have just one ack vector sent record, so - * we have to wait for it to be cleared. - * - * Of course this is not acceptable, but this is just for - * basic testing now. - */ - if (av->dccpav_ack_seqno != DCCP_MAX_SEQNO + 1) - return -1; - - DCCP_SKB_CB(skb)->dccpd_opt_len += len; - - to = skb_push(skb, len); - *to++ = DCCPO_ACK_VECTOR_0; - *to++ = len; - - len = av->dccpav_vec_len; - from = av->dccpav_buf + av->dccpav_buf_head; - - /* Check if buf_head wraps */ - if (av->dccpav_buf_head + len > av->dccpav_vec_len) { - const u32 tailsize = (av->dccpav_vec_len - av->dccpav_buf_head); - - memcpy(to, from, tailsize); - to += tailsize; - len -= tailsize; - from = av->dccpav_buf; - } - - memcpy(to, from, len); - /* - * From draft-ietf-dccp-spec-11.txt: - * - * For each acknowledgement it sends, the HC-Receiver will add an - * acknowledgement record. ack_seqno will equal the HC-Receiver - * sequence number it used for the ack packet; ack_ptr will equal - * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will - * equal buf_nonce. - * - * This implemention uses just one ack record for now. - */ - av->dccpav_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; - av->dccpav_ack_ptr = av->dccpav_buf_head; - av->dccpav_ack_ackno = av->dccpav_buf_ackno; - av->dccpav_ack_nonce = av->dccpav_buf_nonce; - av->dccpav_sent_len = av->dccpav_vec_len; - - dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu\n", - debug_prefix, av->dccpav_sent_len, - (unsigned long long)av->dccpav_ack_seqno, - (unsigned long long)av->dccpav_ack_ackno); - return -1; -} - -struct dccp_ackvec *dccp_ackvec_alloc(const unsigned int len, - const unsigned int __nocast priority) -{ - struct dccp_ackvec *av = kmalloc(sizeof(*av) + len, priority); - - if (av != NULL) { - av->dccpav_buf_len = len; - av->dccpav_buf_head = - av->dccpav_buf_tail = av->dccpav_buf_len - 1; - av->dccpav_buf_ackno = - av->dccpav_ack_ackno = av->dccpav_ack_seqno = ~0LLU; - av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; - av->dccpav_ack_ptr = 0; - av->dccpav_time.tv_sec = 0; - av->dccpav_time.tv_usec = 0; - av->dccpav_sent_len = av->dccpav_vec_len = 0; - } - - return av; -} - -void dccp_ackvec_free(struct dccp_ackvec *av) -{ - kfree(av); -} - -static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av, - const unsigned int index) -{ - return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK; -} - -static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av, - const unsigned int index) -{ - return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK; -} - -/* - * If several packets are missing, the HC-Receiver may prefer to enter multiple - * bytes with run length 0, rather than a single byte with a larger run length; - * this simplifies table updates if one of the missing packets arrives. - */ -static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av, - const unsigned int packets, - const unsigned char state) -{ - unsigned int gap; - signed long new_head; - - if (av->dccpav_vec_len + packets > av->dccpav_buf_len) - return -ENOBUFS; - - gap = packets - 1; - new_head = av->dccpav_buf_head - packets; - - if (new_head < 0) { - if (gap > 0) { - memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED, - gap + new_head + 1); - gap = -new_head; - } - new_head += av->dccpav_buf_len; - } - - av->dccpav_buf_head = new_head; - - if (gap > 0) - memset(av->dccpav_buf + av->dccpav_buf_head + 1, - DCCP_ACKVEC_STATE_NOT_RECEIVED, gap); - - av->dccpav_buf[av->dccpav_buf_head] = state; - av->dccpav_vec_len += packets; - return 0; -} - -/* - * Implements the draft-ietf-dccp-spec-11.txt Appendix A - */ -int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) -{ - /* - * Check at the right places if the buffer is full, if it is, tell the - * caller to start dropping packets till the HC-Sender acks our ACK - * vectors, when we will free up space in dccpav_buf. - * - * We may well decide to do buffer compression, etc, but for now lets - * just drop. - * - * From Appendix A: - * - * Of course, the circular buffer may overflow, either when the - * HC-Sender is sending data at a very high rate, when the - * HC-Receiver's acknowledgements are not reaching the HC-Sender, - * or when the HC-Sender is forgetting to acknowledge those acks - * (so the HC-Receiver is unable to clean up old state). In this - * case, the HC-Receiver should either compress the buffer (by - * increasing run lengths when possible), transfer its state to - * a larger buffer, or, as a last resort, drop all received - * packets, without processing them whatsoever, until its buffer - * shrinks again. - */ - - /* See if this is the first ackno being inserted */ - if (av->dccpav_vec_len == 0) { - av->dccpav_buf[av->dccpav_buf_head] = state; - av->dccpav_vec_len = 1; - } else if (after48(ackno, av->dccpav_buf_ackno)) { - const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno, - ackno); - - /* - * Look if the state of this packet is the same as the - * previous ackno and if so if we can bump the head len. - */ - if (delta == 1 && - dccp_ackvec_state(av, av->dccpav_buf_head) == state && - (dccp_ackvec_len(av, av->dccpav_buf_head) < - DCCP_ACKVEC_LEN_MASK)) - av->dccpav_buf[av->dccpav_buf_head]++; - else if (dccp_ackvec_set_buf_head_state(av, delta, state)) - return -ENOBUFS; - } else { - /* - * A.1.2. Old Packets - * - * When a packet with Sequence Number S arrives, and - * S <= buf_ackno, the HC-Receiver will scan the table - * for the byte corresponding to S. (Indexing structures - * could reduce the complexity of this scan.) - */ - u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno); - unsigned int index = av->dccpav_buf_head; - - while (1) { - const u8 len = dccp_ackvec_len(av, index); - const u8 state = dccp_ackvec_state(av, index); - /* - * valid packets not yet in dccpav_buf have a reserved - * entry, with a len equal to 0. - */ - if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED && - len == 0 && delta == 0) { /* Found our - reserved seat! */ - dccp_pr_debug("Found %llu reserved seat!\n", - (unsigned long long)ackno); - av->dccpav_buf[index] = state; - goto out; - } - /* len == 0 means one packet */ - if (delta < len + 1) - goto out_duplicate; - - delta -= len + 1; - if (++index == av->dccpav_buf_len) - index = 0; - } - } - - av->dccpav_buf_ackno = ackno; - dccp_timestamp(sk, &av->dccpav_time); -out: - dccp_pr_debug(""); - return 0; - -out_duplicate: - /* Duplicate packet */ - dccp_pr_debug("Received a dup or already considered lost " - "packet: %llu\n", (unsigned long long)ackno); - return -EILSEQ; -} - -#ifdef CONFIG_IP_DCCP_DEBUG -void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len) -{ - if (!dccp_debug) - return; - - printk("ACK vector len=%d, ackno=%llu |", len, - (unsigned long long)ackno); - - while (len--) { - const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6; - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - - printk("%d,%d|", state, rl); - ++vector; - } - - printk("\n"); -} - -void dccp_ackvec_print(const struct dccp_ackvec *av) -{ - dccp_ackvector_print(av->dccpav_buf_ackno, - av->dccpav_buf + av->dccpav_buf_head, - av->dccpav_vec_len); -} -#endif - -static void dccp_ackvec_trow_away_ack_record(struct dccp_ackvec *av) -{ - /* - * As we're keeping track of the ack vector size (dccpav_vec_len) and - * the sent ack vector size (dccpav_sent_len) we don't need - * dccpav_buf_tail at all, but keep this code here as in the future - * we'll implement a vector of ack records, as suggested in - * draft-ietf-dccp-spec-11.txt Appendix A. -acme - */ -#if 0 - av->dccpav_buf_tail = av->dccpav_ack_ptr + 1; - if (av->dccpav_buf_tail >= av->dccpav_vec_len) - av->dccpav_buf_tail -= av->dccpav_vec_len; -#endif - av->dccpav_vec_len -= av->dccpav_sent_len; -} - -void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk, - const u64 ackno) -{ - /* Check if we actually sent an ACK vector */ - if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - - if (ackno == av->dccpav_ack_seqno) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " - "ack_ackno=%llu, ACKED!\n", - debug_prefix, 1, - (unsigned long long)av->dccpav_ack_seqno, - (unsigned long long)av->dccpav_ack_ackno); - dccp_ackvec_trow_away_ack_record(av); - av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; - } -} - -static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av, - struct sock *sk, u64 ackno, - const unsigned char len, - const unsigned char *vector) -{ - unsigned char i; - - /* Check if we actually sent an ACK vector */ - if (av->dccpav_ack_seqno == DCCP_MAX_SEQNO + 1) - return; - /* - * We're in the receiver half connection, so if the received an ACK - * vector ackno (e.g. 50) before dccpav_ack_seqno (e.g. 52), we're - * not interested. - * - * Extra explanation with example: - * - * if we received an ACK vector with ackno 50, it can only be acking - * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). - */ - /* dccp_pr_debug("is %llu < %llu? ", ackno, av->dccpav_ack_seqno); */ - if (before48(ackno, av->dccpav_ack_seqno)) { - /* dccp_pr_debug_cat("yes\n"); */ - return; - } - /* dccp_pr_debug_cat("no\n"); */ - - i = len; - while (i--) { - const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK; - u64 ackno_end_rl; - - dccp_set_seqno(&ackno_end_rl, ackno - rl); - - /* - * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, - * av->dccpav_ack_seqno, ackno); - */ - if (between48(av->dccpav_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = (*vector & - DCCP_ACKVEC_STATE_MASK) >> 6; - /* dccp_pr_debug_cat("yes\n"); */ - - if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { -#ifdef CONFIG_IP_DCCP_DEBUG - struct dccp_sock *dp = dccp_sk(sk); - const char *debug_prefix = - dp->dccps_role == DCCP_ROLE_CLIENT ? - "CLIENT rx ack: " : "server rx ack: "; -#endif - dccp_pr_debug("%sACK vector 0, len=%d, " - "ack_seqno=%llu, ack_ackno=%llu, " - "ACKED!\n", - debug_prefix, len, - (unsigned long long) - av->dccpav_ack_seqno, - (unsigned long long) - av->dccpav_ack_ackno); - dccp_ackvec_trow_away_ack_record(av); - } - /* - * If dccpav_ack_seqno was not received, no problem - * we'll send another ACK vector. - */ - av->dccpav_ack_seqno = DCCP_MAX_SEQNO + 1; - break; - } - /* dccp_pr_debug_cat("no\n"); */ - - dccp_set_seqno(&ackno, ackno_end_rl - 1); - ++vector; - } -} - -int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len) -{ - if (len > DCCP_MAX_ACKVEC_LEN) - return -1; - - /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */ - dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq, - len, value); - return 0; -} diff --git a/trunk/net/dccp/ackvec.h b/trunk/net/dccp/ackvec.h deleted file mode 100644 index 8ca51c9191f7..000000000000 --- a/trunk/net/dccp/ackvec.h +++ /dev/null @@ -1,133 +0,0 @@ -#ifndef _ACKVEC_H -#define _ACKVEC_H -/* - * net/dccp/ackvec.h - * - * An implementation of the DCCP protocol - * Copyright (c) 2005 Arnaldo Carvalho de Melo - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include - -/* Read about the ECN nonce to see why it is 253 */ -#define DCCP_MAX_ACKVEC_LEN 253 - -#define DCCP_ACKVEC_STATE_RECEIVED 0 -#define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) -#define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) - -#define DCCP_ACKVEC_STATE_MASK 0xC0 /* 11000000 */ -#define DCCP_ACKVEC_LEN_MASK 0x3F /* 00111111 */ - -/** struct dccp_ackvec - ack vector - * - * This data structure is the one defined in the DCCP draft - * Appendix A. - * - * @dccpav_buf_head - circular buffer head - * @dccpav_buf_tail - circular buffer tail - * @dccpav_buf_ackno - ack # of the most recent packet acknowledgeable in the - * buffer (i.e. %dccpav_buf_head) - * @dccpav_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked - * by the buffer with State 0 - * - * Additionally, the HC-Receiver must keep some information about the - * Ack Vectors it has recently sent. For each packet sent carrying an - * Ack Vector, it remembers four variables: - * - * @dccpav_ack_seqno - the Sequence Number used for the packet - * (HC-Receiver seqno) - * @dccpav_ack_ptr - the value of buf_head at the time of acknowledgement. - * @dccpav_ack_ackno - the Acknowledgement Number used for the packet - * (HC-Sender seqno) - * @dccpav_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. - * - * @dccpav_buf_len - circular buffer length - * @dccpav_time - the time in usecs - * @dccpav_buf - circular buffer of acknowledgeable packets - */ -struct dccp_ackvec { - unsigned int dccpav_buf_head; - unsigned int dccpav_buf_tail; - u64 dccpav_buf_ackno; - u64 dccpav_ack_seqno; - u64 dccpav_ack_ackno; - unsigned int dccpav_ack_ptr; - unsigned int dccpav_sent_len; - unsigned int dccpav_vec_len; - unsigned int dccpav_buf_len; - struct timeval dccpav_time; - u8 dccpav_buf_nonce; - u8 dccpav_ack_nonce; - u8 dccpav_buf[0]; -}; - -struct sock; -struct sk_buff; - -#ifdef CONFIG_IP_DCCP_ACKVEC -extern struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, - const unsigned int __nocast priority); -extern void dccp_ackvec_free(struct dccp_ackvec *av); - -extern int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state); - -extern void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno); -extern int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len); - -extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); - -static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) -{ - return av->dccpav_sent_len != av->dccpav_vec_len; -} -#else /* CONFIG_IP_DCCP_ACKVEC */ -static inline struct dccp_ackvec *dccp_ackvec_alloc(unsigned int len, - const unsigned int __nocast priority) -{ - return NULL; -} - -static inline void dccp_ackvec_free(struct dccp_ackvec *av) -{ -} - -static inline int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk, - const u64 ackno, const u8 state) -{ - return -1; -} - -static inline void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, - struct sock *sk, const u64 ackno) -{ -} - -static inline int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb, - const u8 opt, const u8 *value, const u8 len) -{ - return -1; -} - -static inline int dccp_insert_option_ackvec(const struct sock *sk, - const struct sk_buff *skb) -{ - return -1; -} - -static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) -{ - return 0; -} -#endif /* CONFIG_IP_DCCP_ACKVEC */ -#endif /* _ACKVEC_H */ diff --git a/trunk/net/dccp/ccid.h b/trunk/net/dccp/ccid.h index 21e55142dcd3..962f1e9e2f7e 100644 --- a/trunk/net/dccp/ccid.h +++ b/trunk/net/dccp/ccid.h @@ -14,7 +14,6 @@ */ #include -#include #include #include #include @@ -55,14 +54,6 @@ struct ccid { struct tcp_info *info); void (*ccid_hc_tx_get_info)(struct sock *sk, struct tcp_info *info); - int (*ccid_hc_rx_getsockopt)(struct sock *sk, - const int optname, int len, - u32 __user *optval, - int __user *optlen); - int (*ccid_hc_tx_getsockopt)(struct sock *sk, - const int optname, int len, - u32 __user *optval, - int __user *optlen); }; extern int ccid_register(struct ccid *ccid); @@ -186,26 +177,4 @@ static inline void ccid_hc_tx_get_info(struct ccid *ccid, struct sock *sk, if (ccid->ccid_hc_tx_get_info != NULL) ccid->ccid_hc_tx_get_info(sk, info); } - -static inline int ccid_hc_rx_getsockopt(struct ccid *ccid, struct sock *sk, - const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - int rc = -ENOPROTOOPT; - if (ccid->ccid_hc_rx_getsockopt != NULL) - rc = ccid->ccid_hc_rx_getsockopt(sk, optname, len, - optval, optlen); - return rc; -} - -static inline int ccid_hc_tx_getsockopt(struct ccid *ccid, struct sock *sk, - const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - int rc = -ENOPROTOOPT; - if (ccid->ccid_hc_tx_getsockopt != NULL) - rc = ccid->ccid_hc_tx_getsockopt(sk, optname, len, - optval, optlen); - return rc; -} #endif /* _CCID_H */ diff --git a/trunk/net/dccp/ccids/ccid3.c b/trunk/net/dccp/ccids/ccid3.c index aa68e0ab274d..38aa84986118 100644 --- a/trunk/net/dccp/ccids/ccid3.c +++ b/trunk/net/dccp/ccids/ccid3.c @@ -1120,60 +1120,6 @@ static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = hctx->ccid3hctx_rtt; } -static int ccid3_hc_rx_getsockopt(struct sock *sk, const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - const void *val; - - /* Listen socks doesn't have a private CCID block */ - if (sk->sk_state == DCCP_LISTEN) - return -EINVAL; - - switch (optname) { - case DCCP_SOCKOPT_CCID_RX_INFO: - if (len < sizeof(hcrx->ccid3hcrx_tfrc)) - return -EINVAL; - len = sizeof(hcrx->ccid3hcrx_tfrc); - val = &hcrx->ccid3hcrx_tfrc; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen) || copy_to_user(optval, val, len)) - return -EFAULT; - - return 0; -} - -static int ccid3_hc_tx_getsockopt(struct sock *sk, const int optname, int len, - u32 __user *optval, int __user *optlen) -{ - const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk); - const void *val; - - /* Listen socks doesn't have a private CCID block */ - if (sk->sk_state == DCCP_LISTEN) - return -EINVAL; - - switch (optname) { - case DCCP_SOCKOPT_CCID_TX_INFO: - if (len < sizeof(hctx->ccid3hctx_tfrc)) - return -EINVAL; - len = sizeof(hctx->ccid3hctx_tfrc); - val = &hctx->ccid3hctx_tfrc; - break; - default: - return -ENOPROTOOPT; - } - - if (put_user(len, optlen) || copy_to_user(optval, val, len)) - return -EFAULT; - - return 0; -} - static struct ccid ccid3 = { .ccid_id = 3, .ccid_name = "ccid3", @@ -1193,8 +1139,6 @@ static struct ccid ccid3 = { .ccid_hc_rx_packet_recv = ccid3_hc_rx_packet_recv, .ccid_hc_rx_get_info = ccid3_hc_rx_get_info, .ccid_hc_tx_get_info = ccid3_hc_tx_get_info, - .ccid_hc_rx_getsockopt = ccid3_hc_rx_getsockopt, - .ccid_hc_tx_getsockopt = ccid3_hc_tx_getsockopt, }; module_param(ccid3_debug, int, 0444); diff --git a/trunk/net/dccp/ccids/ccid3.h b/trunk/net/dccp/ccids/ccid3.h index 0bde4583d091..eb248778eea3 100644 --- a/trunk/net/dccp/ccids/ccid3.h +++ b/trunk/net/dccp/ccids/ccid3.h @@ -40,7 +40,6 @@ #include #include #include -#include #define TFRC_MIN_PACKET_SIZE 16 #define TFRC_STD_PACKET_SIZE 256 @@ -94,15 +93,12 @@ struct ccid3_options_received { * @ccid3hctx_hist - Packet history */ struct ccid3_hc_tx_sock { - struct tfrc_tx_info ccid3hctx_tfrc; -#define ccid3hctx_x ccid3hctx_tfrc.tfrctx_x -#define ccid3hctx_x_recv ccid3hctx_tfrc.tfrctx_x_recv -#define ccid3hctx_x_calc ccid3hctx_tfrc.tfrctx_x_calc -#define ccid3hctx_rtt ccid3hctx_tfrc.tfrctx_rtt -#define ccid3hctx_p ccid3hctx_tfrc.tfrctx_p -#define ccid3hctx_t_rto ccid3hctx_tfrc.tfrctx_rto -#define ccid3hctx_t_ipi ccid3hctx_tfrc.tfrctx_ipi + u32 ccid3hctx_x; + u32 ccid3hctx_x_recv; + u32 ccid3hctx_x_calc; u16 ccid3hctx_s; + u32 ccid3hctx_rtt; + u32 ccid3hctx_p; u8 ccid3hctx_state; u8 ccid3hctx_last_win_count; u8 ccid3hctx_idle; @@ -110,19 +106,19 @@ struct ccid3_hc_tx_sock { struct timer_list ccid3hctx_no_feedback_timer; struct timeval ccid3hctx_t_ld; struct timeval ccid3hctx_t_nom; + u32 ccid3hctx_t_rto; + u32 ccid3hctx_t_ipi; u32 ccid3hctx_delta; struct list_head ccid3hctx_hist; struct ccid3_options_received ccid3hctx_options_received; }; struct ccid3_hc_rx_sock { - struct tfrc_rx_info ccid3hcrx_tfrc; -#define ccid3hcrx_x_recv ccid3hcrx_tfrc.tfrcrx_x_recv -#define ccid3hcrx_rtt ccid3hcrx_tfrc.tfrcrx_rtt -#define ccid3hcrx_p ccid3hcrx_tfrc.tfrcrx_p u64 ccid3hcrx_seqno_last_counter:48, ccid3hcrx_state:8, ccid3hcrx_last_counter:4; + u32 ccid3hcrx_rtt; + u32 ccid3hcrx_p; u32 ccid3hcrx_bytes_recv; struct timeval ccid3hcrx_tstamp_last_feedback; struct timeval ccid3hcrx_tstamp_last_ack; @@ -131,6 +127,7 @@ struct ccid3_hc_rx_sock { u16 ccid3hcrx_s; u32 ccid3hcrx_pinv; u32 ccid3hcrx_elapsed_time; + u32 ccid3hcrx_x_recv; }; static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk) diff --git a/trunk/net/dccp/dccp.h b/trunk/net/dccp/dccp.h index 5871c027f9dc..95c4630b3b18 100644 --- a/trunk/net/dccp/dccp.h +++ b/trunk/net/dccp/dccp.h @@ -17,7 +17,6 @@ #include #include #include -#include "ackvec.h" #ifdef CONFIG_IP_DCCP_DEBUG extern int dccp_debug; @@ -259,12 +258,13 @@ extern int dccp_v4_send_reset(struct sock *sk, extern void dccp_send_close(struct sock *sk, const int active); struct dccp_skb_cb { - __u8 dccpd_type:4; - __u8 dccpd_ccval:4; - __u8 dccpd_reset_code; - __u16 dccpd_opt_len; + __u8 dccpd_type; + __u8 dccpd_reset_code; + __u8 dccpd_service; + __u8 dccpd_ccval; __u64 dccpd_seq; __u64 dccpd_ack_seq; + int dccpd_opt_len; }; #define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0])) @@ -359,17 +359,6 @@ static inline void dccp_update_gss(struct sock *sk, u64 seq) (dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1)); } - -static inline int dccp_ack_pending(const struct sock *sk) -{ - const struct dccp_sock *dp = dccp_sk(sk); - return dp->dccps_timestamp_echo != 0 || -#ifdef CONFIG_IP_DCCP_ACKVEC - (dp->dccps_options.dccpo_send_ack_vector && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) || -#endif - inet_csk_ack_scheduled(sk); -} extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb); extern void dccp_insert_option_elapsed_time(struct sock *sk, @@ -383,6 +372,65 @@ extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb, extern struct socket *dccp_ctl_socket; +#define DCCP_ACKPKTS_STATE_RECEIVED 0 +#define DCCP_ACKPKTS_STATE_ECN_MARKED (1 << 6) +#define DCCP_ACKPKTS_STATE_NOT_RECEIVED (3 << 6) + +#define DCCP_ACKPKTS_STATE_MASK 0xC0 /* 11000000 */ +#define DCCP_ACKPKTS_LEN_MASK 0x3F /* 00111111 */ + +/** struct dccp_ackpkts - acknowledgeable packets + * + * This data structure is the one defined in the DCCP draft + * Appendix A. + * + * @dccpap_buf_head - circular buffer head + * @dccpap_buf_tail - circular buffer tail + * @dccpap_buf_ackno - ack # of the most recent packet acknowledgeable in the + * buffer (i.e. %dccpap_buf_head) + * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked + * by the buffer with State 0 + * + * Additionally, the HC-Receiver must keep some information about the + * Ack Vectors it has recently sent. For each packet sent carrying an + * Ack Vector, it remembers four variables: + * + * @dccpap_ack_seqno - the Sequence Number used for the packet + * (HC-Receiver seqno) + * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement. + * @dccpap_ack_ackno - the Acknowledgement Number used for the packet + * (HC-Sender seqno) + * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0. + * + * @dccpap_buf_len - circular buffer length + * @dccpap_time - the time in usecs + * @dccpap_buf - circular buffer of acknowledgeable packets + */ +struct dccp_ackpkts { + unsigned int dccpap_buf_head; + unsigned int dccpap_buf_tail; + u64 dccpap_buf_ackno; + u64 dccpap_ack_seqno; + u64 dccpap_ack_ackno; + unsigned int dccpap_ack_ptr; + unsigned int dccpap_buf_vector_len; + unsigned int dccpap_ack_vector_len; + unsigned int dccpap_buf_len; + struct timeval dccpap_time; + u8 dccpap_buf_nonce; + u8 dccpap_ack_nonce; + u8 dccpap_buf[0]; +}; + +extern struct dccp_ackpkts * + dccp_ackpkts_alloc(unsigned int len, + const unsigned int __nocast priority); +extern void dccp_ackpkts_free(struct dccp_ackpkts *ap); +extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state); +extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno); + extern void dccp_timestamp(const struct sock *sk, struct timeval *tv); static inline suseconds_t timeval_usecs(const struct timeval *tv) @@ -423,4 +471,15 @@ static inline void timeval_sub_usecs(struct timeval *tv, } } +#ifdef CONFIG_IP_DCCP_DEBUG +extern void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, int len); +extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap); +#else +static inline void dccp_ackvector_print(const u64 ackno, + const unsigned char *vector, + int len) { } +static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { } +#endif + #endif /* _DCCP_H */ diff --git a/trunk/net/dccp/input.c b/trunk/net/dccp/input.c index 1b6b2cb12376..c74034cf7ede 100644 --- a/trunk/net/dccp/input.c +++ b/trunk/net/dccp/input.c @@ -16,7 +16,6 @@ #include -#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -61,8 +60,8 @@ static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb) struct dccp_sock *dp = dccp_sk(sk); if (dp->dccps_options.dccpo_send_ack_vector) - dccp_ackvec_check_rcv_ackno(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq); } static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb) @@ -165,11 +164,37 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); - if (dp->dccps_options.dccpo_send_ack_vector && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; + /* + * FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) { + LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable " + "packets buffer full!\n"); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MIN, + DCCP_RTO_MAX); + goto discard; + } + + /* + * FIXME: this activation is probably wrong, have to study more + * TCP delack machinery and how it fits into DCCP draft, but + * for now it kinda "works" 8) + */ + if (!inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, + DCCP_RTO_MAX); + } + } ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); @@ -359,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, } out_invalid_packet: - /* dccp_v4_do_rcv will send a reset */ - DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR; - return 1; + return 1; /* dccp_v4_do_rcv will send a reset, but... + FIXME: the reset code should be + DCCP_RESET_CODE_PACKET_ERROR */ } static int dccp_rcv_respond_partopen_state_process(struct sock *sk, @@ -408,7 +433,6 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct dccp_hdr *dh, unsigned len) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int old_state = sk->sk_state; int queued = 0; @@ -449,8 +473,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dh->dccph_type == DCCP_PKT_RESET) goto discard; - /* Caller (dccp_v4_do_rcv) will send Reset */ - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; + /* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/ return 1; } @@ -464,17 +487,36 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, if (dccp_parse_options(sk, skb)) goto discard; - if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) + if (DCCP_SKB_CB(skb)->dccpd_ack_seq != + DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb); ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb); - if (dp->dccps_options.dccpo_send_ack_vector && - dccp_ackvec_add(dp->dccps_hc_rx_ackvec, sk, - DCCP_SKB_CB(skb)->dccpd_seq, - DCCP_ACKVEC_STATE_RECEIVED)) - goto discard; + /* + * FIXME: check ECN to see if we should use + * DCCP_ACKPKTS_STATE_ECN_MARKED + */ + if (dp->dccps_options.dccpo_send_ack_vector) { + if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk, + DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_ACKPKTS_STATE_RECEIVED)) + goto discard; + /* + * FIXME: this activation is probably wrong, have to + * study more TCP delack machinery and how it fits into + * DCCP draft, but for now it kinda "works" 8) + */ + if ((dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == + DCCP_MAX_SEQNO + 1) && + !inet_csk_ack_scheduled(sk)) { + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + TCP_DELACK_MIN, + DCCP_RTO_MAX); + } + } } /* @@ -509,7 +551,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, dh->dccph_type == DCCP_PKT_REQUEST) || (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNC); goto discard; } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) { dccp_rcv_closereq(sk, skb); @@ -520,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { - dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK); + dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq, + DCCP_PKT_SYNCACK); goto discard; } switch (sk->sk_state) { case DCCP_CLOSED: - dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; return 1; case DCCP_REQUESTING: diff --git a/trunk/net/dccp/ipv4.c b/trunk/net/dccp/ipv4.c index 40fe6afacde6..2afaa464e7f0 100644 --- a/trunk/net/dccp/ipv4.c +++ b/trunk/net/dccp/ipv4.c @@ -23,7 +23,6 @@ #include #include -#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -247,9 +246,6 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -665,16 +661,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk, dccp_hdr(skb)->dccph_sport); } -static inline int dccp_bad_service_code(const struct sock *sk, - const __u32 service) -{ - const struct dccp_sock *dp = dccp_sk(sk); - - if (dp->dccps_service == service) - return 0; - return !dccp_list_has_service(dp->dccps_service_list, service); -} - int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct inet_request_sock *ireq; @@ -683,22 +669,13 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct dccp_request_sock *dreq; const __u32 saddr = skb->nh.iph->saddr; const __u32 daddr = skb->nh.iph->daddr; - const __u32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; struct dst_entry *dst = NULL; /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; + (RTCF_BROADCAST | RTCF_MULTICAST)) goto drop; - } - if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } /* * TW buckets are converted to open requests without * limitations, they conserve resources and peer is @@ -741,9 +718,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * dccp_create_openreq_child. */ dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = service; + dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq; + dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); + dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service; if (dccp_v4_send_response(sk, req, dst)) goto drop_and_free; @@ -758,7 +735,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) __reqsk_free(req); drop: DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; return -1; } @@ -1029,6 +1005,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: + DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; dccp_v4_ctl_send_reset(skb); discard: kfree_skb(skb); @@ -1113,7 +1090,45 @@ int dccp_v4_rcv(struct sk_buff *skb) goto discard_it; dh = dccp_hdr(skb); +#if 0 + /* + * Use something like this to simulate some DATA/DATAACK loss to test + * dccp_ackpkts_add, you'll get something like this on a session that + * sends 10 DATA/DATAACK packets: + * + * ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1| + * + * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet + * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets + * with the same state + * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet + * + * So... + * + * 281473596467422 was received + * 281473596467421 was not received + * 281473596467420 was received + * 281473596467419 was not received + * 281473596467418 was received + * 281473596467417 was not received + * 281473596467416 was received + * 281473596467415 was not received + * 281473596467414 was received + * 281473596467413 was received (this one was the 3way handshake + * RESPONSE) + * + */ + if (dh->dccph_type == DCCP_PKT_DATA || + dh->dccph_type == DCCP_PKT_DATAACK) { + static int discard = 0; + if (discard) { + discard = 0; + goto discard_it; + } + discard = 1; + } +#endif DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; @@ -1227,9 +1242,11 @@ static int dccp_v4_init_sock(struct sock *sk) do_gettimeofday(&dp->dccps_epoch); if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, - GFP_KERNEL); - if (dp->dccps_hc_rx_ackvec == NULL) + dp->dccps_hc_rx_ackpkts = + dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_KERNEL); + + if (dp->dccps_hc_rx_ackpkts == NULL) return -ENOMEM; } @@ -1241,18 +1258,16 @@ static int dccp_v4_init_sock(struct sock *sk) * setsockopt(CCIDs-I-want/accept). -acme */ if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, + dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, + dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk); if (dp->dccps_hc_rx_ccid == NULL || dp->dccps_hc_tx_ccid == NULL) { ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); - if (dp->dccps_options.dccpo_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; return -ENOMEM; } @@ -1265,7 +1280,6 @@ static int dccp_v4_init_sock(struct sock *sk) sk->sk_write_space = dccp_write_space; dp->dccps_mss_cache = 536; dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; return 0; } @@ -1287,17 +1301,10 @@ static int dccp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash != NULL) inet_put_port(&dccp_hashinfo, sk); - if (dp->dccps_service_list != NULL) { - kfree(dp->dccps_service_list); - dp->dccps_service_list = NULL; - } - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - if (dp->dccps_options.dccpo_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } + dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts); + dp->dccps_hc_rx_ackpkts = NULL; ccid_exit(dp->dccps_hc_rx_ccid, sk); ccid_exit(dp->dccps_hc_tx_ccid, sk); dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; diff --git a/trunk/net/dccp/minisocks.c b/trunk/net/dccp/minisocks.c index 1393461898bb..18461bc04cbe 100644 --- a/trunk/net/dccp/minisocks.c +++ b/trunk/net/dccp/minisocks.c @@ -19,7 +19,6 @@ #include #include -#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -94,24 +93,22 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct inet_connection_sock *newicsk = inet_csk(sk); struct dccp_sock *newdp = dccp_sk(newsk); - newdp->dccps_role = DCCP_ROLE_SERVER; - newdp->dccps_hc_rx_ackvec = NULL; - newdp->dccps_service_list = NULL; - newdp->dccps_service = dreq->dreq_service; - newicsk->icsk_rto = DCCP_TIMEOUT_INIT; + newdp->dccps_hc_rx_ackpkts = NULL; + newdp->dccps_role = DCCP_ROLE_SERVER; + newicsk->icsk_rto = DCCP_TIMEOUT_INIT; do_gettimeofday(&newdp->dccps_epoch); if (newdp->dccps_options.dccpo_send_ack_vector) { - newdp->dccps_hc_rx_ackvec = - dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, - GFP_ATOMIC); + newdp->dccps_hc_rx_ackpkts = + dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, + GFP_ATOMIC); /* * XXX: We're using the same CCIDs set on the parent, * i.e. sk_clone copied the master sock and left the * CCID pointers for this child, that is why we do the * __ccid_get calls. */ - if (unlikely(newdp->dccps_hc_rx_ackvec == NULL)) + if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL)) goto out_free; } @@ -119,7 +116,7 @@ struct sock *dccp_create_openreq_child(struct sock *sk, newsk) != 0 || ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0)) { - dccp_ackvec_free(newdp->dccps_hc_rx_ackvec); + dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts); ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk); ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk); out_free: diff --git a/trunk/net/dccp/options.c b/trunk/net/dccp/options.c index 0a76426c9aea..d4c4242d8dd7 100644 --- a/trunk/net/dccp/options.c +++ b/trunk/net/dccp/options.c @@ -18,15 +18,19 @@ #include #include -#include "ackvec.h" #include "ccid.h" #include "dccp.h" +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, + const u64 ackno, + const unsigned char len, + const unsigned char *vector); + /* stores the default values for new connection. may be changed with sysctl */ static const struct dccp_options dccpo_default_values = { .dccpo_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW, - .dccpo_rx_ccid = DCCPF_INITIAL_CCID, - .dccpo_tx_ccid = DCCPF_INITIAL_CCID, + .dccpo_ccid = DCCPF_INITIAL_CCID, .dccpo_send_ack_vector = DCCPF_INITIAL_SEND_ACK_VECTOR, .dccpo_send_ndp_count = DCCPF_INITIAL_SEND_NDP_COUNT, }; @@ -109,13 +113,25 @@ int dccp_parse_options(struct sock *sk, struct sk_buff *skb) opt_recv->dccpor_ndp); break; case DCCPO_ACK_VECTOR_0: - case DCCPO_ACK_VECTOR_1: + if (len > DCCP_MAX_ACK_VECTOR_LEN) + goto out_invalid_option; + if (pkt_type == DCCP_PKT_DATA) continue; - if (dp->dccps_options.dccpo_send_ack_vector && - dccp_ackvec_parse(sk, skb, opt, value, len)) - goto out_invalid_option; + opt_recv->dccpor_ack_vector_len = len; + opt_recv->dccpor_ack_vector_idx = value - options; + + dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n", + debug_prefix, len, + (unsigned long long) + DCCP_SKB_CB(skb)->dccpd_ack_seq); + dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, + value, len); + dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, + sk, + DCCP_SKB_CB(skb)->dccpd_ack_seq, + len, value); break; case DCCPO_TIMESTAMP: if (len != 4) @@ -336,6 +352,86 @@ void dccp_insert_option_elapsed_time(struct sock *sk, EXPORT_SYMBOL_GPL(dccp_insert_option_elapsed_time); +static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb) +{ + struct dccp_sock *dp = dccp_sk(sk); +#ifdef CONFIG_IP_DCCP_DEBUG + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT TX opt: " : "server TX opt: "; +#endif + struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; + int len = ap->dccpap_buf_vector_len + 2; + struct timeval now; + u32 elapsed_time; + unsigned char *to, *from; + + dccp_timestamp(sk, &now); + elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10; + + if (elapsed_time != 0) + dccp_insert_option_elapsed_time(sk, skb, elapsed_time); + + if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) { + LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to " + "insert ACK Vector!\n"); + return; + } + + /* + * XXX: now we have just one ack vector sent record, so + * we have to wait for it to be cleared. + * + * Of course this is not acceptable, but this is just for + * basic testing now. + */ + if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1) + return; + + DCCP_SKB_CB(skb)->dccpd_opt_len += len; + + to = skb_push(skb, len); + *to++ = DCCPO_ACK_VECTOR_0; + *to++ = len; + + len = ap->dccpap_buf_vector_len; + from = ap->dccpap_buf + ap->dccpap_buf_head; + + /* Check if buf_head wraps */ + if (ap->dccpap_buf_head + len > ap->dccpap_buf_len) { + const unsigned int tailsize = (ap->dccpap_buf_len - + ap->dccpap_buf_head); + + memcpy(to, from, tailsize); + to += tailsize; + len -= tailsize; + from = ap->dccpap_buf; + } + + memcpy(to, from, len); + /* + * From draft-ietf-dccp-spec-11.txt: + * + * For each acknowledgement it sends, the HC-Receiver will add an + * acknowledgement record. ack_seqno will equal the HC-Receiver + * sequence number it used for the ack packet; ack_ptr will equal + * buf_head; ack_ackno will equal buf_ackno; and ack_nonce will + * equal buf_nonce. + * + * This implemention uses just one ack record for now. + */ + ap->dccpap_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq; + ap->dccpap_ack_ptr = ap->dccpap_buf_head; + ap->dccpap_ack_ackno = ap->dccpap_buf_ackno; + ap->dccpap_ack_nonce = ap->dccpap_buf_nonce; + ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len; + + dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu\n", + debug_prefix, ap->dccpap_ack_vector_len, + (unsigned long long) ap->dccpap_ack_seqno, + (unsigned long long) ap->dccpap_ack_ackno); +} + void dccp_timestamp(const struct sock *sk, struct timeval *tv) { const struct dccp_sock *dp = dccp_sk(sk); @@ -432,8 +528,9 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) if (!dccp_packet_without_ack(skb)) { if (dp->dccps_options.dccpo_send_ack_vector && - dccp_ackvec_pending(dp->dccps_hc_rx_ackvec)) - dccp_insert_option_ackvec(sk, skb); + (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != + DCCP_MAX_SEQNO + 1)) + dccp_insert_option_ack_vector(sk, skb); if (dp->dccps_timestamp_echo != 0) dccp_insert_option_timestamp_echo(sk, skb); } @@ -460,3 +557,331 @@ void dccp_insert_options(struct sock *sk, struct sk_buff *skb) } } } + +struct dccp_ackpkts *dccp_ackpkts_alloc(const unsigned int len, + const unsigned int __nocast priority) +{ + struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority); + + if (ap != NULL) { +#ifdef CONFIG_IP_DCCP_DEBUG + memset(ap->dccpap_buf, 0xFF, len); +#endif + ap->dccpap_buf_len = len; + ap->dccpap_buf_head = + ap->dccpap_buf_tail = + ap->dccpap_buf_len - 1; + ap->dccpap_buf_ackno = + ap->dccpap_ack_ackno = + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + ap->dccpap_buf_nonce = ap->dccpap_buf_nonce = 0; + ap->dccpap_ack_ptr = 0; + ap->dccpap_time.tv_sec = 0; + ap->dccpap_time.tv_usec = 0; + ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0; + } + + return ap; +} + +void dccp_ackpkts_free(struct dccp_ackpkts *ap) +{ + if (ap != NULL) { +#ifdef CONFIG_IP_DCCP_DEBUG + memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len); +#endif + kfree(ap); + } +} + +static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_STATE_MASK; +} + +static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap, + const unsigned int index) +{ + return ap->dccpap_buf[index] & DCCP_ACKPKTS_LEN_MASK; +} + +/* + * If several packets are missing, the HC-Receiver may prefer to enter multiple + * bytes with run length 0, rather than a single byte with a larger run length; + * this simplifies table updates if one of the missing packets arrives. + */ +static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap, + const unsigned int packets, + const unsigned char state) +{ + unsigned int gap; + signed long new_head; + + if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len) + return -ENOBUFS; + + gap = packets - 1; + new_head = ap->dccpap_buf_head - packets; + + if (new_head < 0) { + if (gap > 0) { + memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED, + gap + new_head + 1); + gap = -new_head; + } + new_head += ap->dccpap_buf_len; + } + + ap->dccpap_buf_head = new_head; + + if (gap > 0) + memset(ap->dccpap_buf + ap->dccpap_buf_head + 1, + DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap); + + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len += packets; + return 0; +} + +/* + * Implements the draft-ietf-dccp-spec-11.txt Appendix A + */ +int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk, + u64 ackno, u8 state) +{ + /* + * Check at the right places if the buffer is full, if it is, tell the + * caller to start dropping packets till the HC-Sender acks our ACK + * vectors, when we will free up space in dccpap_buf. + * + * We may well decide to do buffer compression, etc, but for now lets + * just drop. + * + * From Appendix A: + * + * Of course, the circular buffer may overflow, either when the + * HC-Sender is sending data at a very high rate, when the + * HC-Receiver's acknowledgements are not reaching the HC-Sender, + * or when the HC-Sender is forgetting to acknowledge those acks + * (so the HC-Receiver is unable to clean up old state). In this + * case, the HC-Receiver should either compress the buffer (by + * increasing run lengths when possible), transfer its state to + * a larger buffer, or, as a last resort, drop all received + * packets, without processing them whatsoever, until its buffer + * shrinks again. + */ + + /* See if this is the first ackno being inserted */ + if (ap->dccpap_buf_vector_len == 0) { + ap->dccpap_buf[ap->dccpap_buf_head] = state; + ap->dccpap_buf_vector_len = 1; + } else if (after48(ackno, ap->dccpap_buf_ackno)) { + const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, + ackno); + + /* + * Look if the state of this packet is the same as the + * previous ackno and if so if we can bump the head len. + */ + if (delta == 1 && + dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state && + (dccp_ackpkts_len(ap, ap->dccpap_buf_head) < + DCCP_ACKPKTS_LEN_MASK)) + ap->dccpap_buf[ap->dccpap_buf_head]++; + else if (dccp_ackpkts_set_buf_head_state(ap, delta, state)) + return -ENOBUFS; + } else { + /* + * A.1.2. Old Packets + * + * When a packet with Sequence Number S arrives, and + * S <= buf_ackno, the HC-Receiver will scan the table + * for the byte corresponding to S. (Indexing structures + * could reduce the complexity of this scan.) + */ + u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno); + unsigned int index = ap->dccpap_buf_head; + + while (1) { + const u8 len = dccp_ackpkts_len(ap, index); + const u8 state = dccp_ackpkts_state(ap, index); + /* + * valid packets not yet in dccpap_buf have a reserved + * entry, with a len equal to 0. + */ + if (state == DCCP_ACKPKTS_STATE_NOT_RECEIVED && + len == 0 && delta == 0) { /* Found our + reserved seat! */ + dccp_pr_debug("Found %llu reserved seat!\n", + (unsigned long long) ackno); + ap->dccpap_buf[index] = state; + goto out; + } + /* len == 0 means one packet */ + if (delta < len + 1) + goto out_duplicate; + + delta -= len + 1; + if (++index == ap->dccpap_buf_len) + index = 0; + } + } + + ap->dccpap_buf_ackno = ackno; + dccp_timestamp(sk, &ap->dccpap_time); +out: + dccp_pr_debug(""); + dccp_ackpkts_print(ap); + return 0; + +out_duplicate: + /* Duplicate packet */ + dccp_pr_debug("Received a dup or already considered lost " + "packet: %llu\n", (unsigned long long) ackno); + return -EILSEQ; +} + +#ifdef CONFIG_IP_DCCP_DEBUG +void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, + int len) +{ + if (!dccp_debug) + return; + + printk("ACK vector len=%d, ackno=%llu |", len, + (unsigned long long) ackno); + + while (len--) { + const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6; + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + + printk("%d,%d|", state, rl); + ++vector; + } + + printk("\n"); +} + +void dccp_ackpkts_print(const struct dccp_ackpkts *ap) +{ + dccp_ackvector_print(ap->dccpap_buf_ackno, + ap->dccpap_buf + ap->dccpap_buf_head, + ap->dccpap_buf_vector_len); +} +#endif + +static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap) +{ + /* + * As we're keeping track of the ack vector size + * (dccpap_buf_vector_len) and the sent ack vector size + * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but + * keep this code here as in the future we'll implement a vector of + * ack records, as suggested in draft-ietf-dccp-spec-11.txt + * Appendix A. -acme + */ +#if 0 + ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1; + if (ap->dccpap_buf_tail >= ap->dccpap_buf_len) + ap->dccpap_buf_tail -= ap->dccpap_buf_len; +#endif + ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len; +} + +void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk, + u64 ackno) +{ + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + + if (ackno == ap->dccpap_ack_seqno) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, " + "ack_ackno=%llu, ACKED!\n", + debug_prefix, 1, + (unsigned long long) ap->dccpap_ack_seqno, + (unsigned long long) ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + } +} + +static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap, + struct sock *sk, u64 ackno, + const unsigned char len, + const unsigned char *vector) +{ + unsigned char i; + + /* Check if we actually sent an ACK vector */ + if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1) + return; + /* + * We're in the receiver half connection, so if the received an ACK + * vector ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're + * not interested. + * + * Extra explanation with example: + * + * if we received an ACK vector with ackno 50, it can only be acking + * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent). + */ + /* dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno); */ + if (before48(ackno, ap->dccpap_ack_seqno)) { + /* dccp_pr_debug_cat("yes\n"); */ + return; + } + /* dccp_pr_debug_cat("no\n"); */ + + i = len; + while (i--) { + const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK); + u64 ackno_end_rl; + + dccp_set_seqno(&ackno_end_rl, ackno - rl); + + /* + * dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, + * ap->dccpap_ack_seqno, ackno); + */ + if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) { + const u8 state = (*vector & + DCCP_ACKPKTS_STATE_MASK) >> 6; + /* dccp_pr_debug_cat("yes\n"); */ + + if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) { +#ifdef CONFIG_IP_DCCP_DEBUG + struct dccp_sock *dp = dccp_sk(sk); + const char *debug_prefix = + dp->dccps_role == DCCP_ROLE_CLIENT ? + "CLIENT rx ack: " : "server rx ack: "; +#endif + dccp_pr_debug("%sACK vector 0, len=%d, " + "ack_seqno=%llu, ack_ackno=%llu, " + "ACKED!\n", + debug_prefix, len, + (unsigned long long) + ap->dccpap_ack_seqno, + (unsigned long long) + ap->dccpap_ack_ackno); + dccp_ackpkts_trow_away_ack_record(ap); + } + /* + * If dccpap_ack_seqno was not received, no problem + * we'll send another ACK vector. + */ + ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1; + break; + } + /* dccp_pr_debug_cat("no\n"); */ + + dccp_set_seqno(&ackno, ackno_end_rl - 1); + ++vector; + } +} diff --git a/trunk/net/dccp/output.c b/trunk/net/dccp/output.c index 4786bdcddcc9..ea6d0e91e511 100644 --- a/trunk/net/dccp/output.c +++ b/trunk/net/dccp/output.c @@ -16,7 +16,6 @@ #include -#include "ackvec.h" #include "ccid.h" #include "dccp.h" @@ -86,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb) switch (dcb->dccpd_type) { case DCCP_PKT_REQUEST: dccp_hdr_request(skb)->dccph_req_service = - dp->dccps_service; + dcb->dccpd_service; break; case DCCP_PKT_RESET: dccp_hdr_reset(skb)->dccph_reset_code = @@ -226,6 +225,7 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) err = dccp_wait_for_ccid(sk, skb, timeo); if (err == 0) { + const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts; struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); const int len = skb->len; @@ -236,7 +236,15 @@ int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) + /* + * FIXME: we really should have a + * dccps_ack_pending or use icsk. + */ + } else if (inet_csk_ack_scheduled(sk) || + dp->dccps_timestamp_echo != 0 || + (dp->dccps_options.dccpo_send_ack_vector && + ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 && + ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)) dcb->dccpd_type = DCCP_PKT_DATAACK; else dcb->dccpd_type = DCCP_PKT_DATA; @@ -262,7 +270,6 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, struct request_sock *req) { struct dccp_hdr *dh; - struct dccp_request_sock *dreq; const int dccp_header_size = sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext) + sizeof(struct dccp_hdr_response); @@ -278,9 +285,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); skb->csum = 0; - dreq = dccp_rsk(req); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE; - DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss; + DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss; dccp_insert_options(sk, skb); skb->h.raw = skb_push(skb, dccp_header_size); @@ -294,9 +300,8 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; dh->dccph_type = DCCP_PKT_RESPONSE; dh->dccph_x = 1; - dccp_hdr_set_seq(dh, dreq->dreq_iss); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr); - dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service; + dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss); + dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr); dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr, inet_rsk(req)->rmt_addr); @@ -392,6 +397,9 @@ int dccp_connect(struct sock *sk) skb_reserve(skb, MAX_DCCP_HEADER); DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST; + /* FIXME: set service to something meaningful, coming + * from userspace*/ + DCCP_SKB_CB(skb)->dccpd_service = 0; skb->csum = 0; skb_set_owner_w(skb, sk); diff --git a/trunk/net/dccp/proto.c b/trunk/net/dccp/proto.c index a1cfd0e9e3bc..18a0e69c9dc7 100644 --- a/trunk/net/dccp/proto.c +++ b/trunk/net/dccp/proto.c @@ -94,15 +94,7 @@ EXPORT_SYMBOL_GPL(dccp_state_name); static inline int dccp_listen_start(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - - dp->dccps_role = DCCP_ROLE_LISTEN; - /* - * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE) - * before calling listen() - */ - if (dccp_service_not_initialized(sk)) - return -EPROTO; + dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN; return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE); } @@ -210,42 +202,6 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg) return -ENOIOCTLCMD; } -static int dccp_setsockopt_service(struct sock *sk, const u32 service, - char __user *optval, int optlen) -{ - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_service_list *sl = NULL; - - if (service == DCCP_SERVICE_INVALID_VALUE || - optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32)) - return -EINVAL; - - if (optlen > sizeof(service)) { - sl = kmalloc(optlen, GFP_KERNEL); - if (sl == NULL) - return -ENOMEM; - - sl->dccpsl_nr = optlen / sizeof(u32) - 1; - if (copy_from_user(sl->dccpsl_list, - optval + sizeof(service), - optlen - sizeof(service)) || - dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) { - kfree(sl); - return -EFAULT; - } - } - - lock_sock(sk); - dp->dccps_service = service; - - if (dp->dccps_service_list != NULL) - kfree(dp->dccps_service_list); - - dp->dccps_service_list = sl; - release_sock(sk); - return 0; -} - int dccp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { @@ -262,10 +218,8 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, if (get_user(val, (int __user *)optval)) return -EFAULT; - if (optname == DCCP_SOCKOPT_SERVICE) - return dccp_setsockopt_service(sk, val, optval, optlen); - lock_sock(sk); + dp = dccp_sk(sk); err = 0; @@ -282,37 +236,6 @@ int dccp_setsockopt(struct sock *sk, int level, int optname, return err; } -static int dccp_getsockopt_service(struct sock *sk, int len, - u32 __user *optval, - int __user *optlen) -{ - const struct dccp_sock *dp = dccp_sk(sk); - const struct dccp_service_list *sl; - int err = -ENOENT, slen = 0, total_len = sizeof(u32); - - lock_sock(sk); - if (dccp_service_not_initialized(sk)) - goto out; - - if ((sl = dp->dccps_service_list) != NULL) { - slen = sl->dccpsl_nr * sizeof(u32); - total_len += slen; - } - - err = -EINVAL; - if (total_len > len) - goto out; - - err = 0; - if (put_user(total_len, optlen) || - put_user(dp->dccps_service, optval) || - (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen))) - err = -EFAULT; -out: - release_sock(sk); - return err; -} - int dccp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -325,7 +248,8 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - if (len < sizeof(int)) + len = min_t(unsigned int, len, sizeof(int)); + if (len < 0) return -EINVAL; dp = dccp_sk(sk); @@ -333,17 +257,7 @@ int dccp_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case DCCP_SOCKOPT_PACKET_SIZE: val = dp->dccps_packet_size; - len = sizeof(dp->dccps_packet_size); break; - case DCCP_SOCKOPT_SERVICE: - return dccp_getsockopt_service(sk, len, - (u32 __user *)optval, optlen); - case 128 ... 191: - return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, - len, (u32 __user *)optval, optlen); - case 192 ... 255: - return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname, - len, (u32 __user *)optval, optlen); default: return -ENOPROTOOPT; } diff --git a/trunk/net/ipv4/netfilter/Kconfig b/trunk/net/ipv4/netfilter/Kconfig index e2162d270073..30aa8e2ee214 100644 --- a/trunk/net/ipv4/netfilter/Kconfig +++ b/trunk/net/ipv4/netfilter/Kconfig @@ -51,14 +51,6 @@ config IP_NF_CONNTRACK_EVENTS IF unsure, say `N'. -config IP_NF_CONNTRACK_NETLINK - tristate 'Connection tracking netlink interface' - depends on IP_NF_CONNTRACK && NETFILTER_NETLINK - depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m - help - This option enables support for a netlink-based userspace interface - - config IP_NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' depends on IP_NF_CONNTRACK && EXPERIMENTAL @@ -782,5 +774,11 @@ config IP_NF_ARP_MANGLE Allows altering the ARP packet payload: source and destination hardware and network addresses. +config IP_NF_CONNTRACK_NETLINK + tristate 'Connection tracking netlink interface' + depends on IP_NF_CONNTRACK && NETFILTER_NETLINK + help + This option enables support for a netlink-based userspace interface + endmenu diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_core.c b/trunk/net/ipv4/netfilter/ip_conntrack_core.c index f8cd8e42961e..19cba16e6e1e 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_core.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_core.c @@ -1143,10 +1143,7 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct, if (del_timer(&ct->timeout)) { ct->timeout.expires = jiffies + extra_jiffies; add_timer(&ct->timeout); - /* FIXME: We loose some REFRESH events if this function - * is called without an skb. I'll fix this later -HW */ - if (skb) - ip_conntrack_event_cache(IPCT_REFRESH, skb); + ip_conntrack_event_cache(IPCT_REFRESH, skb); } ct_add_counters(ct, ctinfo, skb); write_unlock_bh(&ip_conntrack_lock); diff --git a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c index 9bcb398fbc1f..7d38913754b1 100644 --- a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -31,7 +30,7 @@ #include #include -#define CLUSTERIP_VERSION "0.8" +#define CLUSTERIP_VERSION "0.7" #define DEBUG_CLUSTERIP @@ -50,14 +49,13 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP"); struct clusterip_config { struct list_head list; /* list of all configs */ atomic_t refcount; /* reference count */ - atomic_t entries; /* number of entries/rules - * referencing us */ u_int32_t clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ - unsigned long local_nodes; /* node number array */ + u_int16_t num_local_nodes; /* number of local nodes */ + u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */ #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; /* proc dir entry */ @@ -68,7 +66,8 @@ struct clusterip_config { static LIST_HEAD(clusterip_configs); -/* clusterip_lock protects the clusterip_configs list */ +/* clusterip_lock protects the clusterip_configs list _AND_ the configurable + * data within all structurses (num_local_nodes, local_nodes[]) */ static DEFINE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS @@ -77,48 +76,23 @@ static struct proc_dir_entry *clusterip_procdir; #endif static inline void -clusterip_config_get(struct clusterip_config *c) -{ +clusterip_config_get(struct clusterip_config *c) { atomic_inc(&c->refcount); } static inline void -clusterip_config_put(struct clusterip_config *c) -{ - if (atomic_dec_and_test(&c->refcount)) - kfree(c); -} - -/* increase the count of entries(rules) using/referencing this config */ -static inline void -clusterip_config_entry_get(struct clusterip_config *c) -{ - atomic_inc(&c->entries); -} - -/* decrease the count of entries using/referencing this config. If last - * entry(rule) is removed, remove the config from lists, but don't free it - * yet, since proc-files could still be holding references */ -static inline void -clusterip_config_entry_put(struct clusterip_config *c) -{ - if (atomic_dec_and_test(&c->entries)) { +clusterip_config_put(struct clusterip_config *c) { + if (atomic_dec_and_test(&c->refcount)) { write_lock_bh(&clusterip_lock); list_del(&c->list); write_unlock_bh(&clusterip_lock); - dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); dev_put(c->dev); - - /* In case anyone still accesses the file, the open/close - * functions are also incrementing the refcount on their own, - * so it's safe to remove the entry even if it's in use. */ -#ifdef CONFIG_PROC_FS - remove_proc_entry(c->pde->name, c->pde->parent); -#endif + kfree(c); } } + static struct clusterip_config * __clusterip_config_find(u_int32_t clusterip) { @@ -137,7 +111,7 @@ __clusterip_config_find(u_int32_t clusterip) } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip, int entry) +clusterip_config_find_get(u_int32_t clusterip) { struct clusterip_config *c; @@ -148,24 +122,11 @@ clusterip_config_find_get(u_int32_t clusterip, int entry) return NULL; } atomic_inc(&c->refcount); - if (entry) - atomic_inc(&c->entries); read_unlock_bh(&clusterip_lock); return c; } -static void -clusterip_config_init_nodelist(struct clusterip_config *c, - const struct ipt_clusterip_tgt_info *i) -{ - int n; - - for (n = 0; n < i->num_local_nodes; n++) { - set_bit(i->local_nodes[n] - 1, &c->local_nodes); - } -} - static struct clusterip_config * clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, struct net_device *dev) @@ -182,11 +143,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); c->num_total_nodes = i->num_total_nodes; - clusterip_config_init_nodelist(c, i); + c->num_local_nodes = i->num_local_nodes; + memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes)); c->hash_mode = i->hash_mode; c->hash_initval = i->hash_initval; atomic_set(&c->refcount, 1); - atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS /* create proc dir entry */ @@ -210,28 +171,53 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { + int i; - if (nodenum == 0 || - nodenum > c->num_total_nodes) - return 1; + write_lock_bh(&clusterip_lock); - /* check if we already have this number in our bitfield */ - if (test_and_set_bit(nodenum - 1, &c->local_nodes)) + if (c->num_local_nodes >= CLUSTERIP_MAX_NODES + || nodenum > CLUSTERIP_MAX_NODES) { + write_unlock_bh(&clusterip_lock); return 1; + } + + /* check if we alrady have this number in our array */ + for (i = 0; i < c->num_local_nodes; i++) { + if (c->local_nodes[i] == nodenum) { + write_unlock_bh(&clusterip_lock); + return 1; + } + } + + c->local_nodes[c->num_local_nodes++] = nodenum; + write_unlock_bh(&clusterip_lock); return 0; } static int clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { - if (nodenum == 0 || - nodenum > c->num_total_nodes) + int i; + + write_lock_bh(&clusterip_lock); + + if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) { + write_unlock_bh(&clusterip_lock); return 1; + } - if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) - return 0; + for (i = 0; i < c->num_local_nodes; i++) { + if (c->local_nodes[i] == nodenum) { + int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1)); + memmove(&c->local_nodes[i], &c->local_nodes[i+1], size); + c->num_local_nodes--; + write_unlock_bh(&clusterip_lock); + return 0; + } + } + write_unlock_bh(&clusterip_lock); return 1; } @@ -299,7 +285,25 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) static inline int clusterip_responsible(struct clusterip_config *config, u_int32_t hash) { - return test_bit(hash - 1, &config->local_nodes); + int i; + + read_lock_bh(&clusterip_lock); + + if (config->num_local_nodes == 0) { + read_unlock_bh(&clusterip_lock); + return 0; + } + + for (i = 0; i < config->num_local_nodes; i++) { + if (config->local_nodes[i] == hash) { + read_unlock_bh(&clusterip_lock); + return 1; + } + } + + read_unlock_bh(&clusterip_lock); + + return 0; } /*********************************************************************** @@ -411,26 +415,8 @@ checkentry(const char *tablename, /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr, 1); - if (config) { - if (cipinfo->config != NULL) { - /* Case A: This is an entry that gets reloaded, since - * it still has a cipinfo->config pointer. Simply - * increase the entry refcount and return */ - if (cipinfo->config != config) { - printk(KERN_ERR "CLUSTERIP: Reloaded entry " - "has invalid config pointer!\n"); - return 0; - } - clusterip_config_entry_get(cipinfo->config); - } else { - /* Case B: This is a new rule referring to an existing - * clusterip config. */ - cipinfo->config = config; - clusterip_config_entry_get(cipinfo->config); - } - } else { - /* Case C: This is a completely new clusterip config */ + config = clusterip_config_find_get(e->ip.dst.s_addr); + if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); return 0; @@ -457,9 +443,10 @@ checkentry(const char *tablename, } dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); } - cipinfo->config = config; } + cipinfo->config = config; + return 1; } @@ -468,10 +455,13 @@ static void destroy(void *matchinfo, unsigned int matchinfosize) { struct ipt_clusterip_tgt_info *cipinfo = matchinfo; - /* if no more entries are referencing the config, remove it - * from the list and destroy the proc entry */ - clusterip_config_entry_put(cipinfo->config); - + /* we first remove the proc entry and then drop the reference + * count. In case anyone still accesses the file, the open/close + * functions are also incrementing the refcount on their own */ +#ifdef CONFIG_PROC_FS + remove_proc_entry(cipinfo->config->pde->name, + cipinfo->config->pde->parent); +#endif clusterip_config_put(cipinfo->config); } @@ -543,7 +533,7 @@ arp_mangle(unsigned int hook, /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip, 0); + c = clusterip_config_find_get(payload->src_ip); if (!c) return NF_ACCEPT; @@ -584,69 +574,56 @@ static struct nf_hook_ops cip_arp_ops = { #ifdef CONFIG_PROC_FS -struct clusterip_seq_position { - unsigned int pos; /* position */ - unsigned int weight; /* number of bits set == size */ - unsigned int bit; /* current bit */ - unsigned long val; /* current value */ -}; - static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { struct proc_dir_entry *pde = s->private; struct clusterip_config *c = pde->data; - unsigned int weight; - u_int32_t local_nodes; - struct clusterip_seq_position *idx; - - /* FIXME: possible race */ - local_nodes = c->local_nodes; - weight = hweight32(local_nodes); - if (*pos >= weight) + unsigned int *nodeidx; + + read_lock_bh(&clusterip_lock); + if (*pos >= c->num_local_nodes) return NULL; - idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); - if (!idx) + nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL); + if (!nodeidx) return ERR_PTR(-ENOMEM); - idx->pos = *pos; - idx->weight = weight; - idx->bit = ffs(local_nodes); - idx->val = local_nodes; - clear_bit(idx->bit - 1, &idx->val); - - return idx; + *nodeidx = *pos; + return nodeidx; } static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; + struct proc_dir_entry *pde = s->private; + struct clusterip_config *c = pde->data; + unsigned int *nodeidx = (unsigned int *)v; - *pos = ++idx->pos; - if (*pos >= idx->weight) { + *pos = ++(*nodeidx); + if (*pos >= c->num_local_nodes) { kfree(v); return NULL; } - idx->bit = ffs(idx->val); - clear_bit(idx->bit - 1, &idx->val); - return idx; + return nodeidx; } static void clusterip_seq_stop(struct seq_file *s, void *v) { kfree(v); + + read_unlock_bh(&clusterip_lock); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; + struct proc_dir_entry *pde = s->private; + struct clusterip_config *c = pde->data; + unsigned int *nodeidx = (unsigned int *)v; - if (idx->pos != 0) + if (*nodeidx != 0) seq_putc(s, ','); + seq_printf(s, "%u", c->local_nodes[*nodeidx]); - seq_printf(s, "%u", idx->bit); - - if (idx->pos == idx->weight - 1) + if (*nodeidx == c->num_local_nodes-1) seq_putc(s, '\n'); return 0; diff --git a/trunk/net/ipv6/udp.c b/trunk/net/ipv6/udp.c index 6001948600f3..2b9bf9bd177f 100644 --- a/trunk/net/ipv6/udp.c +++ b/trunk/net/ipv6/udp.c @@ -639,7 +639,6 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int tclass = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; - int connected = 0; /* destination address check */ if (sin6) { @@ -749,7 +748,6 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl->fl_ip_dport = inet->dport; daddr = &np->daddr; fl->fl6_flowlabel = np->flow_label; - connected = 1; } if (!fl->oif) @@ -772,7 +770,6 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; - connected = 0; } if (opt == NULL) opt = np->opt; @@ -790,13 +787,10 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, ipv6_addr_copy(&final, &fl->fl6_dst); ipv6_addr_copy(&fl->fl6_dst, rt0->addr); final_p = &final; - connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { + if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) fl->oif = np->mcast_oif; - connected = 0; - } err = ip6_dst_lookup(sk, &dst, fl); if (err) @@ -852,7 +846,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); - if (dst && connected) + if (dst) ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? &np->daddr : NULL); diff --git a/trunk/net/socket.c b/trunk/net/socket.c index f9264472377f..c699e93c33d7 100644 --- a/trunk/net/socket.c +++ b/trunk/net/socket.c @@ -1862,8 +1862,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag if (err < 0) goto out_freeiov; } - err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT), - COMPAT_FLAGS(msg)); + err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg)); if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags)