diff --git a/[refs] b/[refs]
index 80d1e1415548..671a5b89afbb 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: d28934ad8a4e87203a95de9c376611de8bc2f013
+refs/heads/master: 88b9e2bef3e38c053ec8f054f2cbb9345724cdb1
diff --git a/trunk/drivers/net/bnx2.c b/trunk/drivers/net/bnx2.c
index 2486a656f12d..d4548101e495 100644
--- a/trunk/drivers/net/bnx2.c
+++ b/trunk/drivers/net/bnx2.c
@@ -35,8 +35,8 @@
 #include <linux/time.h>
 #include <linux/ethtool.h>
 #include <linux/mii.h>
-#include <linux/if_vlan.h>
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#include <linux/if_vlan.h>
 #define BCM_VLAN 1
 #endif
 #include <net/ip.h>
diff --git a/trunk/drivers/net/loopback.c b/trunk/drivers/net/loopback.c
index 3b43bfd85a0f..49f6bc036a92 100644
--- a/trunk/drivers/net/loopback.c
+++ b/trunk/drivers/net/loopback.c
@@ -64,6 +64,68 @@ struct pcpu_lstats {
 	unsigned long bytes;
 };
 
+/* KISS: just allocate small chunks and copy bits.
+ *
+ * So, in fact, this is documentation, explaining what we expect
+ * of largesending device modulo TCP checksum, which is ignored for loopback.
+ */
+
+#ifdef LOOPBACK_TSO
+static void emulate_large_send_offload(struct sk_buff *skb)
+{
+	struct iphdr *iph = ip_hdr(skb);
+	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
+					      (iph->ihl * 4));
+	unsigned int doffset = (iph->ihl + th->doff) * 4;
+	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
+	unsigned int offset = 0;
+	u32 seq = ntohl(th->seq);
+	u16 id = ntohs(iph->id);
+
+	while (offset + doffset < skb->len) {
+		unsigned int frag_size = min(mtu, skb->len - offset) - doffset;
+		struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC);
+
+		if (!nskb)
+			break;
+		skb_reserve(nskb, 32);
+		skb_set_mac_header(nskb, -ETH_HLEN);
+		skb_reset_network_header(nskb);
+		iph = ip_hdr(nskb);
+		skb_copy_to_linear_data(nskb, skb_network_header(skb),
+					doffset);
+		if (skb_copy_bits(skb,
+				  doffset + offset,
+				  nskb->data + doffset,
+				  frag_size))
+			BUG();
+		skb_put(nskb, doffset + frag_size);
+		nskb->ip_summed = CHECKSUM_UNNECESSARY;
+		nskb->dev = skb->dev;
+		nskb->priority = skb->priority;
+		nskb->protocol = skb->protocol;
+		nskb->dst = dst_clone(skb->dst);
+		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+		nskb->pkt_type = skb->pkt_type;
+
+		th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
+		iph->tot_len = htons(frag_size + doffset);
+		iph->id = htons(id);
+		iph->check = 0;
+		iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl);
+		th->seq = htonl(seq);
+		if (offset + doffset + frag_size < skb->len)
+			th->fin = th->psh = 0;
+		netif_rx(nskb);
+		offset += frag_size;
+		seq += frag_size;
+		id++;
+	}
+
+	dev_kfree_skb(skb);
+}
+#endif /* LOOPBACK_TSO */
+
 /*
  * The higher levels take care of making this non-reentrant (it's
  * called with bh's disabled).
@@ -75,6 +137,9 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_orphan(skb);
 
 	skb->protocol = eth_type_trans(skb,dev);
+#ifndef LOOPBACK_MUST_CHECKSUM
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+#endif
 
 #ifdef LOOPBACK_TSO
 	if (skb_is_gso(skb)) {
@@ -169,7 +234,9 @@ static void loopback_setup(struct net_device *dev)
 	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
 	dev->flags		= IFF_LOOPBACK;
 	dev->features		= NETIF_F_SG | NETIF_F_FRAGLIST
+#ifdef LOOPBACK_TSO
 		| NETIF_F_TSO
+#endif
 		| NETIF_F_NO_CSUM
 		| NETIF_F_HIGHDMA
 		| NETIF_F_LLTX
diff --git a/trunk/drivers/net/tun.c b/trunk/drivers/net/tun.c
index 6daea0c91862..e6bbc639c2d0 100644
--- a/trunk/drivers/net/tun.c
+++ b/trunk/drivers/net/tun.c
@@ -358,66 +358,6 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 	return mask;
 }
 
-/* prepad is the amount to reserve at front.  len is length after that.
- * linear is a hint as to how much to copy (usually headers). */
-static struct sk_buff *tun_alloc_skb(size_t prepad, size_t len, size_t linear,
-				     gfp_t gfp)
-{
-	struct sk_buff *skb;
-	unsigned int i;
-
-	skb = alloc_skb(prepad + len, gfp|__GFP_NOWARN);
-	if (skb) {
-		skb_reserve(skb, prepad);
-		skb_put(skb, len);
-		return skb;
-	}
-
-	/* Under a page?  Don't bother with paged skb. */
-	if (prepad + len < PAGE_SIZE)
-		return NULL;
-
-	/* Start with a normal skb, and add pages. */
-	skb = alloc_skb(prepad + linear, gfp);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, prepad);
-	skb_put(skb, linear);
-
-	len -= linear;
-
-	for (i = 0; i < MAX_SKB_FRAGS; i++) {
-		skb_frag_t *f = &skb_shinfo(skb)->frags[i];
-
-		f->page = alloc_page(gfp|__GFP_ZERO);
-		if (!f->page)
-			break;
-
-		f->page_offset = 0;
-		f->size = PAGE_SIZE;
-
-		skb->data_len += PAGE_SIZE;
-		skb->len += PAGE_SIZE;
-		skb->truesize += PAGE_SIZE;
-		skb_shinfo(skb)->nr_frags++;
-
-		if (len < PAGE_SIZE) {
-			len = 0;
-			break;
-		}
-		len -= PAGE_SIZE;
-	}
-
-	/* Too large, or alloc fail? */
-	if (unlikely(len)) {
-		kfree_skb(skb);
-		skb = NULL;
-	}
-
-	return skb;
-}
-
 /* Get packet from user space buffer */
 static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 				       size_t count)
 {
@@ -451,12 +391,14 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun, struct iovec *iv,
 		return -EINVAL;
 	}
 
-	if (!(skb = tun_alloc_skb(align, len, gso.hdr_len, GFP_KERNEL))) {
+	if (!(skb = alloc_skb(len + align, GFP_KERNEL))) {
 		tun->dev->stats.rx_dropped++;
 		return -ENOMEM;
 	}
 
-	if (skb_copy_datagram_from_iovec(skb, 0, iv, len)) {
+	if (align)
+		skb_reserve(skb, align);
+	if (memcpy_fromiovec(skb_put(skb, len), iv, len)) {
 		tun->dev->stats.rx_dropped++;
 		kfree_skb(skb);
 		return -EFAULT;
@@ -806,36 +748,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 	return err;
 }
 
-static int tun_get_iff(struct net *net, struct file *file, struct ifreq *ifr)
-{
-	struct tun_struct *tun = file->private_data;
-
-	if (!tun)
-		return -EBADFD;
-
-	DBG(KERN_INFO "%s: tun_get_iff\n", tun->dev->name);
-
-	strcpy(ifr->ifr_name, tun->dev->name);
-
-	ifr->ifr_flags = 0;
-
-	if (ifr->ifr_flags & TUN_TUN_DEV)
-		ifr->ifr_flags |= IFF_TUN;
-	else
-		ifr->ifr_flags |= IFF_TAP;
-
-	if (tun->flags & TUN_NO_PI)
-		ifr->ifr_flags |= IFF_NO_PI;
-
-	if (tun->flags & TUN_ONE_QUEUE)
-		ifr->ifr_flags |= IFF_ONE_QUEUE;
-
-	if (tun->flags & TUN_VNET_HDR)
-		ifr->ifr_flags |= IFF_VNET_HDR;
-
-	return 0;
-}
-
 /* This is like a cut-down ethtool ops, except done via tun fd so no
  * privs required. */
 static int set_offload(struct net_device *dev, unsigned long arg)
@@ -921,15 +833,6 @@ static int tun_chr_ioctl(struct inode *inode, struct file *file,
 	DBG(KERN_INFO "%s: tun_chr_ioctl cmd %d\n", tun->dev->name, cmd);
 
 	switch (cmd) {
-	case TUNGETIFF:
-		ret = tun_get_iff(current->nsproxy->net_ns, file, &ifr);
-		if (ret)
-			return ret;
-
-		if (copy_to_user(argp, &ifr, sizeof(ifr)))
-			return -EFAULT;
-		break;
-
 	case TUNSETNOCSUM:
 		/* Disable/Enable checksum */
 		if (arg)
diff --git a/trunk/drivers/net/wireless/ath9k/hw.c b/trunk/drivers/net/wireless/ath9k/hw.c
index bde162f128ab..a17eb130f574 100644
--- a/trunk/drivers/net/wireless/ath9k/hw.c
+++ b/trunk/drivers/net/wireless/ath9k/hw.c
@@ -5017,7 +5017,11 @@ static void ath9k_hw_spur_mitigate(struct ath_hal *ah,
 
 	for (i = 0; i < 123; i++) {
 		if ((cur_vit_mask > lower) && (cur_vit_mask < upper)) {
-			if ((abs(cur_vit_mask - bin)) < 75)
+
+			/* workaround for gcc bug #37014 */
+			volatile int tmp = abs(cur_vit_mask - bin);
+
+			if (tmp < 75)
 				mask_amt = 1;
 			else
 				mask_amt = 0;
diff --git a/trunk/include/linux/if_tun.h b/trunk/include/linux/if_tun.h
index 8529f57ba263..4c6307ad9fdb 100644
--- a/trunk/include/linux/if_tun.h
+++ b/trunk/include/linux/if_tun.h
@@ -45,7 +45,6 @@
 #define TUNGETFEATURES _IOR('T', 207, unsigned int)
 #define TUNSETOFFLOAD  _IOW('T', 208, unsigned int)
 #define TUNSETTXFILTER _IOW('T', 209, unsigned int)
-#define TUNGETIFF      _IOR('T', 210, unsigned int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
diff --git a/trunk/include/linux/skbuff.h b/trunk/include/linux/skbuff.h
index 909923717830..358661c9990e 100644
--- a/trunk/include/linux/skbuff.h
+++ b/trunk/include/linux/skbuff.h
@@ -1452,10 +1452,6 @@ extern int	       skb_copy_datagram_iovec(const struct sk_buff *from,
 extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 							int hlen,
 							struct iovec *iov);
-extern int	       skb_copy_datagram_from_iovec(struct sk_buff *skb,
-						    int offset,
-						    struct iovec *from,
-						    int len);
 extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
 extern int	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
 					 unsigned int flags);
diff --git a/trunk/include/net/sch_generic.h b/trunk/include/net/sch_generic.h
index 84d25f2e6188..a7abfda3e447 100644
--- a/trunk/include/net/sch_generic.h
+++ b/trunk/include/net/sch_generic.h
@@ -27,7 +27,6 @@ enum qdisc_state_t
 {
 	__QDISC_STATE_RUNNING,
 	__QDISC_STATE_SCHED,
-	__QDISC_STATE_DEACTIVATED,
 };
 
 struct qdisc_size_table {
@@ -61,6 +60,7 @@ struct Qdisc
 	struct gnet_stats_basic	bstats;
 	struct gnet_stats_queue	qstats;
 	struct gnet_stats_rate_est	rate_est;
+	struct rcu_head		q_rcu;
 	int			(*reshape_fail)(struct sk_buff *skb,
 					struct Qdisc *q);
 
diff --git a/trunk/net/bridge/br_device.c b/trunk/net/bridge/br_device.c
index 4f52c3d50ebe..9b58d70b0e7d 100644
--- a/trunk/net/bridge/br_device.c
+++ b/trunk/net/bridge/br_device.c
@@ -148,16 +148,11 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
 }
 
 static struct ethtool_ops br_ethtool_ops = {
-	.get_drvinfo = br_getinfo,
-	.get_link = ethtool_op_get_link,
-	.get_tx_csum = ethtool_op_get_tx_csum,
-	.set_tx_csum = br_set_tx_csum,
-	.get_sg = ethtool_op_get_sg,
-	.set_sg = br_set_sg,
-	.get_tso = ethtool_op_get_tso,
-	.set_tso = br_set_tso,
-	.get_ufo = ethtool_op_get_ufo,
-	.get_flags = ethtool_op_get_flags,
+	.get_drvinfo = br_getinfo,
+	.get_link = ethtool_op_get_link,
+	.set_sg = br_set_sg,
+	.set_tx_csum = br_set_tx_csum,
+	.set_tso = br_set_tso,
 };
 
 void br_dev_setup(struct net_device *dev)
diff --git a/trunk/net/core/datagram.c b/trunk/net/core/datagram.c
index 52f577a0f544..dd61dcad6019 100644
--- a/trunk/net/core/datagram.c
+++ b/trunk/net/core/datagram.c
@@ -339,93 +339,6 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 	return -EFAULT;
 }
 
-/**
- *	skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
- *	@skb: buffer to copy
- *	@offset: offset in the buffer to start copying to
- *	@from: io vector to copy to
- *	@len: amount of data to copy to buffer from iovec
- *
- *	Returns 0 or -EFAULT.
- *	Note: the iovec is modified during the copy.
- */
-int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
-				 struct iovec *from, int len)
-{
-	int start = skb_headlen(skb);
-	int i, copy = start - offset;
-
-	/* Copy header. */
-	if (copy > 0) {
-		if (copy > len)
-			copy = len;
-		if (memcpy_fromiovec(skb->data + offset, from, copy))
-			goto fault;
-		if ((len -= copy) == 0)
-			return 0;
-		offset += copy;
-	}
-
-	/* Copy paged appendix. Hmm... why does this look so complicated? */
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
-		int end;
-
-		WARN_ON(start > offset + len);
-
-		end = start + skb_shinfo(skb)->frags[i].size;
-		if ((copy = end - offset) > 0) {
-			int err;
-			u8 *vaddr;
-			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-			struct page *page = frag->page;
-
-			if (copy > len)
-				copy = len;
-			vaddr = kmap(page);
-			err = memcpy_fromiovec(vaddr + frag->page_offset +
-					       offset - start, from, copy);
-			kunmap(page);
-			if (err)
-				goto fault;
-
-			if (!(len -= copy))
-				return 0;
-			offset += copy;
-		}
-		start = end;
-	}
-
-	if (skb_shinfo(skb)->frag_list) {
-		struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-		for (; list; list = list->next) {
-			int end;
-
-			WARN_ON(start > offset + len);
-
-			end = start + list->len;
-			if ((copy = end - offset) > 0) {
-				if (copy > len)
-					copy = len;
-				if (skb_copy_datagram_from_iovec(list,
-								 offset - start,
-								 from, copy))
-					goto fault;
-				if ((len -= copy) == 0)
-					return 0;
-				offset += copy;
-			}
-			start = end;
-		}
-	}
-	if (!len)
-		return 0;
-
-fault:
-	return -EFAULT;
-}
-EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
-
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 				      u8 __user *to, int len,
 				      __wsum *csump)
diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c
index 8d133802372b..600bb23c4c2e 100644
--- a/trunk/net/core/dev.c
+++ b/trunk/net/core/dev.c
@@ -1339,23 +1339,19 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	}
 }
 
-static inline void __netif_reschedule(struct Qdisc *q)
-{
-	struct softnet_data *sd;
-	unsigned long flags;
-
-	local_irq_save(flags);
-	sd = &__get_cpu_var(softnet_data);
-	q->next_sched = sd->output_queue;
-	sd->output_queue = q;
-	raise_softirq_irqoff(NET_TX_SOFTIRQ);
-	local_irq_restore(flags);
-}
-
 void __netif_schedule(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
-		__netif_reschedule(q);
+	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
+		struct softnet_data *sd;
+		unsigned long flags;
+
+		local_irq_save(flags);
+		sd = &__get_cpu_var(softnet_data);
+		q->next_sched = sd->output_queue;
+		sd->output_queue = q;
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
 }
 EXPORT_SYMBOL(__netif_schedule);
 
@@ -1804,13 +1800,9 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 		spin_lock(root_lock);
 
-		if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
-			kfree_skb(skb);
-			rc = NET_XMIT_DROP;
-		} else {
-			rc = qdisc_enqueue_root(skb, q);
-			qdisc_run(q);
-		}
+		rc = qdisc_enqueue_root(skb, q);
+		qdisc_run(q);
+
 		spin_unlock(root_lock);
 
 		goto out;
@@ -1982,15 +1974,15 @@ static void net_tx_action(struct softirq_action *h)
 
 			head = head->next_sched;
 
+			smp_mb__before_clear_bit();
+			clear_bit(__QDISC_STATE_SCHED, &q->state);
+
 			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
-				smp_mb__before_clear_bit();
-				clear_bit(__QDISC_STATE_SCHED,
-					  &q->state);
 				qdisc_run(q);
 				spin_unlock(root_lock);
 			} else {
-				__netif_reschedule(q);
+				__netif_schedule(q);
 			}
 		}
 	}
@@ -2092,8 +2084,7 @@ static int ing_filter(struct sk_buff *skb)
 	q = rxq->qdisc;
 	if (q != &noop_qdisc) {
 		spin_lock(qdisc_lock(q));
-		if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
-			result = qdisc_enqueue_root(skb, q);
+		result = qdisc_enqueue_root(skb, q);
 		spin_unlock(qdisc_lock(q));
 	}
 
diff --git a/trunk/net/core/skbuff.c b/trunk/net/core/skbuff.c
index ca1ccdf1ef76..84640172d65d 100644
--- a/trunk/net/core/skbuff.c
+++ b/trunk/net/core/skbuff.c
@@ -2256,7 +2256,14 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		segs = nskb;
 		tail = nskb;
 
-		__copy_skb_header(nskb, skb);
+		nskb->dev = skb->dev;
+		skb_copy_queue_mapping(nskb, skb);
+		nskb->priority = skb->priority;
+		nskb->protocol = skb->protocol;
+		nskb->vlan_tci = skb->vlan_tci;
+		nskb->dst = dst_clone(skb->dst);
+		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+		nskb->pkt_type = skb->pkt_type;
 		nskb->mac_len = skb->mac_len;
 
 		skb_reserve(nskb, headroom);
@@ -2267,7 +2274,6 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		skb_copy_from_linear_data(skb, skb_put(nskb, doffset),
 					  doffset);
 		if (!sg) {
-			nskb->ip_summed = CHECKSUM_NONE;
 			nskb->csum = skb_copy_and_csum_bits(skb, offset,
 							    skb_put(nskb, len),
 							    len, 0);
@@ -2277,6 +2283,8 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features)
 		frag = skb_shinfo(nskb)->frags;
 		k = 0;
 
+		nskb->ip_summed = CHECKSUM_PARTIAL;
+		nskb->csum = skb->csum;
 		skb_copy_from_linear_data_offset(skb, offset,
 						 skb_put(nskb, hsize), hsize);
 
diff --git a/trunk/net/dccp/input.c b/trunk/net/dccp/input.c
index 803933ab396d..df2f110df94a 100644
--- a/trunk/net/dccp/input.c
+++ b/trunk/net/dccp/input.c
@@ -411,6 +411,12 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		struct dccp_sock *dp = dccp_sk(sk);
 		long tstamp = dccp_timestamp();
 
+		/* Stop the REQUEST timer */
+		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+		WARN_ON(sk->sk_send_head == NULL);
+		__kfree_skb(sk->sk_send_head);
+		sk->sk_send_head = NULL;
+
 		if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
 			       dp->dccps_awl, dp->dccps_awh)) {
 			dccp_pr_debug("invalid ackno: S.AWL=%llu, "
@@ -435,12 +441,6 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 					       DCCP_ACKVEC_STATE_RECEIVED))
 			goto out_invalid_packet; /* FIXME: change error code */
 
-		/* Stop the REQUEST timer */
-		inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
-		WARN_ON(sk->sk_send_head == NULL);
-		kfree_skb(sk->sk_send_head);
-		sk->sk_send_head = NULL;
-
 		dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
 		dccp_update_gsr(sk, dp->dccps_isr);
 
 		/*
diff --git a/trunk/net/ipv6/ipv6_sockglue.c b/trunk/net/ipv6/ipv6_sockglue.c
index 4e5eac301f91..741cfcd96f88 100644
--- a/trunk/net/ipv6/ipv6_sockglue.c
+++ b/trunk/net/ipv6/ipv6_sockglue.c
@@ -911,7 +911,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 	} else {
 		if (np->rxopt.bits.rxinfo) {
 			struct in6_pktinfo src_info;
-			src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
+			src_info.ipi6_ifindex = np->mcast_oif;
 			ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
 			put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 		}
@@ -921,7 +921,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		}
 		if (np->rxopt.bits.rxoinfo) {
 			struct in6_pktinfo src_info;
-			src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif : sk->sk_bound_dev_if;
+			src_info.ipi6_ifindex = np->mcast_oif;
 			ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr);
 			put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 		}
diff --git a/trunk/net/sched/sch_api.c b/trunk/net/sched/sch_api.c
index d91a2338877c..c25465e5607a 100644
--- a/trunk/net/sched/sch_api.c
+++ b/trunk/net/sched/sch_api.c
@@ -27,7 +27,6 @@
 #include <linux/kmod.h>
 #include <linux/list.h>
 #include <linux/hrtimer.h>
-#include <linux/lockdep.h>
 
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -427,7 +426,7 @@ static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
 
 	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
 	smp_wmb();
-	__netif_schedule(qdisc_root(wd->qdisc));
+	__netif_schedule(wd->qdisc);
 
 	return HRTIMER_NORESTART;
 }
@@ -638,8 +637,11 @@ static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid
 	if (new || old)
 		qdisc_notify(skb, n, clid, old, new);
 
-	if (old)
+	if (old) {
+		spin_lock_bh(&old->q.lock);
 		qdisc_destroy(old);
+		spin_unlock_bh(&old->q.lock);
+	}
 }
 
 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
@@ -705,10 +707,6 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 	return err;
 }
 
-/* lockdep annotation is needed for ingress; egress gets it only for name */
-static struct lock_class_key qdisc_tx_lock;
-static struct lock_class_key qdisc_rx_lock;
-
 /*
    Allocate and initialize new qdisc.
 
@@ -769,7 +767,6 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 	if (handle == TC_H_INGRESS) {
 		sch->flags |= TCQ_F_INGRESS;
 		handle = TC_H_MAKE(TC_H_INGRESS, 0);
-		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
 	} else {
 		if (handle == 0) {
 			handle = qdisc_alloc_handle(dev);
@@ -777,7 +774,6 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
 			if (handle == 0)
 				goto err_out3;
 		}
-		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
 	}
 
 	sch->handle = handle;
@@ -1089,10 +1085,16 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 
 graft:
 	if (1) {
+		spinlock_t *root_lock;
+
 		err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
 		if (err) {
-			if (q)
+			if (q) {
+				root_lock = qdisc_root_lock(q);
+				spin_lock_bh(root_lock);
 				qdisc_destroy(q);
+				spin_unlock_bh(root_lock);
+			}
 			return err;
 		}
 	}
diff --git a/trunk/net/sched/sch_cbq.c b/trunk/net/sched/sch_cbq.c
index 47ef492c4ff4..4e261ce62f48 100644
--- a/trunk/net/sched/sch_cbq.c
+++ b/trunk/net/sched/sch_cbq.c
@@ -654,7 +654,7 @@ static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
 	}
 
 	sch->flags &= ~TCQ_F_THROTTLED;
-	__netif_schedule(qdisc_root(sch));
+	__netif_schedule(sch);
 
 	return HRTIMER_NORESTART;
 }
diff --git a/trunk/net/sched/sch_generic.c b/trunk/net/sched/sch_generic.c
index c3ed4d44fc14..468574682caa 100644
--- a/trunk/net/sched/sch_generic.c
+++ b/trunk/net/sched/sch_generic.c
@@ -518,17 +518,14 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
-void qdisc_destroy(struct Qdisc *qdisc)
+/* this is the rcu callback function to clean up a qdisc when there
+ * are no further references to it */
+
+static void __qdisc_destroy(struct rcu_head *head)
 {
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
 	const struct Qdisc_ops *ops = qdisc->ops;
 
-	if (qdisc->flags & TCQ_F_BUILTIN ||
-	    !atomic_dec_and_test(&qdisc->refcnt))
-		return;
-
-	if (qdisc->parent)
-		list_del(&qdisc->list);
-
 #ifdef CONFIG_NET_SCHED
 	qdisc_put_stab(qdisc->stab);
 #endif
@@ -545,6 +542,20 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	kfree((char *) qdisc - qdisc->padded);
 }
+
+/* Under qdisc_lock(qdisc) and BH! */
+
+void qdisc_destroy(struct Qdisc *qdisc)
+{
+	if (qdisc->flags & TCQ_F_BUILTIN ||
+	    !atomic_dec_and_test(&qdisc->refcnt))
+		return;
+
+	if (qdisc->parent)
+		list_del(&qdisc->list);
+
+	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
+}
 EXPORT_SYMBOL(qdisc_destroy);
 
 static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
 {
@@ -586,9 +597,6 @@ static void transition_one_qdisc(struct net_device *dev,
 	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
 	int *need_watchdog_p = _need_watchdog;
 
-	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
-		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
-
 	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
 	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
 		*need_watchdog_p = 1;
@@ -632,9 +640,6 @@ static void dev_deactivate_queue(struct net_device *dev,
 	if (qdisc) {
 		spin_lock_bh(qdisc_lock(qdisc));
 
-		if (!(qdisc->flags & TCQ_F_BUILTIN))
-			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
-
 		dev_queue->qdisc = qdisc_default;
 		qdisc_reset(qdisc);
 
@@ -642,7 +647,7 @@
 	}
 }
 
-static bool some_qdisc_is_busy(struct net_device *dev)
+static bool some_qdisc_is_busy(struct net_device *dev, int lock)
 {
 	unsigned int i;
 
@@ -656,12 +661,14 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 		q = dev_queue->qdisc_sleeping;
 		root_lock = qdisc_lock(q);
 
-		spin_lock_bh(root_lock);
+		if (lock)
+			spin_lock_bh(root_lock);
 
 		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
-		spin_unlock_bh(root_lock);
+		if (lock)
+			spin_unlock_bh(root_lock);
 
 		if (val)
 			return true;
@@ -671,6 +678,8 @@
 
 void dev_deactivate(struct net_device *dev)
 {
+	bool running;
+
 	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
 	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
 
@@ -680,8 +689,25 @@ void dev_deactivate(struct net_device *dev)
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	while (some_qdisc_is_busy(dev))
-		yield();
+	do {
+		while (some_qdisc_is_busy(dev, 0))
+			yield();
+
+		/*
+		 * Double-check inside queue lock to ensure that all effects
+		 * of the queue run are visible when we return.
+		 */
+		running = some_qdisc_is_busy(dev, 1);
+
+		/*
+		 * The running flag should never be set at this point because
+		 * we've already set dev->qdisc to noop_qdisc *inside* the same
+		 * pair of spin locks. That is, if any qdisc_run starts after
+		 * our initial test it should see the noop_qdisc and then
+		 * clear the RUNNING bit before dropping the queue lock. So
+		 * if it is set here then we've found a bug.
+		 */
+	} while (WARN_ON_ONCE(running));
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
@@ -710,10 +736,14 @@ static void shutdown_scheduler_queue(struct net_device *dev,
 	struct Qdisc *qdisc_default = _qdisc_default;
 
 	if (qdisc) {
+		spinlock_t *root_lock = qdisc_lock(qdisc);
+
 		dev_queue->qdisc = qdisc_default;
 		dev_queue->qdisc_sleeping = qdisc_default;
 
+		spin_lock_bh(root_lock);
 		qdisc_destroy(qdisc);
+		spin_unlock_bh(root_lock);
 	}
 }
diff --git a/trunk/net/sched/sch_htb.c b/trunk/net/sched/sch_htb.c
index 0df0df202ed0..6febd245e62b 100644
--- a/trunk/net/sched/sch_htb.c
+++ b/trunk/net/sched/sch_htb.c
@@ -577,7 +577,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			sch->qstats.drops++;
 			cl->qstats.drops++;
 		}
-		return ret;
+		return NET_XMIT_DROP;
 	} else {
 		cl->bstats.packets +=
 			skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1;
@@ -623,7 +623,7 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch)
 			sch->qstats.drops++;
 			cl->qstats.drops++;
 		}
-		return ret;
+		return NET_XMIT_DROP;
 	} else
 		htb_activate(q, cl);
 
diff --git a/trunk/net/sched/sch_prio.c b/trunk/net/sched/sch_prio.c
index a6697c686c7f..eac197610edf 100644
--- a/trunk/net/sched/sch_prio.c
+++ b/trunk/net/sched/sch_prio.c
@@ -113,11 +113,11 @@ prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
 	if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
 		sch->q.qlen++;
 		sch->qstats.requeues++;
-		return NET_XMIT_SUCCESS;
+		return 0;
 	}
 	if (net_xmit_drop_count(ret))
 		sch->qstats.drops++;
-	return ret;
+	return NET_XMIT_DROP;
 }
 
diff --git a/trunk/net/sched/sch_tbf.c b/trunk/net/sched/sch_tbf.c
index 94c61598b86a..7d3b7ff3bf07 100644
--- a/trunk/net/sched/sch_tbf.c
+++ b/trunk/net/sched/sch_tbf.c
@@ -123,8 +123,15 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 	struct tbf_sched_data *q = qdisc_priv(sch);
 	int ret;
 
-	if (qdisc_pkt_len(skb) > q->max_size)
-		return qdisc_reshape_fail(skb, sch);
+	if (qdisc_pkt_len(skb) > q->max_size) {
+		sch->qstats.drops++;
+#ifdef CONFIG_NET_CLS_ACT
+		if (sch->reshape_fail == NULL || sch->reshape_fail(skb, sch))
+#endif
+			kfree_skb(skb);
+
+		return NET_XMIT_DROP;
+	}
 
 	ret = qdisc_enqueue(skb, q->qdisc);
 	if (ret != 0) {